From d7d9915b766c96ad0442fff242ac4b1e65da0c18 Mon Sep 17 00:00:00 2001 From: Weifeng Wang Date: Wed, 3 Apr 2024 15:35:23 +0800 Subject: [PATCH] [kubernetes]: Imporve kubernetes deploy Signed-off-by: Weifeng Wang --- Makefile | 114 +- monitoring-mixins/Makefile | 7 - monitoring-mixins/k8s-all-in-one.yaml | 66845 ++++++++++++++++++++++++ 3 files changed, 66904 insertions(+), 62 deletions(-) delete mode 100644 monitoring-mixins/Makefile create mode 100644 monitoring-mixins/k8s-all-in-one.yaml diff --git a/Makefile b/Makefile index 8e4221c4..2c202fa1 100644 --- a/Makefile +++ b/Makefile @@ -191,7 +191,11 @@ clean: ## Clean cluster .PHONY: manifests manifests: ## Generates k8s manifests -manifests: $(KUSTOMIZE) manifests-common manifests-monolithic-mode manifests-read-write-mode manifests-microservices-mode +manifests: $(KUSTOMIZE) manifests-common manifests-monolithic-mode manifests-read-write-mode manifests-microservices-mode manifests-monitoring-mixins + +manifests-monitoring-mixins: $(KUSTOMIZE) + $(info ******************** generates manifests-monitoring-mixins manifests ********************) + @$(KUSTOMIZE) build monitoring-mixins > monitoring-mixins/k8s-all-in-one.yaml manifests-common: $(KUSTOMIZE) $(info ******************** generates manifests-common manifests ********************) @@ -229,43 +233,43 @@ manifests-microservices-mode: $(KUSTOMIZE) deploy-prometheus-operator-crds: $(info ******************** deploy prometheus-operator-crds manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/prometheus-operator-crds | kubectl replace -f - || $(KUSTOMIZE) build --enable-helm kubernetes/common/prometheus-operator-crds | kubectl create -f - + @kubectl replace -f kubernetes/common/prometheus-operator-crds/manifests/k8s-all-in-one.yaml || kubectl create -f kubernetes/common/prometheus-operator-crds/manifests/k8s-all-in-one.yaml # kube-prometheus-stack .PHONY: deploy-kube-prometheus-stack deploy-kube-prometheus-stack: ## Deploy kube-prometheus-stack manifests $(info ******************** deploy kube-prometheus-stack manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/kube-prometheus-stack | kubectl apply -f - - @$(KUSTOMIZE) build --enable-helm kubernetes/common/rancher-pushprox | kubectl apply -f - + @kubectl apply -f kubernetes/common/kube-prometheus-stack/manifests/k8s-all-in-one.yaml + @kubectl apply -f kubernetes/common/rancher-pushprox/manifests/k8s-all-in-one.yaml delete-kube-prometheus-stack: $(info ******************** delete kube-prometheus-stack manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/kube-prometheus-stack | kubectl delete -f - - @$(KUSTOMIZE) build --enable-helm kubernetes/common/rancher-pushprox | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/common/kube-prometheus-stack/manifests/k8s-all-in-one.yaml + @kubectl delete --ignore-not-found -f kubernetes/common/rancher-pushprox/manifests/k8s-all-in-one.yaml .PHONY: deploy-minio deploy-minio: $(info ******************** deploy minio manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-operator | kubectl apply -f - - kubectl rollout status -n minio-system deployment/minio-operator --watch --timeout=600s - @$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl apply -f - + @kubectl apply -f kubernetes/common/minio-operator/manifests/k8s-all-in-one.yaml + @kubectl rollout status -n minio-system deployment/minio-operator --watch --timeout=600s + @kubectl apply -f kubernetes/common/minio-tenant/manifests/k8s-all-in-one.yaml @echo "Waiting for Minio to be ready..." @sleep 20 - kubectl rollout status -n minio-system statefulset/codelab-pool-10gb --watch --timeout=600s || true + @kubectl rollout status -n minio-system statefulset/codelab-pool-10gb --watch --timeout=600s || true delete-minio: - @$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl delete --ignore-not-found -f - + @kubectl delete --ignore-not-found -f kubernetes/common/minio-tenant/manifests/k8s-all-in-one.yaml .PHONY: deploy-gateway deploy-gateway: $(info ******************** deploy gateway manifests ********************) - @$(KUSTOMIZE) build kubernetes/common/gateway | kubectl apply -f - - kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s + @kubectl apply -f kubernetes/common/gateway/manifests/k8s-all-in-one.yaml + @kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s .PHONY: deploy-grafana deploy-grafana: deploy-prometheus-operator-crds deploy-minio deploy-gateway $(info ******************** deploy grafana manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl apply -f - + @kubectl apply -f kubernetes/common/grafana/manifests/k8s-all-in-one.yaml delete-grafana: - @$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl delete --ignore-not-found -f - + @kubectl delete --ignore-not-found -f kubernetes/common/grafana/manifests/k8s-all-in-one.yaml define echo_info $(eval $@_MSG = $(1)) @@ -277,9 +281,9 @@ endef define config_changes_trigger_pod_restart $(eval $@_MSG = $(1)) @kubectl rollout restart deployment -n gateway nginx - kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s + @kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s @echo "Provisioning Grafana dashboards Prometheus rules and alerts..." - @$(KUSTOMIZE) build monitoring-mixins | kubectl apply -f - + @kubectl apply -f monitoring-mixins/k8s-all-in-one.yaml kubectl rollout status -n monitoring-system daemonset/grafana-agent --watch --timeout=600s @$(call echo_info, ${$@_MSG}) endef @@ -288,135 +292,135 @@ endef .PHONY: deploy-monolithic-mode-metrics deploy-monolithic-mode-metrics: deploy-memcached ## Deploy monolithic-mode Mimir for metrics $(info ******************** deploy monolithic-mode metrics manifests ********************) - @$(KUSTOMIZE) build kubernetes/monolithic-mode/metrics | kubectl apply -f - - kubectl rollout status -n monitoring-system deployment/mimir --watch --timeout=600s + @kubectl apply -f kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml + @kubectl rollout status -n monitoring-system deployment/mimir --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the metrics.") delete-monolithic-mode-metrics: delete-memcached - @$(KUSTOMIZE) build kubernetes/monolithic-mode/metrics | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml .PHONY: deploy-monolithic-mode-logs deploy-monolithic-mode-logs: deploy-memcached ## Deploy monolithic-mode Loki for logs $(info ******************** deploy monolithic-mode logs manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/logs | kubectl apply -f - - kubectl rollout status -n logging-system statefulset/loki --watch --timeout=600s + @kubectl apply -f kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml + @kubectl rollout status -n logging-system statefulset/loki --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the logs.") delete-monolithic-mode-logs: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/logs | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml .PHONY: deploy-monolithic-mode-profiles deploy-monolithic-mode-profiles: deploy-memcached ## Deploy monolithic-mode Pyroscope for profiles $(info ******************** deploy monolithic-mode profiles manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/profiles | kubectl apply -f - - kubectl rollout status -n profiles-system statefulset/pyroscope --watch --timeout=600s + @kubectl apply -f kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml + @kubectl rollout status -n profiles-system statefulset/pyroscope --watch --timeout=600s @$(call config_changes_trigger_pod_restart,"Go to http://localhost:8080/explore for the profiles.") delete-monolithic-mode-profiles: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/profiles | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml .PHONY: deploy-monolithic-mode-traces deploy-monolithic-mode-traces: deploy-memcached ## Deploy monolithic-mode Tempo for traces $(info ******************** deploy monolithic-mode traces manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/traces | kubectl apply -f - - kubectl rollout status -n tracing-system statefulset/tempo --watch --timeout=600s + @kubectl apply -f kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml + @kubectl rollout status -n tracing-system statefulset/tempo --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the traces.") delete-monolithic-mode-traces: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/traces | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml .PHONY: deploy-monolithic-mode-all-in-one deploy-monolithic-mode-all-in-one: deploy-memcached ## Deploy monolithic-mode all-in-one $(info ******************** deploy monolithic-mode all-in-one manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/all-in-one | kubectl apply -f - - kubectl rollout status -n monitoring-system deployment/mimir --watch --timeout=600s + @kubectl apply -f kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml + @kubectl rollout status -n monitoring-system deployment/mimir --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the all-in-one.") delete-monolithic-mode-all-in-one: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/all-in-one | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml .PHONY: deploy-read-write-mode-metrics deploy-read-write-mode-metrics: deploy-memcached ## Deploy read-write-mode Mimir for metrics $(info ******************** deploy read-write-mode metrics manifests ********************) - @$(KUSTOMIZE) build kubernetes/read-write-mode/metrics | kubectl apply -f - - kubectl rollout status -n monitoring-system deployment/mimir-write --watch --timeout=600s + @kubectl apply -f kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml + @kubectl rollout status -n monitoring-system deployment/mimir-write --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the metrics.") delete-read-write-mode-metrics: delete-memcached - @$(KUSTOMIZE) build kubernetes/read-write-mode/metrics | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml .PHONY: deploy-read-write-mode-logs deploy-read-write-mode-logs: deploy-memcached ## Deploy read-write-mode Loki for logs $(info ******************** deploy read-write-mode logs manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/logs | kubectl apply -f - - kubectl rollout status -n logging-system statefulset/loki-write --watch --timeout=600s + @kubectl apply -f kubernetes/read-write-mode/logs/k8s-all-in-one.yaml + @kubectl rollout status -n logging-system statefulset/loki-write --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the logs.") delete-read-write-mode-logs: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/logs | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/read-write-mode/logs/k8s-all-in-one.yaml .PHONY: deploy-microservices-mode-logs deploy-microservices-mode-logs: deploy-memcached ## Deploy microservices-mode Loki for logs $(info ******************** deploy microservices-mode logs manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/logs | kubectl apply -f - - kubectl rollout status -n logging-system statefulset/loki-distributed-ingester --watch --timeout=600s + @kubectl apply -f kubernetes/microservices-mode/logs/k8s-all-in-one.yaml + @kubectl rollout status -n logging-system statefulset/loki-distributed-ingester --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the logs.") delete-microservices-mode-logs: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/logs | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/microservices-mode/logs/k8s-all-in-one.yaml .PHONY: deploy-microservices-mode-metrics deploy-microservices-mode-metrics: deploy-memcached ## Deploy microservices-mode Mimir for metrics $(info ******************** deploy microservices-mode metrics manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/metrics | kubectl apply -f - - kubectl rollout status -n monitoring-system statefulset/mimir-distributed-ingester --watch --timeout=600s + @kubectl apply -f kubernetes/microservices-mode/metrics/k8s-all-in-one.yaml + @kubectl rollout status -n monitoring-system statefulset/mimir-distributed-ingester --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the metrics.") delete-microservices-mode-metrics: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/metrics | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/microservices-mode/metrics/k8s-all-in-one.yaml .PHONY: deploy-microservices-mode-profiles deploy-microservices-mode-profiles: deploy-memcached ## Deploy microservices-mode Pyroscope for profiles $(info ******************** deploy microservices-mode profiles manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/profiles | kubectl apply -f - + @kubectl apply -f kubernetes/microservices-mode/profiles/k8s-all-in-one.yaml @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the profiles.") delete-microservices-mode-profiles: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/profiles | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/microservices-mode/profiles/k8s-all-in-one.yaml .PHONY: deploy-microservices-mode-traces deploy-microservices-mode-traces: deploy-memcached ## Deploy microservices-mode Tempo for traces $(info ******************** deploy microservices-mode traces manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/traces | kubectl apply -f - - kubectl rollout status -n tracing-system statefulset/tempo-distributed-ingester --watch --timeout=600s + @kubectl apply -f kubernetes/microservices-mode/traces/k8s-all-in-one.yaml + @kubectl rollout status -n tracing-system statefulset/tempo-distributed-ingester --watch --timeout=600s @$(call config_changes_trigger_pod_restart, "Go to http://localhost:8080/explore for the traces.") delete-microservices-mode-traces: delete-memcached - @$(KUSTOMIZE) build --enable-helm kubernetes/microservices-mode/traces | kubectl delete -f - + @kubectl delete --ignore-not-found -f kubernetes/microservices-mode/traces/k8s-all-in-one.yaml ##@ Grafana Agent Integrations deploy-memcached: deploy-grafana $(info ******************** deploy integration memcached manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/memcached | kubectl apply -f - + @kubectl apply -f kubernetes/common/memcached/manifests/k8s-all-in-one.yaml delete-memcached: delete-minio - @$(KUSTOMIZE) build --enable-helm kubernetes/common/memcached | kubectl delete --ignore-not-found -f - + @kubectl delete --ignore-not-found -f kubernetes/common/memcached/manifests/k8s-all-in-one.yaml .PHONY: deploy-mysql deploy-mysql: ## Deploy integration mysql manifests $(info ******************** deploy integration mysql manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/mysql | kubectl apply -f - + @kubectl apply -f kubernetes/common/mysql/manifests/k8s-all-in-one.yaml delete-mysql: - @$(KUSTOMIZE) build --enable-helm kubernetes/common/mysql | kubectl delete --ignore-not-found -f - + @kubectl delete --ignore-not-found -f kubernetes/common/mysql/manifests/k8s-all-in-one.yaml .PHONY: deploy-redis deploy-redis: ## Deploy integration redis manifests $(info ******************** deploy integration redis manifests ********************) - @$(KUSTOMIZE) build --enable-helm kubernetes/common/redis | kubectl apply -f - + @kubectl apply -f kubernetes/common/redis/manifests/k8s-all-in-one.yaml delete-redis: - @$(KUSTOMIZE) build --enable-helm kubernetes/common/redis | kubectl delete --ignore-not-found -f - + @kubectl delete --ignore-not-found -f kubernetes/common/redis/manifests/k8s-all-in-one.yaml ##@ Build diff --git a/monitoring-mixins/Makefile b/monitoring-mixins/Makefile deleted file mode 100644 index b2e20df8..00000000 --- a/monitoring-mixins/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -include ../.bingo/Variables.mk - -all: build - -.PHONY: build -build: $(KUSTOMIZE) - @$(KUSTOMIZE) build . > /dev/null diff --git a/monitoring-mixins/k8s-all-in-one.yaml b/monitoring-mixins/k8s-all-in-one.yaml new file mode 100644 index 00000000..7c683140 --- /dev/null +++ b/monitoring-mixins/k8s-all-in-one.yaml @@ -0,0 +1,66845 @@ +apiVersion: v1 +data: + agent-cluster-node.json: |- + { + "annotations": { + "list": [ + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "graphTooltip": 1, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Clustering documentation", + "type": "link", + "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "grafana-agent-flow-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "title": "Node Info", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. It should match across all nodes.\n", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Lamport clock time" + }, + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Internal cluster state observers" + }, + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Gossip health score" + }, + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Gossip protocol version" + } + ], + "title": "Node Info", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value #(.*)", + "renamePattern": "$1" + } + }, + { + "id": "reduce", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { }, + "indexByName": { }, + "renameByName": { + "Field": "Metric", + "Max": "Value" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "{{event}}", + "range": true + } + ], + "title": "Gossip ops/s", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Known peers to the node (including the local node).\n", + "fieldConfig": { + "defaults": { + "unit": "suffix:peers" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": false, + "legendFormat": "__auto", + "range": true + } + ], + "title": "Known peers", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "Known peers to the node by state (including the local node).\n", + "fieldConfig": { + "defaults": { + "unit": "suffix:nodes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "{{state}}", + "range": true + } + ], + "title": "Peers by state", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "title": "Gossip Transport", + "type": "row" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "axisCenteredZero": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 18 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "rx", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "tx", + "range": true + } + ], + "title": "Transport bandwidth", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 18 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "Tx success %", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", + "instant": false, + "legendFormat": "Rx success %", + "range": true + } + ], + "title": "Packet write success rate", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that the Agent cannot keep up with the number of messages required to have all nodes informed of cluster changes, and the nodes may not converge in a timely fashion.\n", + "fieldConfig": { + "defaults": { + "unit": "pkts" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 18 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "tx queue", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "rx queue", + "range": true + } + ], + "title": "Pending packet queue", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "axisCenteredZero": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 26 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "rx", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "tx", + "range": true + } + ], + "title": "Stream bandwidth", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 26 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", + "instant": false, + "legendFormat": "Tx success %", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", + "instant": false, + "legendFormat": "Rx success %", + "range": true + } + ], + "title": "Stream write success rate", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 26 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "Open streams", + "range": true + } + ], + "title": "Open transport streams", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "tags": [ + "grafana-agent-flow-mixin" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "label": "Loki Data Source", + "name": "loki_datasource", + "query": "loki", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "datasource": "${datasource}", + "label": "cluster", + "name": "cluster", + "query": { + "query": "label_values(agent_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "namespace", + "name": "namespace", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "instance", + "name": "instance", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "utc", + "title": "Grafana Agent Flow / Cluster Node", + "uid": "dd370cd333b2d9258435fb1b5a20a89b" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Agent Flow Mixin + labels: + grafana_dashboard: "1" + name: agent-cluster-node.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + agent-cluster-overview.json: |- + { + "annotations": { + "list": [ + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "graphTooltip": 1, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Clustering documentation", + "type": "link", + "url": "https://grafana.com/docs/agent/latest/flow/reference/cli/run/#clustered-mode-experimental" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "grafana-agent-flow-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 0 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": true, + "legendFormat": "__auto", + "range": false + } + ], + "title": "Nodes", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "Nodes info.\n", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dashboard" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "index": 0, + "text": "Link" + } + }, + "type": "value" + } + ] + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Detail dashboard for node", + "url": "/d/dd370cd333b2d9258435fb1b5a20a89b/grafana-agent-flow-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 0 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false + } + ], + "title": "Node table", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": false, + "__name__": true, + "cluster": true, + "namespace": true, + "state": false + }, + "indexByName": { }, + "renameByName": { + "Value": "Dashboard", + "instance": "", + "state": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "1": { + "color": "red", + "index": 1, + "text": "Not converged" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "green", + "index": 0, + "text": "Converged" + } + }, + "type": "special" + } + ], + "unit": "suffix:nodes" + } + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 9 + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", + "format": "time_series", + "instant": true, + "legendFormat": "__auto", + "range": false + } + ], + "title": "Convergance state", + "type": "stat" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 80, + "spanNulls": true + }, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "text": "Yes" + } + }, + "type": "value" + }, + { + "options": { + "1": { + "color": "red", + "text": "No" + } + }, + "type": "value" + } + ], + "max": 1, + "noValue": 0 + } + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 9 + }, + "options": { + "mergeValues": true + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", + "instant": false, + "legendFormat": "Converged", + "range": true + } + ], + "title": "Convergance state timeline", + "type": "state-timeline" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "tags": [ + "grafana-agent-flow-mixin" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "label": "Loki Data Source", + "name": "loki_datasource", + "query": "loki", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "datasource": "${datasource}", + "label": "cluster", + "name": "cluster", + "query": { + "query": "label_values(agent_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "namespace", + "name": "namespace", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "utc", + "title": "Grafana Agent Flow / Cluster Overview", + "uid": "7e07f9c975fcfc2a6e120a95f579f843" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Agent Flow Mixin + labels: + grafana_dashboard: "1" + name: agent-cluster-overview.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + agent-flow-controller.json: |- + { + "annotations": { + "list": [ + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "graphTooltip": 1, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Component controller documentation", + "type": "link", + "url": "https://grafana.com/docs/agent/latest/flow/concepts/component_controller/" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "grafana-agent-flow-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "description": "The number of Grafana Agent Flow instances whose metrics are being sent and reported.\n", + "fieldConfig": { + "defaults": { + "unit": "agents" + } + }, + "gridPos": { + "h": 4, + "w": 10, + "x": 0, + "y": 0 + }, + "options": { + "colorMode": "none", + "graphMode": "none" + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "count(agent_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": false, + "legendFormat": "__auto", + "range": true + } + ], + "title": "Running agents", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "The number of running components across all running agents.\n", + "fieldConfig": { + "defaults": { + "unit": "components" + } + }, + "gridPos": { + "h": 4, + "w": 10, + "x": 0, + "y": 4 + }, + "options": { + "colorMode": "none", + "graphMode": "none" + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": false, + "legendFormat": "__auto", + "range": true + } + ], + "title": "Running components", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "The percentage of components which are in a healthy state.\n", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "No components", + "unit": "percentunit" + } + }, + "gridPos": { + "h": 4, + "w": 10, + "x": 0, + "y": 8 + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "text": { + "valueSize": 80 + } + }, + "pluginVersion": "9.0.6", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", + "instant": false, + "legendFormat": "__auto", + "range": true + } + ], + "title": "Overall component health", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "Breakdown of components by health across all running agents.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Grafana Agent Flow UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", + "fieldConfig": { + "defaults": { + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unhealthy" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unknown" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Exited" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 14, + "x": 10, + "y": 0 + }, + "options": { + "orientation": "vertical", + "showUnfilled": true + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", + "instant": true, + "legendFormat": "Healthy", + "range": false + }, + { + "datasource": "${datasource}", + "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", + "instant": true, + "legendFormat": "Unhealthy", + "range": false + }, + { + "datasource": "${datasource}", + "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", + "instant": true, + "legendFormat": "Unknown", + "range": false + }, + { + "datasource": "${datasource}", + "expr": "sum(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", + "instant": true, + "legendFormat": "Exited", + "range": false + } + ], + "title": "Components by health", + "type": "bargauge" + }, + { + "datasource": "${datasource}", + "description": "The frequency at which components get updated.\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "points", + "pointSize": 3 + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (instance) (rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true + } + ], + "title": "Component evaluation rate", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.99, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", + "instant": false, + "legendFormat": "99th percentile", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.50, sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", + "instant": false, + "legendFormat": "50th percentile", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "(\n histogram_sum(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(agent_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(agent_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", + "instant": false, + "legendFormat": "Average", + "range": true + } + ], + "title": "Component evaluation time", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 12 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (component_id) (rate(agent_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(agent_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", + "instant": false, + "legendFormat": "{{component_id}}", + "range": true + } + ], + "title": "Slow components evaluation times", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 22 + }, + "maxDataPoints": 30, + "options": { + "calculate": false, + "cellGap": 0, + "color": { + "scheme": "Spectral" + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0.10000000000000001 + }, + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "unit": "s" + } + }, + "pluginVersion": "9.0.6", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(increase(agent_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true + } + ], + "title": "Component evaluation histogram", + "type": "heatmap" + }, + { + "datasource": "${datasource}", + "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 22 + }, + "maxDataPoints": 30, + "options": { + "calculate": false, + "cellGap": 0, + "color": { + "scheme": "Spectral" + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0.10000000000000001 + }, + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "unit": "s" + } + }, + "pluginVersion": "9.0.6", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(increase(agent_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(agent_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true + } + ], + "title": "Component dependency wait histogram", + "type": "heatmap" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "tags": [ + "grafana-agent-flow-mixin" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "label": "Loki Data Source", + "name": "loki_datasource", + "query": "loki", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "datasource": "${datasource}", + "label": "cluster", + "name": "cluster", + "query": { + "query": "label_values(agent_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "namespace", + "name": "namespace", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "utc", + "title": "Grafana Agent Flow / Controller", + "uid": "f861e5fef2e795edd5c4c73bee1ba769" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Agent Flow Mixin + labels: + grafana_dashboard: "1" + name: agent-flow-controller.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + agent-flow-opentelemetry.json: |- + { + "graphTooltip": 1, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "grafana-agent-flow-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "title": "Receivers for traces [otelcol.receiver]", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Number of spans successfully pushed into the pipeline.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 0 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }} / {{ transport }}", + "range": true + } + ], + "title": "Accepted spans", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of spans that could not be pushed into the pipeline.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 0 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }} / {{ transport }}", + "range": true + } + ], + "title": "Refused spans", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The duration of inbound RPCs.\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 0 + }, + "maxDataPoints": 30, + "options": { + "calculate": false, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "scale": "exponential", + "scheme": "Oranges", + "steps": 65 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "unit": "ms" + } + }, + "pluginVersion": "9.0.6", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true + } + ], + "title": "RPC server duration", + "type": "heatmap" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "title": "Batching of logs, metrics, and traces [otelcol.processor.batch]", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Number of spans, metric datapoints, or log lines in a batch\n", + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "maxDataPoints": 30, + "options": { + "calculate": false, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "scale": "exponential", + "scheme": "Oranges", + "steps": 65 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "unit": "short" + } + }, + "pluginVersion": "9.0.6", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true + } + ], + "title": "Number of units in the batch", + "type": "heatmap" + }, + { + "datasource": "${datasource}", + "description": "Number of distinct metadata value combinations being processed\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true + } + ], + "title": "Distinct metadata values", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of times the batch was sent due to a timeout trigger\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true + } + ], + "title": "Timeout trigger", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "title": "Exporters for traces [otelcol.exporter]", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Number of spans successfully sent to destination.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 20 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true + } + ], + "title": "Exported sent spans", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of spans in failed attempts to send to destination.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 20 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true + } + ], + "title": "Exported failed spans", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "tags": [ + "grafana-agent-flow-mixin" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "label": "Loki Data Source", + "name": "loki_datasource", + "query": "loki", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "datasource": "${datasource}", + "label": "cluster", + "name": "cluster", + "query": { + "query": "label_values(agent_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "namespace", + "name": "namespace", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "datasource": "${datasource}", + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "utc", + "title": "Grafana Agent Flow / OpenTelemetry", + "uid": "c90e752eb8c0fce588f906b7279aceea" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Agent Flow Mixin + labels: + grafana_dashboard: "1" + name: agent-flow-opentelemetry.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + agent-flow-prometheus-remote-write.json: |- + { + "annotations": { + "list": [ + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "graphTooltip": 1, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Component documentation", + "type": "link", + "url": "https://grafana.com/docs/agent/latest/flow/reference/components/prometheus.remote_write/" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "grafana-agent-flow-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "title": "prometheus.scrape", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with scrape targets,\nstale service discovery, or agent misconfiguration.\n", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 1 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "% of targets successfully scraped", + "range": true + } + ], + "title": "Scrape success rate in $cluster", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with the agent.\n", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "p99", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "p95", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "p50", + "range": true + } + ], + "title": "Scrape duration in $cluster", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "title": "prometheus.remote_write", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 12 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (instance, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + } + ], + "title": "WAL delay", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 12 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (remote_name, url) (\n rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + } + ], + "title": "Data write throughput", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 12 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", + "instant": false, + "legendFormat": "99th percentile", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", + "instant": false, + "legendFormat": "50th percentile", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_id=~\"$component\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Average", + "range": true + } + ], + "title": "Write latency", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; filter to a specific URL to display more granular\ninformation.\n", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Minimum" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 15 + ], + "fill": "dash" + } + }, + { + "id": "custom.showPoints", + "value": "never" + }, + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Maximum" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 15 + ], + "fill": "dash" + } + }, + { + "id": "custom.showPoints", + "value": "never" + }, + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 12 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "Minimum", + "range": true + }, + { + "datasource": "${datasource}", + "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "Maximum", + "range": true + } + ], + "title": "Shards", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total outgoing samples sent by prometheus.remote_write.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + }, + "unit": "cps" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 22 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + } + ], + "title": "Sent samples / second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + }, + "unit": "cps" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 22 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + } + ], + "title": "Failed samples / second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 20, + "gradientMode": "hue", + "stacking": { + "mode": "normal" + } + }, + "unit": "cps" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 22 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + } + ], + "title": "Retried samples / second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 32 + }, + "options": { + "legend": { + "showLegend": false + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\", url=~\"$url\"})\n", + "instant": false, + "legendFormat": "Series", + "range": true + } + ], + "title": "Active series (total)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each agent instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 32 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"}\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_id}}", + "range": true + } + ], + "title": "Active series (by instance/component)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 32 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (component_id) (agent_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_id=~\"$component\", url=~\"$url\"})\n", + "instant": false, + "legendFormat": "{{component_id}}", + "range": true + } + ], + "title": "Active series (by component)", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "tags": [ + "grafana-agent-flow-mixin" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "label": "Loki Data Source", + "name": "loki_datasource", + "query": "loki", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "datasource": "${datasource}", + "label": "cluster", + "name": "cluster", + "query": { + "query": "label_values(agent_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "namespace", + "name": "namespace", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "datasource": "${datasource}", + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "datasource": "${datasource}", + "includeAll": true, + "label": "component", + "multi": true, + "name": "component", + "query": { + "query": "label_values(agent_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", + "refId": "component" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "datasource": "${datasource}", + "includeAll": true, + "label": "url", + "multi": true, + "name": "url", + "query": { + "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", + "refId": "url" + }, + "refresh": 2, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "utc", + "title": "Grafana Agent Flow / Prometheus Components", + "uid": "ee34ffa2d084547d650e1d96a26306aa" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Agent Flow Mixin + labels: + grafana_dashboard: "1" + name: agent-flow-prometheus-remote-write.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + agent-flow-resources.json: |- + { + "annotations": { + "list": [ + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"grafana-agent\" | name_extracted=~\"grafana-agent.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "graphTooltip": 1, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "grafana-agent-flow-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "description": "CPU usage of the Grafana Agent process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(agent_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Resident memory size of the Grafana Agent process.\n", + "fieldConfig": { + "defaults": { + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "agent_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate at which the Grafana Agent process performs garbage collections.\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "points", + "pointSize": 3 + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "Garbage collections", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n", + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "Goroutines", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Heap memory currently in use by the Grafana Agent process.\n", + "fieldConfig": { + "defaults": { + "unit": "decbytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nagent_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "Memory (heap inuse)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of data received across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "none", + "stacking": { + "mode": "normal" + } + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(agent_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "Network receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of data sent across all network interfaces for the machine\nGrafana Agent is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Grafana Agent process.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "none", + "stacking": { + "mode": "normal" + } + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(agent_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true + } + ], + "title": "Network send bandwidth", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "tags": [ + "grafana-agent-flow-mixin" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "label": "Loki Data Source", + "name": "loki_datasource", + "query": "loki", + "refresh": 1, + "sort": 2, + "type": "datasource" + }, + { + "datasource": "${datasource}", + "label": "cluster", + "name": "cluster", + "query": { + "query": "label_values(agent_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "datasource": "${datasource}", + "label": "namespace", + "name": "namespace", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "datasource": "${datasource}", + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "query": { + "query": "label_values(agent_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "utc", + "title": "Grafana Agent Flow / Resources", + "uid": "d47aae5c53be5550f8e3bc8a904ba61a" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Agent Flow Mixin + labels: + grafana_dashboard: "1" + name: agent-flow-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + go-runtime.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Go runtime metrics", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 14, + "iteration": 1623758038990, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average total bytes of memory reserved across all process instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by(job)(go_memstats_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}} (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Total Reserved Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average stack memory usage across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job) (go_memstats_stack_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: stack inuse (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Stack Memory Use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average memory reservations by the runtime, not for stack or heap, across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job)(go_memstats_mspan_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{instance}}: mspan (avg)", + "refId": "B" + }, + { + "expr": "avg by (job)(go_memstats_mcache_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{instance}}: mcache (avg)", + "refId": "D" + }, + { + "expr": "avg by (job)(go_memstats_buck_hash_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{instance}}: buck hash (avg)", + "refId": "E" + }, + { + "expr": "avg by (job)(go_memstats_gc_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: gc (avg)", + "refId": "F" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Other Memory Reservations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average memory reserved, and actually in use, by the heap, across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job)(go_memstats_heap_sys_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: heap reserved (avg)", + "refId": "B" + }, + { + "expr": "avg by (job)(go_memstats_heap_inuse_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: heap in use (avg)", + "refId": "A" + }, + { + "expr": "avg by (job)(go_memstats_heap_alloc_bytes{job=~\"tns_app\",instance=~\".*\"})", + "interval": "", + "legendFormat": "{{job}}: heap alloc (avg)", + "refId": "C" + }, + { + "expr": "avg by (job)(go_memstats_heap_idle_bytes{job=~\"tns_app\",instance=~\".*\"})", + "interval": "", + "legendFormat": "{{job}}: heap idle (avg)", + "refId": "D" + }, + { + "expr": "avg by (job)(go_memstats_heap_released_bytes{job=~\"tns_app\",instance=~\".*\"})", + "interval": "", + "legendFormat": "{{job}}: heap released (avg)", + "refId": "E" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Heap Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average allocation rate in bytes per second, across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job)(rate(go_memstats_alloc_bytes_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "{{job}}: bytes malloced/s (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Allocation Rate, Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average rate of heap object allocation, across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{job}}: obj mallocs/s (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Heap Object Allocation Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average number of live memory objects across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by(job)(go_memstats_mallocs_total{job=\"$job\", instance=~\"$instance\"} - go_memstats_frees_total{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: object count (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Number of Live Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average number of goroutines across instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job)(go_goroutines{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: goroutine count (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"0\", job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: min gc time (avg)", + "refId": "A" + }, + { + "expr": "avg by (job)(go_gc_duration_seconds{quantile=\"1\", job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}}: max gc time (avg)", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "GC min & max duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "The number used bytes at which the runtime plans to perform the next GC, averaged across all instances of a job.", + "fieldConfig": { + "defaults": { + "links": [ ] + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (job)(go_memstats_next_gc_bytes{job=\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "{{job}} next gc bytes (avg)", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Next GC, Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "go-runtime" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "MONITORING", + "value": "MONITORING" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "pilot", + "value": "pilot" + }, + "datasource": "$datasource", + "definition": "label_values(go_info, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "job", + "multi": false, + "name": "job", + "options": [ ], + "query": { + "query": "label_values(go_info, job)", + "refId": "MONITORING-job-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "definition": "label_values(go_info{job=\"$job\"}, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "options": [ ], + "query": { + "query": "label_values(go_info{job=\"$job\"}, instance)", + "refId": "MONITORING-instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Go runtime metrics", + "uid": "T4sSTLBGzgp", + "version": 1 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Go - Runtime + labels: + grafana_dashboard: "1" + name: go-runtime.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-chunks.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", + "format": "time_series", + "legendFormat": "series", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Series", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", + "format": "time_series", + "legendFormat": "chunks", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunks per series", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Active Series / Chunks", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Utilization", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Age", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Flush Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Log Entries Per Chunk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", + "format": "time_series", + "legendFormat": "Index Entries", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Index Entries Per Chunk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Flush Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Queue Length", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Flush Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Flush Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunks Flushed/Second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Flush Reason", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Flush Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "$datasource", + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 11, + "legend": { + "show": true + }, + "span": 12, + "targets": [ + { + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Chunk Utilization", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "percentunit", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Utilization", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "$datasource", + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 12, + "legend": { + "show": true + }, + "span": 12, + "targets": [ + { + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Chunk Size Bytes", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "bytes", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Utilization", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", + "format": "time_series", + "legendFormat": "p90", + "legendLink": null + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Size Quantiles", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Utilization", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + }, + { + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Duration hours (end-start)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Duration", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Chunks", + "uid": "chunks", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-chunks.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-deletion.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "none", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Number of Pending Requests", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "dtdurations", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Oldest Pending Request Age", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", + "format": "time_series", + "legendFormat": "in progress", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "# of Delete Requests (received - processed) ", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", + "format": "time_series", + "legendFormat": "received", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Requests Received / Day", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", + "format": "time_series", + "legendFormat": "processed", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Requests Processed / Day", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Churn", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compactor CPU usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", + "format": "time_series", + "legendFormat": " {{pod}} ", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compactor memory usage (MiB)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compaction run duration (seconds)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compactor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", + "format": "time_series", + "legendFormat": "failures", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Failures in Loading Delete Requests / Hour", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Lines Deleted / Sec", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Deletion metrics", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$loki_datasource", + "id": 11, + "span": 6, + "targets": [ + { + "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", + "refId": "A" + } + ], + "title": "In progress/finished", + "type": "logs" + }, + { + "datasource": "$loki_datasource", + "id": 12, + "span": 6, + "targets": [ + { + "expr": "{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", + "refId": "A" + } + ], + "title": "Requests", + "type": "logs" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "List of deletion requests", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Deletion", + "uid": "deletion", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-deletion.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-logs.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "iteration": 1583185057230, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "goroutines", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 0 + }, + "hiddenSeries": false, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(go_gc_duration_seconds{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)", + "legendFormat": "{{quantile}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "gc duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 0 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "cpu", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 0 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "working set", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "tx", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 0 + }, + "hiddenSeries": false, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "rx", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 0 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[30m]) > 0", + "legendFormat": "{{reason}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "restarts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 0 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)", + "legendFormat": "{{level}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "bad words", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "warn", + "color": "#FF780A" + }, + { + "alias": "error", + "color": "#E02F44" + }, + { + "alias": "info", + "color": "#56A64B" + }, + { + "alias": "debug", + "color": "#3274D9" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)", + "intervalFactor": 3, + "legendFormat": "{{level}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 19, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 29, + "maxDataPoints": "", + "options": { + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=\"$level\" |= \"$filter\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + } + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "deployment", + "options": [ ], + "query": "label_values(kube_deployment_created{cluster=\"$cluster\", namespace=\"$namespace\"}, deployment)", + "refresh": 0, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "options": [ ], + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, pod)", + "refresh": 0, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "container", + "options": [ ], + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)", + "refresh": 0, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "", + "value": "" + }, + "hide": 0, + "includeAll": false, + "label": "", + "multi": true, + "name": "level", + "options": [ + { + "selected": false, + "text": "debug", + "value": "debug" + }, + { + "selected": false, + "text": "info", + "value": "info" + }, + { + "selected": false, + "text": "warn", + "value": "warn" + }, + { + "selected": false, + "text": "error", + "value": "error" + } + ], + "query": "debug,info,warn,error", + "refresh": 0, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "LogQL Filter", + "name": "filter", + "query": "", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Logs", + "uid": "logs", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-logs.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-mixin-recording-rules.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "iteration": 1635347545534, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "liveNow": false, + "panels": [ + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 2, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.3.0-38205pre", + "targets": [ + { + "datasource": "${datasource}", + "exemplar": false, + "expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Appenders Not Ready", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 11, + "x": 2, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Samples Appended to WAL per Second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Series are unique combinations of labels", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 11, + "x": 13, + "y": 0 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Series Created per Second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Write Behind", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Samples Sent per Second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "WAL Disk Size", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "exemplar": true, + "expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0", + "interval": "", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "title": "Pending Samples", + "type": "timeseries" + } + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "tenant", + "options": [ ], + "query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))", + "refresh": 0, + "regex": "/\"([^\"]+)\"/", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Recording Rules", + "uid": "recording-rules", + "version": 0, + "weekStart": "" + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-mixin-recording-rules.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-operational.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "iteration": 1588704280892, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "panels": [ ], + "targets": [ ], + "title": "Main", + "type": "row" + }, + { + "aliasColors": { + "5xx": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Queries/Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 10, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "5xx": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Pushes/Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 10, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 2, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Lines Per Tenant (top 10)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "MBs Per Tenant (Top 10)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{container}}-{{pod}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Container Restarts", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Push Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Distributor Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 6 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Distributor Success Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "hide": false, + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "hide": false, + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Ingester Latency Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 11 + }, + "hiddenSeries": false, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Ingester Success Rate Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "legendFormat": "{{route}}-.99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "legendFormat": "{{route}}-.9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "legendFormat": "{{route}}-.5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Query Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".99-{{route}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".9-{{route}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".5-{{route}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Querier Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 16 + }, + "hiddenSeries": false, + "id": 73, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Querier Success Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 21 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".99-{{route}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".9-{{route}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".5-{{route}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Ingester Latency Read", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 21 + }, + "hiddenSeries": false, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Ingester Success Rate Read", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 110, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 112, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))", + "interval": "", + "legendFormat": "{{ tenant }} - {{ reason }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Discarded Lines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [ ], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 113, + "pageSize": null, + "panels": [ ], + "showHeader": true, + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "tenant", + "thresholds": [ ], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "reason", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "align": "right", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "{{ tenant }} - {{ reason }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Discarded Lines Per Interval", + "transform": "table", + "type": "table-old" + } + ], + "targets": [ ], + "title": "Limits", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 23, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"distributor.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 28 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"distributor.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[1m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 29, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [ ], + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 35 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Append Failures By Ingester", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Bytes Received/Second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 42 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Lines Received/Second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Distributor", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 19, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"ingester.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 36 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"ingester.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 3, + "w": 18, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[1m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 18, + "x": 12, + "y": 32 + }, + "id": 39, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [ ], + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 41 + }, + "hiddenSeries": false, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Ingester", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 104, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 106, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}))", + "interval": "", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Active Streams", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 108, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]) > 0))", + "interval": "", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Streams Created/Sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Streams", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 94, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 102, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "De-Dupe Ratio", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]))", + "interval": "", + "legendFormat": "Chunks", + "refId": "A" + }, + { + "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m])) < 1", + "interval": "", + "legendFormat": "De-Dupe Ratio", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Chunks Flushed/Sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "$datasource", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 31 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 100, + "legend": { + "show": true + }, + "panels": [ ], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Size Bytes", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "bytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 7, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 96, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "{{ reason }}" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Chunk Flush Reason %", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "$datasource", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 39 + }, + "heatmap": { }, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 98, + "legend": { + "show": true + }, + "panels": [ ], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Utilization", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "percentunit", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "targets": [ ], + "title": "Chunks", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 64, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"querier.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 69, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"querier.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 3, + "w": 18, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[1m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 18, + "x": 12, + "y": 35 + }, + "id": 66, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [ ], + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Querier", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 52, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 53, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", + "intervalFactor": 1, + "legendFormat": "{{container}}: .99-{{method}}-{{name}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", + "hide": false, + "legendFormat": "{{container}}: .9-{{method}}-{{name}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", + "hide": false, + "legendFormat": "{{container}}: .5-{{method}}-{{name}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 38 + }, + "hiddenSeries": false, + "id": 54, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)", + "intervalFactor": 1, + "legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Memcached", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 57, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 55, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 58, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Consul", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 43, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 41, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".9", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "MutateRows Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 9 + }, + "hiddenSeries": false, + "id": 46, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", + "interval": "", + "legendFormat": "90%", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", + "interval": "", + "legendFormat": "50%", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ReadRows Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 44, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", + "interval": "", + "legendFormat": "90%", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", + "interval": "", + "legendFormat": "50%", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "GetTable Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 9 + }, + "hiddenSeries": false, + "id": 45, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".9", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ListTables Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 47, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "MutateRows Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 16 + }, + "hiddenSeries": false, + "id": 50, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ReadRows Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 48, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "GetTable Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 16 + }, + "hiddenSeries": false, + "id": 49, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "ListTables Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Big Table", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 60, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 61, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 41 + }, + "hiddenSeries": false, + "id": 62, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "GCS", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 76, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 9 + }, + "id": 82, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Failure Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 9 + }, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Consumed Capacity Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 9 + }, + "id": 84, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Throttled Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 9 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Dropped Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 15 + }, + "id": 86, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Query Pages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 15 + }, + "id": 87, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 15 + }, + "id": 88, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Dynamo", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 78, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 79, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 80, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "S3", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 78, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 79, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 80, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Azure Blob", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 114, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 115, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 116, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "BoltDB Shipper", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Operational", + "uid": "operational", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-operational.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-reads-resources.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "gridPos": { }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Writes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "gridPos": { }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Reads", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilization", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "gridPos": { }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Writes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "gridPos": { }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Reads", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"index-gateway.*\"})", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilization", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Index Gateway", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rules", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Reads Resources", + "uid": "reads-resources", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-reads-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-reads.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", + "format": "time_series", + "legendFormat": "{{ route }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Pod Latency (p99)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Frontend (query-frontend)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", + "format": "time_series", + "legendFormat": "{{ route }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Pod Latency (p99)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", + "format": "time_series", + "legendFormat": "{{ route }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Pod Latency (p99)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", + "format": "time_series", + "legendFormat": "{{ route }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Pod Latency (p99)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - Zone Aware", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Pod Latency (p99)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Index", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le,pod)) * 1e3", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "__auto", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Pod Latency (p99)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "BoltDB Shipper", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Reads", + "uid": "reads", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-reads.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-retention.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Resource Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeFromNow" + } + }, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { }, + "textMode": "auto" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Last Compact Tables Operation Success", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", + "format": "time_series", + "legendFormat": "duration", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compact Tables Operations Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compaction", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", + "format": "time_series", + "legendFormat": "{{table_name}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of times Tables were skipped during Compaction", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{success}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compact Tables Operations Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeFromNow" + } + }, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { }, + "textMode": "auto" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_compactor_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Last Mark Operation Success", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", + "format": "time_series", + "legendFormat": "duration", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Mark Operations Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{success}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Mark Operations Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Retention", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "format": "time_series", + "legendFormat": "{{action}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Processed Tables Per Action", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", + "format": "time_series", + "legendFormat": "{{table}}-{{action}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Modified Tables", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", + "format": "time_series", + "legendFormat": "{{table}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Marks Creation Rate Per Table", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Per Table Marker", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Marked Chunks (24h)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Mark Table Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Delete Chunks (24h)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sweeper", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", + "format": "time_series", + "legendFormat": "lag", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Sweeper Lag", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "format": "time_series", + "legendFormat": "count", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Marks Files to Process", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{status}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Rate Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$loki_datasource", + "id": 21, + "span": 12, + "targets": [ + { + "expr": "{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}", + "refId": "A" + } + ], + "title": "Compactor Logs", + "type": "logs" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Logs", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [ ], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Retention", + "uid": "retention", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-retention.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-writes-resources.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "In-memory streams", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 + }, + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "gridPos": { }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Writes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "gridPos": { }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Reads", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilization", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Writes Resources", + "uid": "writes-resources", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-writes-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + loki-writes.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "bytes", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Total Received Bytes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{tenant}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Tenant", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - Structured Metadata", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - Zone Aware", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Index", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "BoltDB Shipper", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Writes", + "uid": "writes", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-writes.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + memcached-overview.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\", command=\"get\", status=\"hit\"}[$__rate_interval])) / sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", command=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Hit Rate", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hit Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(20,\n max by (cluster, job, instance) (\n memcached_current_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"} / memcached_max_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}\n))\n", + "format": "time_series", + "legendFormat": "{{ cluster }} / {{ job }} / {{ instance }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top 20 Highest Connection Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Hits", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(command, status) (rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{command}} {{status}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Commands", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(memcached_items_evicted_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Evictions", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(memcached_items_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Stored", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ops", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (\n rate(memcached_process_user_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) +\n rate(memcached_process_system_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (memcached_current_bytes{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (memcached_current_items{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Items", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Resources", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (memcached_current_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + }, + { + "expr": "min(memcached_max_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "Max Connections (min setting across all instances)", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Current Connections", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(memcached_connections_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Connections / sec", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(memcached_read_bytes_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(memcached_written_bytes_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Uptime", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ ], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "Instance", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "instance", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Job", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "job", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "version", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count by (job, instance, version) (memcached_version{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + }, + { + "expr": "max by (job, instance) (memcached_uptime_seconds{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memcached Info", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached Info", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(memcached_commands_total, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(memcached_commands_total{cluster=~\"$cluster\"}, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "options": [ ], + "query": "label_values(memcached_commands_total{cluster=~\"$cluster\",job=~\"$job\"}, instance)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Memcached Overview", + "uid": "124d5222454213f748dbfaf69b77ec48", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Memcached + labels: + grafana_dashboard: "1" + name: memcached-overview.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-alertmanager-resources.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|alertmanager)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|alertmanager|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|alertmanager|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|alertmanager)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|alertmanager)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"((mimir|alertmanager)).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Alertmanager resources", + "uid": "a6883fb22799ac74479c7db872451092", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-alertmanager-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-alertmanager.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total silences", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(cortex_alertmanager_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "APS", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts received", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "per pod Active Aggregation Groups", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts grouping", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "NPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "title": "NPS by integration", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alert notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod tenants", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_alerts:sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod alerts", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cluster_job_pod:cortex_alertmanager_silences:sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod silences", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Replication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Syncs/sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Syncs/sec (by reason)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "errors", + "legendLink": null + } + ], + "title": "Ring check errors/sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant configuration sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{outcome}}", + "legendLink": null + } + ], + "title": "Initial syncs /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Initial sync duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Fetch state from other alertmanagers /sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding initial state sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Replicate state to other alertmanagers /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n-\nsum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Merge state from other alertmanagers /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Persist state to remote storage /sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding runtime state sync", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Alertmanager", + "uid": "b0d38d318bbddd80476246d4930f9e55", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-alertmanager.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-compactor-resources.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU and memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|compactor|mimir-backend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|compactor|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|compactor|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|compactor|mimir-backend)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|compactor|mimir-backend)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"((mimir|compactor|mimir-backend)).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Compactor resources", + "uid": "09a5c49e9cdb2f2b24c6d184574a07fd", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-compactor-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-compactor.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Per-instance runs\nNumber of times a compactor instance triggers a compaction across all tenants that it manages.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "completed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "started" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#34CCEB", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "started", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "completed", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Per-instance runs / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Tenants compaction progress\nIn a multi-tenant cluster, display the progress of tenants that are compacted while compaction is running.\n\n", + "fieldConfig": { + "defaults": { + "max": 1, + "noValue": 1, + "unit": "percentunit" + } + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"} > 0\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Tenants compaction progress", + "type": "timeseries" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Longest time since last successful run\nDisplays the amount of time since the most recent successful execution\nof the compactor.\nThe value shown will be for the compactor replica that has the longest time since its\nlast successful run.\nThe table to the right shows a summary for all compactor replicas.\n\nIf there is no time value, one of the following messages might appear:\n\n- If you see \"No compactor data\" in this panel, that means that no compactors are active yet.\n\n- If you see \"No successful runs\" in this panel, that means that compactors are active, but none\n of them were successfully executed yet.\n\nThese might be expected - for example, if you just recently restarted your compactors,\nthey might not have had a chance to complete their first compaction run.\nHowever, if these messages persist, you should check the health of your compactors.\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "No compactor data", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Last run" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + }, + { + "id": "mappings", + "value": [ + { + "options": { + "from": "-Infinity", + "result": { + "color": "text", + "text": "No successful runs since startup yet" + }, + "to": 0 + }, + "type": "range" + } + ] + }, + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 7200 + }, + { + "color": "orange", + "value": 21600 + }, + { + "color": "red", + "value": 43200 + } + ] + } + } + ] + } + ] + }, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "reduceOptions": { + "calcs": [ + "first" + ], + "fields": "/^Last run$/", + "values": false + }, + "textMode": "value" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[1h])\n)\n", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Longest time since last successful run", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transformations": [ + { + "id": "organize", + "options": { + "renameByName": { + "Value": "Last run", + "pod": "Compactor" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "desc": true, + "field": "Last run" + } + ] + } + } + ], + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Last successful run per-compactor replica\nDisplays the compactor replicas, and for each, shows how long it has been since\nits last successful compaction run.\n\nThe value in the status column is based on how long it has been since the last successful compaction.\n\n- Okay: less than 2 hours\n- Delayed: more than 2 hours\n- Late: more than 6 hours\n- Very late: more than 12 hours\n\nIf the status of any compactor replicas are *Late* or *Very late*, check their health.\n\n", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "from": "-Infinity", + "result": { + "color": "transparent", + "text": "N/A" + }, + "to": 0 + }, + "type": "range" + }, + { + "options": { + "from": 0, + "result": { + "color": "green", + "text": "Ok" + }, + "to": 7200 + }, + "type": "range" + }, + { + "options": { + "from": 7200, + "result": { + "color": "yellow", + "text": "Delayed" + }, + "to": 21600 + }, + "type": "range" + }, + { + "options": { + "from": 21600, + "result": { + "color": "orange", + "text": "Late" + }, + "to": 43200 + }, + "type": "range" + }, + { + "options": { + "from": 43200, + "result": { + "color": "red", + "text": "Very late" + }, + "to": "Infinity" + }, + "type": "range" + }, + { + "options": { + "match": "null+nan", + "result": { + "color": "transparent", + "text": "Unknown" + } + }, + "type": "special" + } + ] + }, + { + "id": "custom.width", + "value": 86 + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Last run" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.width", + "value": 74 + }, + { + "id": "mappings", + "value": [ + { + "options": { + "from": "-Infinity", + "result": { + "text": "Never" + }, + "to": 0 + }, + "type": "range" + } + ] + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[1h])\n)\n", + "format": "table", + "instant": true, + "legendFormat": "Last run", + "legendLink": null + } + ], + "title": "Last successful run per-compactor replica", + "transformations": [ + { + "id": "organize", + "options": { + "renameByName": { + "Value": "Last run", + "pod": "Compactor" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "desc": true, + "field": "Last run" + } + ] + } + }, + { + "id": "calculateField", + "options": { + "alias": "One", + "binary": { + "left": "Last run", + "operator": "/", + "right": "Last run" + }, + "mode": "binary", + "replaceFields": false + } + }, + { + "id": "calculateField", + "options": { + "alias": "Status", + "binary": { + "left": "Last run", + "operator": "*", + "right": "One" + }, + "mode": "binary", + "replaceFields": false + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Compactor", + "Last run", + "Status" + ] + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) == 0)", + "format": "time_series", + "legendFormat": "Jobs", + "legendLink": null + } + ], + "title": "Estimated Compaction Jobs", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "compactions", + "legendLink": null + } + ], + "title": "TSDB compactions / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### TSDB compaction duration\nDisplay the amount of time that it has taken to run a single TSDB compaction.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "TSDB compaction duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + } + ], + "title": "Average blocks / tenant", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Tenants with largest number of blocks\nThe 10 tenants with the largest number of blocks.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Tenants with largest number of blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Blocks marked for deletion / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks deletions / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Garbage collector", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Metadata syncs / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Metadata sync duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Metadata sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Object Store", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Key-value store for compactors ring", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Compactor", + "uid": "1b3443aea86db629e6efdb7d05c53823", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-compactor.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-config.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "instances" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "count(cortex_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", + "format": "time_series", + "legendFormat": "sha256:{{sha256}}", + "legendLink": null + } + ], + "title": "Startup config file hashes", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Startup config file", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "instances" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "count(cortex_runtime_config_hash{cluster=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", + "format": "time_series", + "legendFormat": "sha256:{{sha256}}", + "legendLink": null + } + ], + "title": "Runtime config file hashes", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Runtime config file", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Config", + "uid": "5d9d0b4724c0f80d68467088ec61e003", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-config.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-object-store.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{component}}", + "legendLink": null + } + ], + "title": "RPS / component", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{component}}", + "legendLink": null + } + ], + "title": "Error rate / component", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Components", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "RPS / operation", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate / operation", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Operations", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Object Store", + "uid": "e1324ee2a434f4158c00a9ee279d3292", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-object-store.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-overrides.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${datasource}", + "id": 1, + "span": 12, + "targets": [ + { + "expr": "max by(limit_name) (cortex_limits_defaults{cluster=~\"$cluster\",namespace=~\"$namespace\"})", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Defaults", + "transformations": [ + { + "id": "labelsToFields", + "options": { } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Value": 1, + "limit_name": 0 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { }, + "sort": [ + { + "field": "limit_name" + } + ] + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${datasource}", + "id": 2, + "span": 12, + "targets": [ + { + "expr": "max by(user, limit_name) (cortex_limits_overrides{cluster=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Per-tenant overrides", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "limit_name" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "user": 0 + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overrides", + "uid": "1e2c358600ac53f09faea133f811b5bb", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-overrides.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-overview-networking.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overview networking", + "uid": "e15c71d372cc541367a088f10d9fcd92", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-overview-networking.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-overview-resources.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-write|distributor|ingester)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-write|distributor|ingester)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-write|distributor|ingester)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|mimir-write|distributor|ingester)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|mimir-write|distributor|ingester)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"((mimir|mimir-write|distributor|ingester)).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"((mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter)).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overview resources", + "uid": "a9b92d3c4d1af325d872a9e9a7083d71", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-overview-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-overview.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "The 'Status' panel shows an overview on the cluster health over the time.\nTo investigate failures, see a specific dashboard:\n\n- Writes\n- Reads\n- Rule evaluations\n- Alerting notifications\n- Object storage\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#7EB26D", + "value": null + }, + { + "color": "#EAB839", + "value": 0.01 + }, + { + "color": "#E24D42", + "value": 0.050000000000000003 + } + ] + } + } + }, + "id": 2, + "options": { + "showValue": "never" + }, + "span": 6, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Writes", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Reads", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Rule evaluations", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\"})\n) or vector(0))\n)\n", + "instant": false, + "legendFormat": "Alerting notifications", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Object storage", + "range": true + } + ], + "title": "Status", + "type": "state-timeline" + }, + { + "id": 3, + "options": { + "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", + "alertName": "Mimir", + "dashboardAlerts": false, + "maxItems": 100, + "sortOrder": 3, + "stateFilter": { + "error": true, + "firing": true, + "noData": false, + "normal": false, + "pending": false + } + }, + "span": 3, + "title": "Firing alerts", + "type": "alertlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Mimir cluster health", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the write path. \nTo examine the write path in detail, see a specific dashboard:\n\n- Writes\n- Writes resources\n- Writes networking\n- Overview resources\n- Overview networking\n", + "datasource": null, + "description": "", + "id": 4, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Write requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Write latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "cps" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "samples / sec", + "legendLink": null + }, + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "exemplars / sec", + "legendLink": null + } + ], + "title": "Ingestion / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the read path. \nTo examine the read path in detail, see a specific dashboard:\n\n- Reads\n- Reads resources\n- Reads networking\n- Overview resources\n- Overview networking\n- Queries\n- Compactor\n", + "datasource": null, + "description": "", + "id": 8, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Read requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Read latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "instant queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#429D48", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query_range($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "range queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#F1C731", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_labels($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label names\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#2A66CF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_label_name_values($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label values\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#9E44C1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_series($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "series queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#FFAB57", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_read($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "remote read queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#C79424", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_metadata($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "metadata queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#84D586", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query_exemplars($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "exemplar queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#A1C4FC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_active_series($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"active series\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#C788DE", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_label_names($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label name cardinality\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#3F6833", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_label_values($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label value cardinality\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#447EBC", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (route) (rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",route=~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval]))", + "format": "time_series", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "other", + "legendLink": null + } + ], + "title": "Queries / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the recording and alerting rules evaluation.\nTo examine the rules evaluation and alerts notifications in detail, see a specific dashboard:\n\n- Ruler\n- Alertmanager\n- Alertmanager resources\n- Overview resources\n- Overview networking\n", + "datasource": null, + "description": "", + "id": 12, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "success", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "missed", + "legendLink": null + } + ], + "title": "Rule evaluations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "average", + "legendLink": null + } + ], + "title": "Rule evaluations latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Alerting notifications sent to Alertmanager / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Recording and alerting rules", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "content": "These panels show an overview on the long-term storage (object storage).\nTo examine the storage in detail, see a specific dashboard:\n\n- Object store\n- Compactor\n", + "datasource": null, + "description": "", + "id": 16, + "mode": "markdown", + "span": 3, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "seriesOverrides": [ + { + "alias": "attributes", + "color": "#429D48" + }, + { + "alias": "delete", + "color": "#F1C731" + }, + { + "alias": "exists", + "color": "#2A66CF" + }, + { + "alias": "get", + "color": "#9E44C1" + }, + { + "alias": "get_range", + "color": "#FFAB57" + }, + { + "alias": "iter", + "color": "#C79424" + }, + { + "alias": "upload", + "color": "#84D586" + } + ], + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[15m])))", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Total number of blocks in the storage", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Long-term storage (object storage)", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overview", + "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-overview.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-queries.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Queue duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Retries", + "type": "timeseries", + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Queue length (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}) > 0", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Queue length (per user)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Queue duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Queue length (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}) > 0", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Queue length (per user)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_split_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "splitting rate", + "legendLink": null + } + ], + "title": "Intervals per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", + "format": "time_series", + "legendFormat": "{{request_type}}", + "legendLink": null + } + ], + "title": "Query results cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Query results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Query results cache skipped", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend - query splitting and results cache", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Sharded queries ratio\nThe % of queries that have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. range queries).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "sharded queries ratio", + "legendLink": null + } + ], + "title": "Sharded queries ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of sharded queries per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend - query sharding", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_series_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_series_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_series_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Series per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_samples_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_samples_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Samples per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}) / sum(cluster_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Exemplars per query", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of store-gateways hit per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Refetches of missing blocks per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failure Rate" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Failure Rate", + "legendLink": null + } + ], + "title": "Consistency checks failed", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected queries", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "Max", + "legendLink": null + }, + { + "expr": "min(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "Min", + "legendLink": null + }, + { + "expr": "avg(cortex_bucket_index_loaded{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "legendLink": null + } + ], + "title": "Bucket indexes loaded (per querier)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_index_loads_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Bucket indexes load / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Bucket indexes load latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Blocks queried / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "binBps" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{data_type}}", + "legendLink": null + } + ], + "title": "Data fetched / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "binBps" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{data_type}}", + "legendLink": null + } + ], + "title": "Data touched / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{stage}}", + "legendLink": null + } + ], + "title": "Series request average latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])))\n", + "format": "time_series", + "legendFormat": "{{stage}}", + "legendLink": null + } + ], + "title": "Series request 99th percentile latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", + "format": "time_series", + "legendFormat": "% of time reduced by preloading", + "legendLink": null + } + ], + "title": "Series batch preloading efficiency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis enabled and the index-header was loaded).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Blocks currently owned", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks loaded / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks dropped / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Lazy loaded index-headers", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 33, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Index-header lazy load duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. This time is not included in \"Index-header lazy load duration.\"\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Index-header lazy load gate latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 35, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "Series hash cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "ExpandedPostings cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 37, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "Chunks attributes in-memory cache hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Queries", + "uid": "b3abe8d5c040395cc36615cb4334c92d", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-queries.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-reads-networking.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-frontend|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|query-scheduler|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|querier|mimir-read).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|store-gateway|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ruler|mimir-backend).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Reads networking", + "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-reads-networking.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-reads-resources.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-read|query-frontend|querier|ruler-query-frontend|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-frontend|mimir-read)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|query-scheduler|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|querier|mimir-read)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Rules", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|store-gateway|mimir-backend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|store-gateway|mimir-backend)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|store-gateway|mimir-backend)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"((mimir|store-gateway|mimir-backend)).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Reads resources", + "uid": "cc86fd5aa9301c6528986572ad974db9", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-reads-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-reads.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "175px", + "panels": [ + { + "content": "

\n This dashboard shows health metrics for the read path.\n It is broken into sections for each service on the read path, and organized by the order in which the read request flows.\n
\n Incoming queries travel from the gateway → query frontend → query scheduler → querier → ingester and/or store-gateway (depending on the time range of the query).\n
\n For each service, there are 3 panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n

\n The dashboard also shows metrics for the 4 optional caches that can be deployed:\n the query results cache, the metadata cache, the chunks cache, and the index cache.\n
\n These panels will show “no data” if the caches are not deployed.\n

\n

\n Lastly, it also includes metrics for how the ingester and store-gateway interact with object storage.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Reads dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Instant queries per second\nRate of instant queries per second being made to the system.\nIncludes both queries made to the /prometheus API as\nwell as queries from the ruler.\n\n", + "fill": 1, + "format": "reqps", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Instant queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Range queries per second\nRate of range queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Range queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### \"Label names\" queries per second\nRate of \"label names\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Label names queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### \"Label values\" queries per second\nRate of specific \"label values\" endpoint queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Label values queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Series queries per second\nRate of series queries per second being made to\nMimir via the /prometheus API.\n\n", + "fill": 1, + "format": "reqps", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Series queries / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (Time in Queue)", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "queries" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__interval]))", + "format": "time_series", + "legendFormat": "Queue length", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "99th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "50th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "Average: {{ additional_queue_dimensions }}", + "refId": "C" + } + ], + "title": "Average Latency by Queue Dimension", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "Requests/s", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cache – query results", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\",route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 28, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway – key-value store for store-gateways ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "legendFormat": "{{item_type}}", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – block index cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 33, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – chunks cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 35, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 37, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|store-gateway.*|cortex|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – metadata cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ ] + }, + "id": 38, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 39, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 40, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|querier.*|cortex|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – metadata cache (querier accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 41, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 42, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 43, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 44, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Blocks object store (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 45, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 46, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 47, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 48, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 49, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 50, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 51, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 52, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Blocks object store (querier accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 53, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 54, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 55, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 56, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Reads", + "uid": "e327503188913dc38ad571c647eef643", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-reads.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-remote-ruler-reads-resources.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-frontend)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend (dedicated to ruler)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-query-scheduler)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler (dedicated to ruler)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ruler-querier)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier (dedicated to ruler)", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads resources", + "uid": "1940f6ef765a506a171faa2056c956c3", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-remote-ruler-reads-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-remote-ruler-reads.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "175px", + "panels": [ + { + "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Remote ruler reads dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", + "fill": 1, + "format": "reqps", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Evaluations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend (dedicated to ruler)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (Time in Queue)", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "queries" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__interval]))", + "format": "time_series", + "legendFormat": "Queue length", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler (dedicated to ruler)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "99th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "50th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "Average: {{ additional_queue_dimensions }}", + "refId": "C" + } + ], + "title": "Average Latency by Queue Dimension", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier (dedicated to ruler)", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads", + "uid": "f103238f7f5ab2f1345ce650cbfbfe2f", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-remote-ruler-reads.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-rollout-progress.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Ready" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Updated" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "links": [ ], + "options": { + "barRadius": 0, + "barWidth": 0.96999999999999997, + "fullHighlight": false, + "groupWidth": 0.69999999999999996, + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "horizontal", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "multi", + "sort": "none" + }, + "xField": "Workload", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", + "format": "table", + "instant": true, + "intervalFactor": null, + "legendFormat": "__auto", + "legendLink": null, + "step": null + }, + { + "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", + "format": "table", + "instant": true, + "intervalFactor": null, + "legendFormat": "__auto", + "legendLink": null, + "step": null + } + ], + "title": "Rollout progress", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true + }, + "renameByName": { + "Value #A": "Updated", + "Value #B": "Ready", + "workload": "Workload" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "field": "Workload" + } + ] + } + } + ], + "type": "barchart" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.20000000000000001 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.20000000000000001 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Writes 99th latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 4 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.01 + }, + { + "color": "red", + "value": 0.050000000000000003 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 4 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 4 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2.5 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Reads 99th latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "noValue": "All healthy", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "fill": 1, + "gridPos": { + "h": 3, + "w": 10, + "x": 0, + "y": 13 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "text": { + "titleSize": 14, + "valueSize": 14 + }, + "textMode": "value_and_name" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas_unavailable{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{deployment}}", + "legendLink": null, + "step": null + }, + { + "expr": "kube_statefulset_status_replicas_current{cluster=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{statefulset}}", + "legendLink": null, + "step": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Unhealthy pods", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "r.*" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 10, + "y": 8 + }, + "id": 11, + "targets": [ + { + "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods count per version", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "valueLabel": "version" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "container": 1 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { }, + "sort": [ + { + "field": "container" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", + "format": "time_series", + "legendFormat": "writes", + "legendLink": null + }, + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", + "format": "time_series", + "legendFormat": "reads", + "legendLink": null + } + ], + "title": "Latency vs 24h ago", + "type": "timeseries" + } + ], + "refresh": "10s", + "rows": null, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Rollout progress", + "uid": "7f0b5567d543a1698e695b530eb7f5de", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-rollout-progress.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-ruler.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Active configurations", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total rules", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Read from ingesters - QPS\nNote: Even while operating in Remote ruler mode you will still see values for this panel.\n\nThis is because the metrics are inclusive of intermediate services and are showing the requests that ultimately reach the ingesters.\n\nFor a more detailed view of the read path when using remote ruler mode, see the Remote ruler reads dashboard.\n\n", + "fill": 1, + "format": "reqps", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*|mimir|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Read from ingesters - QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "reqps", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Write to ingesters - QPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "success", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "missed", + "legendLink": null + } + ], + "title": "Evaluations per second", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "average", + "legendLink": null + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rule evaluations global", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes (ingesters)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*|mimir|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*|mimir|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*|mimir|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*|mimir|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*|mimir|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads (ingesters)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler - key-value store for rulers ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of store-gateways hit per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Refetches of missing blocks per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failures / sec" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Failures / sec", + "legendLink": null + } + ], + "title": "Consistency checks failed", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler - blocks storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Delivery errors", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) > 0\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) > 0\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Dropped", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Missed iterations", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ rule_group }}", + "legendLink": null + } + ], + "title": "Failures", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Group evaluations", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rule evaluation per user", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler configuration object store (ruler accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 28, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 30, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Ruler", + "uid": "631e15d5d85afb2ca8e35d62984eeaa0", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-ruler.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-scaling.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "id": 1, + "options": { + "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", + "mode": "markdown" + }, + "span": 12, + "title": "", + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Service scaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "400px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Required Replicas", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Cluster", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "__name__", + "thresholds": [ ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Cluster", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "cluster", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Service", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "deployment", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "namespace", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "Reason", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "reason", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(\n cluster_namespace_deployment_reason:required_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_namespace_deployment:actual_replicas:count{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Workload-based scaling", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Scaling", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Scaling", + "uid": "64bbad83507b7289b514725658e10352", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-scaling.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-slow-queries.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Across tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nIn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 User-Agents", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "fetched_chunk_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_index_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "response_size_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "results_cache_hit_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "results_cache_miss_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "estimated_series_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_chunks_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_series_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Time span" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Step" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "queue_time_seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "query_wall_time_seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "height": "500px", + "id": 19, + "span": 12, + "targets": [ + { + "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Slow queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "source": "labels" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Line": true, + "Time": true, + "caller": true, + "cluster": true, + "component": true, + "container": true, + "gossip_ring_member": true, + "host": true, + "id": true, + "job": true, + "labels": true, + "length": true, + "level": true, + "line": true, + "method": true, + "msg": true, + "name": true, + "namespace": true, + "param_step": true, + "path": true, + "pod": true, + "pod_template_hash": true, + "response_time": true, + "stream": true, + "traceID": true, + "tsNs": true + }, + "indexByName": { + "err": 10, + "length_seconds": 3, + "param_end": 5, + "param_query": 8, + "param_start": 4, + "param_step_seconds": 7, + "param_time": 6, + "response_time_seconds": 9, + "status": 1, + "ts": 0, + "user": 2 + }, + "renameByName": { + "err": "Error", + "length_seconds": "Time span", + "param_end": "End", + "param_query": "Query", + "param_start": "Start", + "param_step_seconds": "Step", + "param_time": "Time (instant query)", + "response_time_seconds": "Duration", + "ts": "Completion date", + "user": "Tenant ID" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "sharded_queries" + }, + { + "destinationType": "number", + "targetField": "split_queries" + }, + { + "destinationType": "number", + "targetField": "fetched_chunk_bytes" + }, + { + "destinationType": "number", + "targetField": "fetched_index_bytes" + }, + { + "destinationType": "number", + "targetField": "response_size_bytes" + }, + { + "destinationType": "number", + "targetField": "results_cache_hit_bytes" + }, + { + "destinationType": "number", + "targetField": "results_cache_miss_bytes" + }, + { + "destinationType": "number", + "targetField": "estimated_series_count" + }, + { + "destinationType": "number", + "targetField": "fetched_chunks_count" + }, + { + "destinationType": "number", + "targetField": "fetched_series_count" + }, + { + "destinationType": "number", + "targetField": "Time span" + }, + { + "destinationType": "number", + "targetField": "Duration" + }, + { + "destinationType": "number", + "targetField": "Step" + }, + { + "destinationType": "number", + "targetField": "queue_time_seconds" + }, + { + "destinationType": "number", + "targetField": "query_wall_time_seconds" + } + ] + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "includeAll": false, + "label": "Loki data source", + "multi": false, + "name": "loki_datasource", + "query": "loki", + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "5s", + "value": "5s" + }, + "hide": 0, + "label": "Min duration", + "name": "min_duration", + "options": [ + { + "selected": true, + "text": "5s", + "value": "5s" + } + ], + "query": "5s", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "User-Agent HTTP Header", + "name": "user_agent", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Slow queries", + "uid": "6089e1ce1e678788f46312a0a1e647e6", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-slow-queries.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-tenants.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Tenants dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "in-memory", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "active", + "legendLink": null + }, + { + "expr": "sum(\n cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "owned", + "legendLink": null + }, + { + "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n) > 0\n", + "format": "time_series", + "legendFormat": "active ({{ name }})", + "legendLink": null + } + ], + "title": "All series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/local limit .+/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "local limit ({{job}})", + "legendLink": null + }, + { + "expr": "cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "In-memory series per ingester", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/local limit .+/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "min by (job) (cortex_ingester_local_limits{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "local limit ({{job}})", + "legendLink": null + }, + { + "expr": "cortex_ingester_owned_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Owned series per ingester", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant series counts", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "series", + "legendLink": null + } + ], + "title": "Series with exemplars", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true though if one of the series timestamp is in future compared to rest series.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"} > 0)", + "format": "time_series", + "legendFormat": "age", + "legendLink": null + } + ], + "title": "Oldest exemplar age", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "active", + "legendLink": null + }, + { + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n) > 0\n", + "format": "time_series", + "legendFormat": "active ({{ name }})", + "legendLink": null + } + ], + "title": "Native histogram series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "buckets", + "legendLink": null + }, + { + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n) > 0\n", + "format": "time_series", + "legendFormat": "buckets ({{ name }})", + "legendLink": null + } + ], + "title": "Total number of buckets used by native histogram series", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars and native histograms", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor requests incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor requests received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Distributor requests received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"} > 0)", + "format": "time_series", + "legendFormat": "age", + "legendLink": null + } + ], + "title": "Newest seen sample age", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }}", + "legendLink": null + } + ], + "title": "Distributor discarded requests rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor ingestion requests", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor samples incoming rate\nThe rate of samples that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor samples incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Distributor samples received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "deduplicated", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "non-HA", + "legendLink": null + } + ], + "title": "Distributor deduplicated/non-HA", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }} (distributor)", + "legendLink": null + }, + { + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }} (ingester)", + "legendLink": null + } + ], + "title": "Distributor and ingester discarded samples rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Samples ingestion funnel", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor exemplars incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor exemplars received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor discarded exemplars rate\nThe rate of each exmplars' discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }}", + "legendLink": null + } + ], + "title": "Distributor discarded exemplars rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Ingester appended exemplars rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars ingestion funnel", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Symbol table size for loaded blocks", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Space used by local blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingesters' storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "count(sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}))", + "format": "time_series", + "legendFormat": "groups", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Number of groups", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "rules", + "legendLink": null + } + ], + "title": "Number of rules", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Total evaluations rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ rule_group }}", + "legendLink": null + } + ], + "title": "Failed evaluations rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rules", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "rules", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "seconds", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top rules", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Sent notifications rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "rate" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Failed notifications rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "alerts", + "legendLink": null + }, + { + "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "silences", + "legendLink": null + } + ], + "title": "Alerts", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "NPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 33, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|alertmanager|cortex|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "title": "NPS by integration", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 34, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-frontend.*|cortex|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Queries / Sec", + "legendLink": null + } + ], + "title": "Rate of Read Requests - query-frontend", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 35, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "Queue Length", + "legendLink": null + } + ], + "title": "Number of Queries Queued - query-scheduler", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read Path - Queries (User)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 36, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Queries / Sec", + "legendLink": null + } + ], + "title": "Rate of Read Requests - ruler-query-frontend", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 37, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler-query-scheduler.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "Queue Length", + "legendLink": null + } + ], + "title": "Number of Queries Queued - ruler-query-scheduler", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read Path - Queries (Ruler)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 50, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 38, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Estimated Compaction Jobs", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 39, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by (user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compactions", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "user", + "multi": false, + "name": "user", + "options": [ ], + "query": "label_values(cortex_ingester_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\"}, user)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "limit", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + }, + { + "selected": false, + "text": "500", + "value": "500" + }, + { + "selected": false, + "text": "1000", + "value": "1000" + } + ], + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Tenants", + "uid": "35fa247ce651ba189debf33d7ae41611", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-tenants.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-top-tenants.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "

\n This dashboard shows the top tenants based on multiple selection criterias.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the amount of users to be shown.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Top tenants dashboard description", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "series", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n )\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by active series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By active series", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "series", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"} )\n)\n)", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by in-memory series (series created - series removed)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By in-memory series", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"} @ start())\n)\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By in-memory series growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "samples/s", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by received samples rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By samples rate", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By samples rate growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "samples/s", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*|mimir|distributor.*|cortex|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by discarded samples rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By discarded samples rate", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*|mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*|mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*|mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By discarded samples rate growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "series", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n )\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by series with exemplars", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By series with exemplars", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "exemplars/s", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by received exemplars rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By exemplars rate", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 3, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "rules", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By rule group size", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 3, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "seconds", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ruler|cortex|mimir-backend.*))\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By rule group evaluation time", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Compaction Jobs", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [ ], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|compactor.*|cortex|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by estimated compaction jobs from bucket-index", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By estimated compaction jobs from bucket-index", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "limit", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Top tenants", + "uid": "bc6e12d4fe540e4a1785b9d3ca0ffdd9", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-top-tenants.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-writes-networking.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|distributor|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(mimir|ingester|mimir-write).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Writes networking", + "uid": "978c1cb452585c96697a238eaac7fe2d", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-writes-networking.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-writes-resources.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-write|distributor|ingester)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-write|distributor|ingester)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|mimir-write|distributor|ingester)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|distributor|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "In-memory series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"(mimir|ingester|mimir-write)\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|ingester|mimir-write)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"(mimir|ingester|mimir-write)\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"((mimir|ingester|mimir-write)).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Writes resources", + "uid": "bc9160e50b52e89e0e49c840fea3d379", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-writes-resources.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + mimir-writes.json: |- + { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "125px", + "panels": [ + { + "content": "

\n This dashboard shows various health metrics for the write path.\n It is broken into sections for each service on the write path,\n and organized by the order in which the write request flows.\n
\n Incoming metrics data travels from the gateway → distributor → ingester.\n
\n For each service, there are 3 panels showing\n (1) requests per second to that service,\n (2) average, median, and p99 latency of requests to that service, and\n (3) p99 latency of requests to each instance of that service.\n

\n

\n It also includes metrics for the key-value (KV) stores used to manage\n the high-availability tracker and the ingesters.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Writes dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Samples / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Exemplars / sec\nThe total number of received exemplars by the distributors, excluding rejected and deduped exemplars, but not necessarily ingested by the ingesters.\n\n", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Exemplars / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### In-memory series\nThe number of series not yet flushed to object storage that are held in ingester memory.\n\n", + "fill": 1, + "format": "short", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}))\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "In-memory series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Exemplars in ingesters\nNumber of TSDB exemplars currently in ingesters' storage.\n\n", + "fill": 1, + "format": "short", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}))\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Exemplars in ingesters", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 11, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - key-value store for high-availability (HA) deduplication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - key-value store for distributors ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 17, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 18, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - key-value store for the ingesters ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Uploaded blocks / sec\nThe rate of blocks being uploaded from the ingesters\nto object storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 19, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Uploaded blocks / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Upload latency\nThe average, median (50th percentile), and 99th percentile time\nthe ingesters take to upload blocks to object storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 20, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Upload latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - shipper", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Compactions per second\nIngesters maintain a local TSDB per-tenant on disk. Each TSDB maintains a head block for each\nactive time series; these blocks get periodically compacted (by default, every 2h).\nThis panel shows the rate of compaction operations across all TSDBs on all ingesters.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Compactions / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Compaction latency\nThe average, median (50th percentile), and 99th percentile time ingesters take to compact TSDB head blocks\non the local filesystem.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 22, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Compactions latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - TSDB head", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### WAL truncations per second\nThe WAL is truncated each time a new TSDB block is written. This panel measures the rate of\ntruncations.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 23, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "WAL truncations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Checkpoints created per second\nCheckpoints are created as part of the WAL truncation process.\nThis metric measures the rate of checkpoint creation.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 24, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Checkpoints created / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### WAL truncations latency (including checkpointing)\nAverage time taken to perform a full WAL truncation,\nincluding the time taken for the checkpointing to complete.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "id": 25, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval])) >= 0\n", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + } + ], + "title": "WAL truncations latency (includes checkpointing)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "WAL" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mmap-ed chunks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E28A42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 26, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "WAL", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "mmap-ed chunks", + "legendLink": null + } + ], + "title": "Corruptions / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - TSDB write ahead log (WAL)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 27, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "incoming exemplars", + "legendLink": null + } + ], + "title": "Distributor exemplars incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor exemplars received rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be sensibly lower than incoming rate because we dedupe the HA sent exemplars, and then reject based on time, see `cortex_discarded_exemplars_total` for specific reasons rates.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 28, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "received exemplars", + "legendLink": null + } + ], + "title": "Distributor exemplars received rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester ingested exemplars rate\nThe rate of exemplars ingested in the ingesters.\nEvery exemplar is sent to the replication factor number of ingesters, so the sum of rates from all ingesters is divided by the replication factor.\nThis ingested exemplars rate should match the distributor's received exemplars rate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 29, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "ingested exemplars", + "legendLink": null + } + ], + "title": "Ingester ingested exemplars rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester appended exemplars rate\nThe rate of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ex/s" + }, + "overrides": [ ] + }, + "id": 30, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "appended exemplars", + "legendLink": null + } + ], + "title": "Ingester appended exemplars rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 31, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|distributor.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected distributor requests", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "id": 32, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((mimir|ingester.*|cortex|mimir-write.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected ingester requests", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Instance Limits", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "Metrics", + "value": "Metrics" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [ ], + "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Writes", + "uid": "8280707b8f16e7b87b840fc1cc92d4c5", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Mimir + labels: + grafana_dashboard: "1" + name: mimir-writes.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + pyroscope-reads.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "pyroscope" + ], + "targetBlank": false, + "title": "Pyroscope Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(pyroscope_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(pyroscope|querier)\", route=~\".*merge.*|.*series.*|.*type.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|querier)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,route) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|querier)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(job_route_cluster:pyroscope_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(pyroscope|querier)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"}) by (route) / sum(job_route_cluster:pyroscope_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(pyroscope|querier)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"}) by (route) ", + "format": "time_series", + "legendFormat": "{{ route }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(pyroscope_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*merge.*|.*series.*|.*type.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,route) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "{{ route }} 50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(job_route_cluster:pyroscope_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"}) by (route) / sum(job_route_cluster:pyroscope_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*merge.*|.*series.*|.*type.*\", cluster=~\"$cluster\"}) by (route) ", + "format": "time_series", + "legendFormat": "{{ route }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "pyroscope" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(pyroscope_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(pyroscope_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Pyroscope / Reads", + "uid": "pyroscope-reads", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Pyroscope + labels: + grafana_dashboard: "1" + name: pyroscope-reads.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + pyroscope-writes.json: |- + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "pyroscope" + ], + "targetBlank": false, + "title": "Pyroscope Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,type) (job_type_cluster:pyroscope_distributor_received_compressed_bytes_bucket:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"})) * 1", + "format": "time_series", + "legendFormat": "{{ type }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,type) (job_type_cluster:pyroscope_distributor_received_compressed_bytes_bucket:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"})) * 1", + "format": "time_series", + "legendFormat": "{{ type }} 50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(job_type_cluster:pyroscope_distributor_received_compressed_bytes_sum:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"}) by (type) / sum(job_type_cluster:pyroscope_distributor_received_compressed_bytes_count:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"}) by (type) ", + "format": "time_series", + "legendFormat": "{{ type }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compressed Size", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,type) (job_type_cluster:pyroscope_distributor_received_samples_bucket:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"})) * 1", + "format": "time_series", + "legendFormat": "{{ type }} 99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le,type) (job_type_cluster:pyroscope_distributor_received_samples_bucket:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"})) * 1", + "format": "time_series", + "legendFormat": "{{ type }} 50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(job_type_cluster:pyroscope_distributor_received_samples_sum:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"}) by (type) / sum(job_type_cluster:pyroscope_distributor_received_samples_count:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", type=~\".*\", cluster=~\"$cluster\"}) by (type) ", + "format": "time_series", + "legendFormat": "{{ type }} Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Samples", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "count", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor Profiles received", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(pyroscope_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(pyroscope|distributor)\", route=~\".*push.*|.*ingest.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", route=~\".*push.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", route=~\".*push.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(job_route_cluster:pyroscope_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", route=~\".*push.*\", cluster=~\"$cluster\"}) / sum(job_route_cluster:pyroscope_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(pyroscope|distributor)\", route=~\".*push.*\", cluster=~\"$cluster\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor Requests", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(pyroscope_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*push.*|.*ingest.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*push.*|.*ingest.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*push.*|.*ingest.*\", cluster=~\"$cluster\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(job_route_cluster:pyroscope_request_duration_seconds_sum:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*push.*|.*ingest.*\", cluster=~\"$cluster\"}) / sum(job_route_cluster:pyroscope_request_duration_seconds_count:sum_rate{job=~\"($namespace)/(pyroscope|ingester)\", route=~\".*push.*|.*ingest.*\", cluster=~\"$cluster\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Head size in bytes per table type\nIngesters maintain a local Head per-tenant. Each\nHead maintains the active profiling series; Then\nthe head gets periodically compacted into a block\non disk. This panel shows the estimated size of\nthe Head in memory for all ingesters.\n\n", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(pyroscope_head_size_bytes{cluster=~\"$cluster\",job=~\"($namespace)/(pyroscope|ingester)\",}) by (type)", + "format": "time_series", + "legendFormat": "{{type}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Head size in bytes per table type", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Head size in bytes per pod\nIngesters maintain a local Head per-tenant. Each\nHead maintains the active profiling series; Then\nthe head gets periodically compacted into a block\non disk. This panel shows the estimated size of\nthe Head in memory for all ingesters.\n\n", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(pyroscope_head_size_bytes{cluster=~\"$cluster\",job=~\"($namespace)/(pyroscope|ingester)\",}) by (instance)", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Head size in bytes per pod", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester - Head", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "pyroscope" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(pyroscope_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(pyroscope_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Pyroscope / Writes", + "uid": "pyroscope-writes", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Pyroscope + labels: + grafana_dashboard: "1" + name: pyroscope-writes.json + namespace: monitoring-system +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: agent-flow-mixin-alerts + namespace: monitoring-system +spec: + groups: + - name: clustering + rules: + - alert: ClusterNotConverging + annotations: + message: 'Cluster is not converging: nodes report different number of peers + in the cluster.' + expr: stddev by (cluster, namespace) (sum without (state) (cluster_node_peers)) + != 0 + for: 10m + - alert: ClusterNodeCountMismatch + annotations: + message: Nodes report different number of peers vs. the count of observed + agent metrics. Some agent metrics may be missing or the cluster is in a + split brain state. + expr: | + sum without (state) (cluster_node_peers) != + on (cluster, namespace) group_left + count by (cluster, namespace) (cluster_node_info) + for: 15m + - alert: ClusterNodeUnhealthy + annotations: + message: Cluster node is reporting a gossip protocol health score > 0. + expr: | + cluster_node_gossip_health_score > 0 + for: 10m + - alert: ClusterNodeNameConflict + annotations: + message: A node tried to join the cluster with a name conflicting with an + existing peer. + expr: sum by (cluster, namespace) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) + > 0 + for: 10m + - alert: ClusterNodeStuckTerminating + annotations: + message: Cluster node stuck in Terminating state. + expr: sum by (cluster, namespace, instance) (cluster_node_peers{state="terminating"}) + > 0 + for: 10m + - alert: ClusterConfigurationDrift + annotations: + message: Cluster nodes are not using the same configuration file. + expr: | + count without (sha256) ( + max by (cluster, namespace, sha256) (agent_config_hash and on(cluster, namespace) cluster_node_info) + ) > 1 + for: 5m + - name: agent_controller + rules: + - alert: SlowComponentEvaluations + annotations: + message: Flow component evaluations are taking too long. + expr: sum by (cluster, namespace, component_id) (rate(agent_component_evaluation_slow_seconds[10m])) + > 0 + for: 15m + - alert: UnhealthyComponents + annotations: + message: Unhealthy Flow components detected. + expr: sum by (cluster, namespace) (agent_component_controller_running_components{health_type!="healthy"}) + > 0 + for: 15m +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: loki-mixin-alerts + namespace: monitoring-system +spec: + groups: + - name: loki_alerts + rules: + - alert: LokiRequestErrors + annotations: + message: | + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + expr: | + 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) + / + sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) + > 10 + for: 15m + labels: + severity: critical + - alert: LokiRequestPanics + annotations: + message: | + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. + expr: | + sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 + labels: + severity: critical + - alert: LokiRequestLatency + annotations: + message: | + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + expr: | + cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 + for: 15m + labels: + severity: critical + - alert: LokiTooManyCompactorsRunning + annotations: + message: | + {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time. + expr: | + sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 + for: 5m + labels: + severity: warning +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: loki-mixin-rules + namespace: monitoring-system +spec: + groups: + - name: loki_rules + rules: + - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:loki_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:loki_request_duration_seconds:50quantile + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m])) + by (cluster, job) + record: cluster_job:loki_request_duration_seconds:avg + - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job) + record: cluster_job:loki_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:loki_request_duration_seconds_sum:sum_rate + - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:loki_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) + by (le, cluster, job, route)) + record: cluster_job_route:loki_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) + by (le, cluster, job, route)) + record: cluster_job_route:loki_request_duration_seconds:50quantile + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) + / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds:avg + - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, + route) + record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate + - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:loki_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) + by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) + by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, + job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, + namespace, job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds:avg + - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, + job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, + job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate + - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace, + job, route) + record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: memcached-mixin-alerts + namespace: monitoring-system +spec: + groups: + - name: memcached + rules: + - alert: MemcachedDown + annotations: + description: Memcached instance {{ $labels.job }} / {{ $labels.instance }} + is down for more than 15 minutes. + summary: Memcached instance is down. + expr: | + memcached_up == 0 + for: 15m + labels: + severity: critical + - alert: MemcachedConnectionLimitApproaching + annotations: + description: Memcached instance {{ $labels.job }} / {{ $labels.instance }} + connection usage is at {{ printf "%0.0f" $value }}% for at least 15 minutes. + summary: Memcached max connection limit is approaching. + expr: | + (memcached_current_connections / memcached_max_connections * 100) > 80 + for: 15m + labels: + severity: warning + - alert: MemcachedConnectionLimitApproaching + annotations: + description: Memcached instance {{ $labels.job }} / {{ $labels.instance }} + connection usage is at {{ printf "%0.0f" $value }}% for at least 15 minutes. + summary: Memcached connections at critical level. + expr: | + (memcached_current_connections / memcached_max_connections * 100) > 95 + for: 15m + labels: + severity: critical +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: mimir-mixin-alerts + namespace: monitoring-system +spec: + groups: + - name: mimir_alerts + rules: + - alert: MimirIngesterUnhealthy + annotations: + message: Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} has {{ + printf "%f" $value }} unhealthy ingester(s). + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterunhealthy + expr: | + min by (cluster, namespace) (cortex_ring_members{state="Unhealthy", name="ingester"}) > 0 + for: 15m + labels: + severity: critical + - alert: MimirRequestErrors + annotations: + message: | + The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequesterrors + expr: | + 100 * sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"ready|debug_pprof"}[1m])) + / + sum by (cluster, namespace, job, route) (rate(cortex_request_duration_seconds_count{route!~"ready|debug_pprof"}[1m])) + > 1 + for: 15m + labels: + severity: critical + - alert: MimirRequestLatency + annotations: + message: | + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrequestlatency + expr: | + cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop|debug_pprof"} + > + 2.5 + for: 15m + labels: + severity: warning + - alert: MimirQueriesIncorrect + annotations: + message: | + The Mimir cluster {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% incorrect query results. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirqueriesincorrect + expr: | + 100 * sum by (cluster, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) + / + sum by (cluster, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 + for: 15m + labels: + severity: warning + - alert: MimirInconsistentRuntimeConfig + annotations: + message: | + An inconsistent runtime config file is used across cluster {{ $labels.cluster }}/{{ $labels.namespace }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirinconsistentruntimeconfig + expr: | + count(count by(cluster, namespace, job, sha256) (cortex_runtime_config_hash)) without(sha256) > 1 + for: 1h + labels: + severity: critical + - alert: MimirBadRuntimeConfig + annotations: + message: | + {{ $labels.job }} failed to reload runtime config. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbadruntimeconfig + expr: | + # The metric value is reset to 0 on error while reloading the config at runtime. + cortex_runtime_config_last_reload_successful == 0 + for: 5m + labels: + severity: critical + - alert: MimirFrontendQueriesStuck + annotations: + message: | + There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirfrontendqueriesstuck + expr: | + sum by (cluster, namespace, job) (min_over_time(cortex_query_frontend_queue_length[1m])) > 0 + for: 5m + labels: + severity: critical + - alert: MimirSchedulerQueriesStuck + annotations: + message: | + There are {{ $value }} queued up queries in {{ $labels.cluster }}/{{ $labels.namespace }} {{ $labels.job }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirschedulerqueriesstuck + expr: | + sum by (cluster, namespace, job) (min_over_time(cortex_query_scheduler_queue_length[1m])) > 0 + for: 7m + labels: + severity: critical + - alert: MimirCacheRequestErrors + annotations: + message: | + The cache {{ $labels.name }} used by Mimir {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors for {{ $labels.operation }} operation. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircacherequesterrors + expr: | + ( + sum by(cluster, namespace, name, operation) ( + rate(thanos_memcached_operation_failures_total[1m]) + or + rate(thanos_cache_operation_failures_total[1m]) + ) + / + sum by(cluster, namespace, name, operation) ( + rate(thanos_memcached_operations_total[1m]) + or + rate(thanos_cache_operations_total[1m]) + ) + ) * 100 > 5 + for: 5m + labels: + severity: warning + - alert: MimirIngesterRestarts + annotations: + message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterrestarts + expr: | + ( + sum by(cluster, namespace, pod) ( + increase(kube_pod_container_status_restarts_total{container=~"((mimir|ingester|mimir-write)|(mimir|mimir-write|distributor|ingester))"}[30m]) + ) + >= 2 + ) + and + ( + count by(cluster, namespace, pod) (cortex_build_info) > 0 + ) + labels: + severity: warning + - alert: MimirKVStoreFailure + annotations: + message: | + Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is failing to talk to the KV store {{ $labels.kv_name }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirkvstorefailure + expr: | + ( + sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count{status_code!~"2.+"}[1m])) + / + sum by(cluster, namespace, pod, status_code, kv_name) (rate(cortex_kv_request_duration_seconds_count[1m])) + ) + # We want to get alerted only in case there's a constant failure. + == 1 + for: 5m + labels: + severity: critical + - alert: MimirMemoryMapAreasTooHigh + annotations: + message: '{{ $labels.job }}/{{ $labels.pod }} has a number of mmap-ed areas + close to the limit.' + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirmemorymapareastoohigh + expr: | + process_memory_map_areas{job=~".*/(mimir|ingester.*|cortex|mimir-write.*|mimir|store-gateway.*|cortex|mimir-backend.*)"} / process_memory_map_areas_limit{job=~".*/(mimir|ingester.*|cortex|mimir-write.*|mimir|store-gateway.*|cortex|mimir-backend.*)"} > 0.8 + for: 5m + labels: + severity: critical + - alert: MimirIngesterInstanceHasNoTenants + annotations: + message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has no tenants assigned. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants + expr: | + (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) + and on (cluster, namespace) + # Only if there are more time-series than would be expected due to continuous testing load + ( + sum by(cluster, namespace) (cortex_ingester_memory_series) + / + max by(cluster, namespace) (cortex_distributor_replication_factor) + ) > 100000 + for: 1h + labels: + severity: warning + - alert: MimirRulerInstanceHasNoRuleGroups + annotations: + message: Mimir ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has no rule groups assigned. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerinstancehasnorulegroups + expr: | + # Alert on ruler instances in microservices mode that have no rule groups assigned, + min by(cluster, namespace, pod) (cortex_ruler_managers_total{pod=~"(.*mimir-)?(mimir|ruler|mimir-backend).*"}) == 0 + # but only if other ruler instances of the same cell do have rule groups assigned + and on (cluster, namespace) + (max by(cluster, namespace) (cortex_ruler_managers_total) > 0) + # and there are more than two instances overall + and on (cluster, namespace) + (count by (cluster, namespace) (cortex_ruler_managers_total) > 2) + for: 1h + labels: + severity: warning + - alert: MimirIngestedDataTooFarInTheFuture + annotations: + message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has ingested samples with timestamps more than 1h in the future. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesteddatatoofarinthefuture + expr: | + max by(cluster, namespace, pod) ( + cortex_ingester_tsdb_head_max_timestamp_seconds - time() + and + cortex_ingester_tsdb_head_max_timestamp_seconds > 0 + ) > 60*60 + for: 5m + labels: + severity: warning + - alert: MimirStoreGatewayTooManyFailedOperations + annotations: + message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ + $labels.namespace }} is experiencing {{ $value | humanizePercentage }} errors + while doing {{ $labels.operation }} on the object storage. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaytoomanyfailedoperations + expr: | + sum by(cluster, namespace, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0 + for: 5m + labels: + severity: warning + - alert: MimirRingMembersMismatch + annotations: + message: | + Number of members in Mimir ingester hash ring does not match the expected number in {{ $labels.cluster }}/{{ $labels.namespace }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirringmembersmismatch + expr: | + ( + avg by(cluster, namespace) (sum by(cluster, namespace, pod) (cortex_ring_members{name="ingester",job=~".*/(mimir|ingester.*|cortex|mimir-write.*)"})) + != sum by(cluster, namespace) (up{job=~".*/(mimir|ingester.*|cortex|mimir-write.*)"}) + ) + and + ( + count by(cluster, namespace) (cortex_build_info) > 0 + ) + for: 15m + labels: + component: ingester + severity: warning + - name: mimir_instance_limits_alerts + rules: + - alert: MimirIngesterReachingSeriesLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit + expr: | + ( + (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_series"} > 0) + ) > 0.8 + for: 3h + labels: + severity: warning + - alert: MimirIngesterReachingSeriesLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its series limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingserieslimit + expr: | + ( + (cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_series"} > 0) + ) > 0.9 + for: 5m + labels: + severity: critical + - alert: MimirIngesterReachingTenantsLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit + expr: | + ( + (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_tenants"} > 0) + ) > 0.7 + for: 5m + labels: + severity: warning + - alert: MimirIngesterReachingTenantsLimit + annotations: + message: | + Ingester {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its tenant limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterreachingtenantslimit + expr: | + ( + (cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"}) + and ignoring (limit) + (cortex_ingester_instance_limits{limit="max_tenants"} > 0) + ) > 0.8 + for: 5m + labels: + severity: critical + - alert: MimirReachingTCPConnectionsLimit + annotations: + message: | + Mimir instance {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its TCP connections limit for {{ $labels.protocol }} protocol. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirreachingtcpconnectionslimit + expr: | + cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and + cortex_tcp_connections_limit > 0 + for: 5m + labels: + severity: critical + - alert: MimirDistributorReachingInflightPushRequestLimit + annotations: + message: | + Distributor {{ $labels.job }}/{{ $labels.pod }} has reached {{ $value | humanizePercentage }} of its inflight push request limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirdistributorreachinginflightpushrequestlimit + expr: | + ( + (cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"}) + and ignoring (limit) + (cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0) + ) > 0.8 + for: 5m + labels: + severity: critical + - name: mimir-rollout-alerts + rules: + - alert: MimirRolloutStuck + annotations: + message: | + The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck + expr: | + ( + max without (revision) ( + sum without(statefulset) (label_replace(kube_statefulset_status_current_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) + unless + sum without(statefulset) (label_replace(kube_statefulset_status_update_revision, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) + ) + * + ( + sum without(statefulset) (label_replace(kube_statefulset_replicas, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) + != + sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")) + ) + ) and ( + changes(sum without(statefulset) (label_replace(kube_statefulset_status_replicas_updated, "rollout_group", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) + == + 0 + ) + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + for: 30m + labels: + severity: warning + workload_type: statefulset + - alert: MimirRolloutStuck + annotations: + message: | + The {{ $labels.rollout_group }} rollout is stuck in {{ $labels.cluster }}/{{ $labels.namespace }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrolloutstuck + expr: | + ( + sum without(deployment) (label_replace(kube_deployment_spec_replicas, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) + != + sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?")) + ) and ( + changes(sum without(deployment) (label_replace(kube_deployment_status_replicas_updated, "rollout_group", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"))[15m:1m]) + == + 0 + ) + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + for: 30m + labels: + severity: warning + workload_type: deployment + - alert: RolloutOperatorNotReconciling + annotations: + message: | + Rollout operator is not reconciling the rollout group {{ $labels.rollout_group }} in {{ $labels.cluster }}/{{ $labels.namespace }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#rolloutoperatornotreconciling + expr: | + max by(cluster, namespace, rollout_group) (time() - rollout_operator_last_successful_group_reconcile_timestamp_seconds) > 600 + for: 5m + labels: + severity: critical + - name: mimir-provisioning + rules: + - alert: MimirAllocatingTooMuchMemory + annotations: + message: | + Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory + expr: | + ( + # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. + # See: https://github.com/grafana/mimir/issues/2466 + container_memory_rss{container=~"((mimir|ingester|mimir-write)|(mimir|mimir-write|distributor|ingester)|(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter))"} + / + ( container_spec_memory_limit_bytes{container=~"((mimir|ingester|mimir-write)|(mimir|mimir-write|distributor|ingester)|(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter))"} > 0 ) + ) + # Match only Mimir namespaces. + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + > 0.65 + for: 15m + labels: + severity: warning + - alert: MimirAllocatingTooMuchMemory + annotations: + message: | + Instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirallocatingtoomuchmemory + expr: | + ( + # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. + # See: https://github.com/grafana/mimir/issues/2466 + container_memory_rss{container=~"((mimir|ingester|mimir-write)|(mimir|mimir-write|distributor|ingester)|(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter))"} + / + ( container_spec_memory_limit_bytes{container=~"((mimir|ingester|mimir-write)|(mimir|mimir-write|distributor|ingester)|(mimir|mimir-backend|query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter))"} > 0 ) + ) + # Match only Mimir namespaces. + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + > 0.8 + for: 15m + labels: + severity: critical + - name: ruler_alerts + rules: + - alert: MimirRulerTooManyFailedPushes + annotations: + message: | + Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% write (push) errors. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedpushes + expr: | + 100 * ( + sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_failed_total[1m])) + / + sum by (cluster, namespace, pod) (rate(cortex_ruler_write_requests_total[1m])) + ) > 1 + for: 5m + labels: + severity: critical + - alert: MimirRulerTooManyFailedQueries + annotations: + message: | + Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors while evaluating rules. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulertoomanyfailedqueries + expr: | + 100 * ( + sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_failed_total[1m])) + / + sum by (cluster, namespace, pod) (rate(cortex_ruler_queries_total[1m])) + ) > 1 + for: 5m + labels: + severity: critical + - alert: MimirRulerMissedEvaluations + annotations: + message: | + Mimir Ruler {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% missed iterations for the rule group {{ $labels.rule_group }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulermissedevaluations + expr: | + 100 * ( + sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_missed_total[1m])) + / + sum by (cluster, namespace, pod, rule_group) (rate(cortex_prometheus_rule_group_iterations_total[1m])) + ) > 1 + for: 5m + labels: + severity: warning + - alert: MimirRulerFailedRingCheck + annotations: + message: | + Mimir Rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are experiencing errors when checking the ring for rule group ownership. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerfailedringcheck + expr: | + sum by (cluster, namespace, job) (rate(cortex_ruler_ring_check_errors_total[1m])) + > 0 + for: 5m + labels: + severity: critical + - alert: MimirRulerRemoteEvaluationFailing + annotations: + message: | + Mimir rulers in {{ $labels.cluster }}/{{ $labels.namespace }} are failing to perform {{ printf "%.2f" $value }}% of remote evaluations through the ruler-query-frontend. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirrulerremoteevaluationfailing + expr: | + 100 * ( + sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", status_code=~"5..", job=~".*/(mimir|ruler-query-frontend.*)"}[5m])) + / + sum by (cluster, namespace) (rate(cortex_request_duration_seconds_count{route="/httpgrpc.HTTP/Handle", job=~".*/(mimir|ruler-query-frontend.*)"}[5m])) + ) > 1 + for: 5m + labels: + severity: warning + - name: gossip_alerts + rules: + - alert: MimirGossipMembersTooHigh + annotations: + message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace + }} consistently sees a higher than expected number of gossip members. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoohigh + expr: | + max by (cluster, namespace) (memberlist_client_cluster_members_count) + > + (sum by (cluster, namespace) (up{job=~".+/(mimir|admin-api|alertmanager|compactor.*|distributor.*|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) + for: 20m + labels: + severity: warning + - alert: MimirGossipMembersTooLow + annotations: + message: One or more Mimir instances in {{ $labels.cluster }}/{{ $labels.namespace + }} consistently sees a lower than expected number of gossip members. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirgossipmemberstoolow + expr: | + min by (cluster, namespace) (memberlist_client_cluster_members_count) + < + (sum by (cluster, namespace) (up{job=~".+/(mimir|admin-api|alertmanager|compactor.*|distributor.*|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) + for: 20m + labels: + severity: warning + - name: etcd_alerts + rules: + - alert: EtcdAllocatingTooMuchMemory + annotations: + message: | + Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory + expr: | + ( + container_memory_working_set_bytes{container="etcd"} + / + ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) + ) > 0.65 + for: 15m + labels: + severity: warning + - alert: EtcdAllocatingTooMuchMemory + annotations: + message: | + Too much memory being used by {{ $labels.namespace }}/{{ $labels.pod }} - bump memory limit. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory + expr: | + ( + container_memory_working_set_bytes{container="etcd"} + / + ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) + ) > 0.8 + for: 15m + labels: + severity: critical + - name: alertmanager_alerts + rules: + - alert: MimirAlertmanagerSyncConfigsFailing + annotations: + message: | + Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to read tenant configurations from storage. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagersyncconfigsfailing + expr: | + rate(cortex_alertmanager_sync_configs_failed_total[5m]) > 0 + for: 30m + labels: + severity: critical + - alert: MimirAlertmanagerRingCheckFailing + annotations: + message: | + Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to check tenants ownership via the ring. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerringcheckfailing + expr: | + rate(cortex_alertmanager_ring_check_errors_total[2m]) > 0 + for: 10m + labels: + severity: critical + - alert: MimirAlertmanagerPartialStateMergeFailing + annotations: + message: | + Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to merge partial state changes received from a replica. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpartialstatemergefailing + expr: | + rate(cortex_alertmanager_partial_state_merges_failed_total[2m]) > 0 + for: 10m + labels: + severity: critical + - alert: MimirAlertmanagerReplicationFailing + annotations: + message: | + Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is failing to replicating partial state to its replicas. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerreplicationfailing + expr: | + rate(cortex_alertmanager_state_replication_failed_total[2m]) > 0 + for: 10m + labels: + severity: critical + - alert: MimirAlertmanagerPersistStateFailing + annotations: + message: | + Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} is unable to persist full state snaphots to remote storage. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerpersiststatefailing + expr: | + rate(cortex_alertmanager_state_persist_failed_total[15m]) > 0 + for: 1h + labels: + severity: critical + - alert: MimirAlertmanagerInitialSyncFailed + annotations: + message: | + Mimir Alertmanager {{ $labels.job }}/{{ $labels.pod }} was unable to obtain some initial state when starting up. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinitialsyncfailed + expr: | + increase(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed"}[1m]) > 0 + labels: + severity: critical + - alert: MimirAlertmanagerAllocatingTooMuchMemory + annotations: + message: | + Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory + expr: | + (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.80 + and + (container_spec_memory_limit_bytes{container="alertmanager"} > 0) + for: 15m + labels: + severity: warning + - alert: MimirAlertmanagerAllocatingTooMuchMemory + annotations: + message: | + Alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is using too much memory. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerallocatingtoomuchmemory + expr: | + (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > 0.90 + and + (container_spec_memory_limit_bytes{container="alertmanager"} > 0) + for: 15m + labels: + severity: critical + - alert: MimirAlertmanagerInstanceHasNoTenants + annotations: + message: Mimir alertmanager {{ $labels.pod }} in {{ $labels.cluster }}/{{ + $labels.namespace }} owns no tenants. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiralertmanagerinstancehasnotenants + expr: | + # Alert on alertmanager instances in microservices mode that own no tenants, + min by(cluster, namespace, pod) (cortex_alertmanager_tenants_owned{pod=~"(.*mimir-)?(mimir|alertmanager|mimir-backend).*"}) == 0 + # but only if other instances of the same cell do have tenants assigned. + and on (cluster, namespace) + max by(cluster, namespace) (cortex_alertmanager_tenants_owned) > 0 + for: 1h + labels: + severity: warning + - name: mimir_blocks_alerts + rules: + - alert: MimirIngesterHasNotShippedBlocks + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not shipped any block in the last 4 hours. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblocks + expr: | + (min by(cluster, namespace, pod) (time() - cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 60 * 60 * 4) + and + (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) + and + # Only if the ingester has ingested samples over the last 4h. + (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) + and + # Only if the ingester was ingesting samples 4h ago. This protects against the case where the ingester replica + # had ingested samples in the past, then no traffic was received for a long period and then it starts + # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving + # samples, while the a block shipping is expected within the next 4h. + (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) + for: 15m + labels: + severity: critical + - alert: MimirIngesterHasNotShippedBlocksSinceStart + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not shipped any block in the last 4 hours. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasnotshippedblockssincestart + expr: | + (max by(cluster, namespace, pod) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) + and + (max by(cluster, namespace, pod) (max_over_time(cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) + for: 4h + labels: + severity: critical + - alert: MimirIngesterHasUnshippedBlocks + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has compacted a block {{ $value | humanizeDuration }} ago but it hasn't + been successfully uploaded to the storage yet. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterhasunshippedblocks + expr: | + (time() - cortex_ingester_oldest_unshipped_block_timestamp_seconds > 3600) + and + (cortex_ingester_oldest_unshipped_block_timestamp_seconds > 0) + for: 15m + labels: + severity: critical + - alert: MimirIngesterTSDBHeadCompactionFailed + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} is failing to compact TSDB head. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadcompactionfailed + expr: | + rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0 + for: 15m + labels: + severity: critical + - alert: MimirIngesterTSDBHeadTruncationFailed + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} is failing to truncate TSDB head. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbheadtruncationfailed + expr: | + rate(cortex_ingester_tsdb_head_truncations_failed_total[5m]) > 0 + labels: + severity: critical + - alert: MimirIngesterTSDBCheckpointCreationFailed + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} is failing to create TSDB checkpoint. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointcreationfailed + expr: | + rate(cortex_ingester_tsdb_checkpoint_creations_failed_total[5m]) > 0 + labels: + severity: critical + - alert: MimirIngesterTSDBCheckpointDeletionFailed + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} is failing to delete TSDB checkpoint. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbcheckpointdeletionfailed + expr: | + rate(cortex_ingester_tsdb_checkpoint_deletions_failed_total[5m]) > 0 + labels: + severity: critical + - alert: MimirIngesterTSDBWALTruncationFailed + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} is failing to truncate TSDB WAL. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwaltruncationfailed + expr: | + rate(cortex_ingester_tsdb_wal_truncations_failed_total[5m]) > 0 + labels: + severity: warning + - alert: MimirIngesterTSDBWALCorrupted + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} got a corrupted TSDB WAL. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted + expr: | + # alert when there are more than one corruptions + count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1 + and + # and there is only one zone + count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1 + labels: + deployment: single-zone + severity: critical + - alert: MimirIngesterTSDBWALCorrupted + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} got a corrupted TSDB WAL. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted + expr: | + # alert when there are more than one corruptions + count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1 + and + # and there are multiple zones + count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1 + labels: + deployment: multi-zone + severity: critical + - alert: MimirIngesterTSDBWALWritesFailed + annotations: + message: Mimir Ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} is failing to write to TSDB WAL. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalwritesfailed + expr: | + rate(cortex_ingester_tsdb_wal_writes_failed_total[1m]) > 0 + for: 3m + labels: + severity: critical + - alert: MimirStoreGatewayHasNotSyncTheBucket + annotations: + message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ + $labels.namespace }} has not successfully synched the bucket since {{ $value + | humanizeDuration }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewayhasnotsyncthebucket + expr: | + (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30) + and + cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 0 + for: 5m + labels: + severity: critical + - alert: MimirStoreGatewayNoSyncedTenants + annotations: + message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ + $labels.namespace }} is not syncing any blocks for any tenant. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaynosyncedtenants + expr: | + min by(cluster, namespace, pod) (cortex_bucket_stores_tenants_synced{component="store-gateway"}) == 0 + for: 1h + labels: + severity: warning + - alert: MimirBucketIndexNotUpdated + annotations: + message: Mimir bucket index for tenant {{ $labels.user }} in {{ $labels.cluster + }}/{{ $labels.namespace }} has not been updated since {{ $value | humanizeDuration + }}. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirbucketindexnotupdated + expr: | + min by(cluster, namespace, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200 + labels: + severity: critical + - name: mimir_compactor_alerts + rules: + - alert: MimirCompactorHasNotSuccessfullyCleanedUpBlocks + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not successfully cleaned up blocks in the last 6 hours. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullycleanedupblocks + expr: | + # The "last successful run" metric is updated even if the compactor owns no tenants, + # so this alert correctly doesn't fire if compactor has nothing to do. + (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 6) + for: 1h + labels: + severity: critical + - alert: MimirCompactorHasNotSuccessfullyRunCompaction + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not run compaction in the last 24 hours. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction + expr: | + # The "last successful run" metric is updated even if the compactor owns no tenants, + # so this alert correctly doesn't fire if compactor has nothing to do. + (time() - cortex_compactor_last_successful_run_timestamp_seconds > 60 * 60 * 24) + and + (cortex_compactor_last_successful_run_timestamp_seconds > 0) + for: 1h + labels: + reason: in-last-24h + severity: critical + - alert: MimirCompactorHasNotSuccessfullyRunCompaction + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not run compaction in the last 24 hours. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction + expr: | + # The "last successful run" metric is updated even if the compactor owns no tenants, + # so this alert correctly doesn't fire if compactor has nothing to do. + cortex_compactor_last_successful_run_timestamp_seconds == 0 + for: 24h + labels: + reason: since-startup + severity: critical + - alert: MimirCompactorHasNotSuccessfullyRunCompaction + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} failed to run 2 consecutive compactions. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotsuccessfullyruncompaction + expr: | + increase(cortex_compactor_runs_failed_total{reason!="shutdown"}[2h]) >= 2 + labels: + reason: consecutive-failures + severity: critical + - alert: MimirCompactorHasNotUploadedBlocks + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not uploaded any block in the last 24 hours. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks + expr: | + (time() - (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"})) > 60 * 60 * 24) + and + (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) > 0) + and + # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do + # (e.g. there are more replicas than required because running as part of mimir-backend). + (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) + for: 15m + labels: + severity: critical + time_period: 24h + - alert: MimirCompactorHasNotUploadedBlocks + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has not uploaded any block since its start. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorhasnotuploadedblocks + expr: | + (max by(cluster, namespace, pod) (thanos_objstore_bucket_last_successful_upload_time{component="compactor"}) == 0) + and + # Only if some compactions have started. We don't want to fire this alert if the compactor has nothing to do + # (e.g. there are more replicas than required because running as part of mimir-backend). + (sum by(cluster, namespace, pod) (rate(cortex_compactor_group_compaction_runs_started_total[24h])) > 0) + for: 24h + labels: + severity: critical + time_period: since-start + - alert: MimirCompactorSkippedUnhealthyBlocks + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has found and ignored unhealthy blocks. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks + expr: | + increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 + for: 1m + labels: + severity: warning + - alert: MimirCompactorSkippedUnhealthyBlocks + annotations: + message: Mimir Compactor {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace + }} has found and ignored unhealthy blocks. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircompactorskippedunhealthyblocks + expr: | + increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 + for: 30m + labels: + severity: critical + - name: mimir_autoscaling + rules: + - alert: MimirAutoscalerNotActive + annotations: + message: The Horizontal Pod Autoscaler (HPA) {{ $labels.horizontalpodautoscaler + }} in {{ $labels.namespace }} is not active. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalernotactive + expr: | + ( + label_replace(( + kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} + # Match only Mimir namespaces. + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + # Add "metric" label. + + on(cluster, namespace, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") + > 0), + "scaledObject", "$1", "horizontalpodautoscaler", "keda-hpa-(.*)" + ) + ) + # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, + # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported + # by KEDA could not exist at all or being exposed with a value of 0. + and on (cluster, namespace, metric, scaledObject) + (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) + for: 1h + labels: + severity: critical + - alert: MimirAutoscalerKedaFailing + annotations: + message: The Keda ScaledObject {{ $labels.scaledObject }} in {{ $labels.namespace + }} is experiencing errors. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirautoscalerkedafailing + expr: | + ( + # Find KEDA scalers reporting errors. + label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") + # Match only Mimir namespaces. + * on(cluster, namespace) group_left max by(cluster, namespace) (cortex_build_info) + ) + > 0 + for: 1h + labels: + severity: critical + - name: mimir_continuous_test + rules: + - alert: MimirContinuousTestNotRunningOnWrites + annotations: + message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ + $labels.namespace }} is not effectively running because writes are failing. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonwrites + expr: | + sum by(cluster, namespace, test) (rate(mimir_continuous_test_writes_failed_total[5m])) > 0 + for: 1h + labels: + severity: warning + - alert: MimirContinuousTestNotRunningOnReads + annotations: + message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ + $labels.namespace }} is not effectively running because queries are failing. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestnotrunningonreads + expr: | + sum by(cluster, namespace, test) (rate(mimir_continuous_test_queries_failed_total[5m])) > 0 + for: 1h + labels: + severity: warning + - alert: MimirContinuousTestFailed + annotations: + message: Mimir continuous test {{ $labels.test }} in {{ $labels.cluster }}/{{ + $labels.namespace }} failed when asserting query results. + runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimircontinuoustestfailed + expr: | + sum by(cluster, namespace, test) (rate(mimir_continuous_test_query_result_checks_failed_total[10m])) > 0 + labels: + severity: warning +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: mimir-mixin-rules + namespace: monitoring-system +spec: + groups: + - name: mimir_api_1 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / + sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, + job) + record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds_count:sum_rate + - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job) + record: cluster_job:cortex_request_duration_seconds:sum_rate + - name: mimir_api_2 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) + by (le, cluster, job, route)) + record: cluster_job_route:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) + by (le, cluster, job, route)) + record: cluster_job_route:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) + / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, + job, route) + record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, + route) + record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate + - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, job, route) + record: cluster_job_route:cortex_request_duration_seconds:sum_rate + - name: mimir_api_3 + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) + by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) + by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, + job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, + namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds:avg + - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, + namespace, job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, + job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, + job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate + - expr: sum(rate(cortex_request_duration_seconds[1m])) by (cluster, namespace, + job, route) + record: cluster_namespace_job_route:cortex_request_duration_seconds:sum_rate + - name: mimir_querier_api + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, + job) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, + job) + record: cluster_job:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, + cluster, job) + record: cluster_job:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, + job) + record: cluster_job:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, + job) + record: cluster_job:cortex_querier_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) + by (le, cluster, job, route)) + record: cluster_job_route:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) + by (le, cluster, job, route)) + record: cluster_job_route:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, + job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) + by (cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, + cluster, job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, + job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, + job, route) + record: cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) + by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) + by (le, cluster, namespace, job, route)) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:50quantile + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, + namespace, job, route) / sum(rate(cortex_querier_request_duration_seconds_count[1m])) + by (cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds:avg + - expr: sum(rate(cortex_querier_request_duration_seconds_bucket[1m])) by (le, + cluster, namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_sum[1m])) by (cluster, + namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_querier_request_duration_seconds_count[1m])) by (cluster, + namespace, job, route) + record: cluster_namespace_job_route:cortex_querier_request_duration_seconds_count:sum_rate + - name: mimir_storage + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_kv_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_kv_request_duration_seconds:50quantile + - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) + / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds:avg + - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, + job) + record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job) + record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate + - name: mimir_queries + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_retries:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_retries:50quantile + - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) + by (cluster, job) + record: cluster_job:cortex_query_frontend_retries:avg + - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) + record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate + - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries_sum:sum_rate + - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) + record: cluster_job:cortex_query_frontend_retries_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, + job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by + (cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by + (le, cluster, job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, + job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate + - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, + job) + record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate + - name: mimir_ingester_queries + rules: + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_series:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_series:50quantile + - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / + sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series:avg + - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, + job) + record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_series_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_samples:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_samples:50quantile + - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / + sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples:avg + - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, + job) + record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_samples_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_exemplars:99quantile + - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) + by (le, cluster, job)) + record: cluster_job:cortex_ingester_queried_exemplars:50quantile + - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) + / sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars:avg + - expr: sum(rate(cortex_ingester_queried_exemplars_bucket[1m])) by (le, cluster, + job) + record: cluster_job:cortex_ingester_queried_exemplars_bucket:sum_rate + - expr: sum(rate(cortex_ingester_queried_exemplars_sum[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars_sum:sum_rate + - expr: sum(rate(cortex_ingester_queried_exemplars_count[1m])) by (cluster, job) + record: cluster_job:cortex_ingester_queried_exemplars_count:sum_rate + - name: mimir_received_samples + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m])) + record: cluster_namespace_job:cortex_distributor_received_samples:rate5m + - name: mimir_exemplars_in + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_exemplars_in_total[5m])) + record: cluster_namespace_job:cortex_distributor_exemplars_in:rate5m + - name: mimir_received_exemplars + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_distributor_received_exemplars_total[5m])) + record: cluster_namespace_job:cortex_distributor_received_exemplars:rate5m + - name: mimir_exemplars_ingested + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_ingester_ingested_exemplars_total[5m])) + record: cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m + - name: mimir_exemplars_appended + rules: + - expr: | + sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total[5m])) + record: cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m + - name: mimir_scaling_rules + rules: + - expr: | + # Convenience rule to get the number of replicas for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + sum by (cluster, namespace, deployment) ( + label_replace( + kube_deployment_spec_replicas, + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + or + sum by (cluster, namespace, deployment) ( + label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") + ) + record: cluster_namespace_deployment:actual_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by (cluster, namespace) ( + cluster_namespace_job:cortex_distributor_received_samples:rate5m + )[24h:] + ) + / 240000 + ) + labels: + deployment: distributor + reason: sample_rate + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) + * 0.59999999999999998 / 240000 + ) + labels: + deployment: distributor + reason: sample_rate_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by (cluster, namespace) ( + cluster_namespace_job:cortex_distributor_received_samples:rate5m + )[24h:] + ) + * 3 / 80000 + ) + labels: + deployment: ingester + reason: sample_rate + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + quantile_over_time(0.99, + sum by(cluster, namespace) ( + cortex_ingester_memory_series + )[24h:] + ) + / 1500000 + ) + labels: + deployment: ingester + reason: active_series + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_limits_overrides{limit_name="max_global_series_per_user"}) + * 3 * 0.59999999999999998 / 1500000 + ) + labels: + deployment: ingester + reason: active_series_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + sum by (cluster, namespace) (cortex_limits_overrides{limit_name="ingestion_rate"}) + * 0.59999999999999998 / 80000 + ) + labels: + deployment: ingester + reason: sample_rate_limits + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + ceil( + (sum by (cluster, namespace) ( + cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"} + ) / 4) + / + avg by (cluster, namespace) ( + memcached_limit_bytes{job=~".+/memcached"} + ) + ) + labels: + deployment: memcached + reason: active_series + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + sum by (cluster, namespace, pod)(rate(container_cpu_usage_seconds_total[1m])), + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + record: cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate + - expr: | + # Convenience rule to get the CPU request for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_cpu_cores was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests_cpu_cores, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests{resource="cpu"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + record: cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum + - expr: | + # Jobs should be sized to their CPU usage. + # We do this by comparing 99th percentile usage over the last 24hrs to + # their current provisioned #replicas and resource requests. + ceil( + cluster_namespace_deployment:actual_replicas:count + * + quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) + / + cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum + ) + labels: + reason: cpu_usage + record: cluster_namespace_deployment_reason:required_replicas:count + - expr: | + # Convenience rule to get the Memory utilization for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + container_memory_usage_bytes{image!=""}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + record: cluster_namespace_deployment:container_memory_usage_bytes:sum + - expr: | + # Convenience rule to get the Memory request for both a deployment and a statefulset. + # Multi-zone deployments are grouped together removing the "zone-X" suffix. + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_memory_bytes was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests_memory_bytes, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + label_replace( + kube_pod_container_resource_requests{resource="memory"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ), + # The question mark in "(.*?)" is used to make it non-greedy, otherwise it + # always matches everything and the (optional) zone is not removed. + "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) + ) + record: cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum + - expr: | + # Jobs should be sized to their Memory usage. + # We do this by comparing 99th percentile usage over the last 24hrs to + # their current provisioned #replicas and resource requests. + ceil( + cluster_namespace_deployment:actual_replicas:count + * + quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h]) + / + cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum + ) + labels: + reason: memory_usage + record: cluster_namespace_deployment_reason:required_replicas:count + - name: mimir_alertmanager_rules + rules: + - expr: | + sum by (cluster, job, pod) (cortex_alertmanager_alerts) + record: cluster_job_pod:cortex_alertmanager_alerts:sum + - expr: | + sum by (cluster, job, pod) (cortex_alertmanager_silences) + record: cluster_job_pod:cortex_alertmanager_silences:sum + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_alerts_received_total[5m])) + record: cluster_job:cortex_alertmanager_alerts_received_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_alerts_invalid_total[5m])) + record: cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m + - expr: | + sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[5m])) + record: cluster_job_integration:cortex_alertmanager_notifications_total:rate5m + - expr: | + sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[5m])) + record: cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_state_replication_total[5m])) + record: cluster_job:cortex_alertmanager_state_replication_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_state_replication_failed_total[5m])) + record: cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_total[5m])) + record: cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m + - expr: | + sum by (cluster, job) (rate(cortex_alertmanager_partial_state_merges_failed_total[5m])) + record: cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m + - name: mimir_ingester_rules + rules: + - expr: | + sum by(cluster, namespace, pod) (rate(cortex_ingester_ingested_samples_total[1m])) + record: cluster_namespace_pod:cortex_ingester_ingested_samples_total:rate1m +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: pyroscope-mixin-rules + namespace: monitoring-system +spec: + groups: + - name: pyroscope_rules + rules: + - expr: histogram_quantile(0.99, sum(rate(pyroscope_request_duration_seconds_bucket[1m])) + by (le, job, cluster)) + record: job_cluster:pyroscope_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(pyroscope_request_duration_seconds_bucket[1m])) + by (le, job, cluster)) + record: job_cluster:pyroscope_request_duration_seconds:50quantile + - expr: sum(rate(pyroscope_request_duration_seconds_sum[1m])) by (job, cluster) + / sum(rate(pyroscope_request_duration_seconds_count[1m])) by (job, cluster) + record: job_cluster:pyroscope_request_duration_seconds:avg + - expr: sum(rate(pyroscope_request_duration_seconds_bucket[1m])) by (le, job, + cluster) + record: job_cluster:pyroscope_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(pyroscope_request_duration_seconds_sum[1m])) by (job, cluster) + record: job_cluster:pyroscope_request_duration_seconds_sum:sum_rate + - expr: sum(rate(pyroscope_request_duration_seconds_count[1m])) by (job, cluster) + record: job_cluster:pyroscope_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(pyroscope_request_duration_seconds_bucket[1m])) + by (le, job, route, cluster)) + record: job_route_cluster:pyroscope_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(pyroscope_request_duration_seconds_bucket[1m])) + by (le, job, route, cluster)) + record: job_route_cluster:pyroscope_request_duration_seconds:50quantile + - expr: sum(rate(pyroscope_request_duration_seconds_sum[1m])) by (job, route, + cluster) / sum(rate(pyroscope_request_duration_seconds_count[1m])) by (job, + route, cluster) + record: job_route_cluster:pyroscope_request_duration_seconds:avg + - expr: sum(rate(pyroscope_request_duration_seconds_bucket[1m])) by (le, job, + route, cluster) + record: job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(pyroscope_request_duration_seconds_sum[1m])) by (job, route, + cluster) + record: job_route_cluster:pyroscope_request_duration_seconds_sum:sum_rate + - expr: sum(rate(pyroscope_request_duration_seconds_count[1m])) by (job, route, + cluster) + record: job_route_cluster:pyroscope_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(pyroscope_request_duration_seconds_bucket[1m])) + by (le, namespace, job, route, cluster)) + record: namespace_job_route_cluster:pyroscope_request_duration_seconds:99quantile + - expr: histogram_quantile(0.50, sum(rate(pyroscope_request_duration_seconds_bucket[1m])) + by (le, namespace, job, route, cluster)) + record: namespace_job_route_cluster:pyroscope_request_duration_seconds:50quantile + - expr: sum(rate(pyroscope_request_duration_seconds_sum[1m])) by (namespace, job, + route, cluster) / sum(rate(pyroscope_request_duration_seconds_count[1m])) + by (namespace, job, route, cluster) + record: namespace_job_route_cluster:pyroscope_request_duration_seconds:avg + - expr: sum(rate(pyroscope_request_duration_seconds_bucket[1m])) by (le, namespace, + job, route, cluster) + record: namespace_job_route_cluster:pyroscope_request_duration_seconds_bucket:sum_rate + - expr: sum(rate(pyroscope_request_duration_seconds_sum[1m])) by (namespace, job, + route, cluster) + record: namespace_job_route_cluster:pyroscope_request_duration_seconds_sum:sum_rate + - expr: sum(rate(pyroscope_request_duration_seconds_count[1m])) by (namespace, + job, route, cluster) + record: namespace_job_route_cluster:pyroscope_request_duration_seconds_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(pyroscope_distributor_received_compressed_bytes_bucket[1m])) + by (le, job, type, cluster)) + record: job_type_cluster:pyroscope_distributor_received_compressed_bytes:99quantile + - expr: histogram_quantile(0.50, sum(rate(pyroscope_distributor_received_compressed_bytes_bucket[1m])) + by (le, job, type, cluster)) + record: job_type_cluster:pyroscope_distributor_received_compressed_bytes:50quantile + - expr: sum(rate(pyroscope_distributor_received_compressed_bytes_sum[1m])) by + (job, type, cluster) / sum(rate(pyroscope_distributor_received_compressed_bytes_count[1m])) + by (job, type, cluster) + record: job_type_cluster:pyroscope_distributor_received_compressed_bytes:avg + - expr: sum(rate(pyroscope_distributor_received_compressed_bytes_bucket[1m])) + by (le, job, type, cluster) + record: job_type_cluster:pyroscope_distributor_received_compressed_bytes_bucket:sum_rate + - expr: sum(rate(pyroscope_distributor_received_compressed_bytes_sum[1m])) by + (job, type, cluster) + record: job_type_cluster:pyroscope_distributor_received_compressed_bytes_sum:sum_rate + - expr: sum(rate(pyroscope_distributor_received_compressed_bytes_count[1m])) by + (job, type, cluster) + record: job_type_cluster:pyroscope_distributor_received_compressed_bytes_count:sum_rate + - expr: histogram_quantile(0.99, sum(rate(pyroscope_distributor_received_samples_bucket[1m])) + by (le, job, type, cluster)) + record: job_type_cluster:pyroscope_distributor_received_samples:99quantile + - expr: histogram_quantile(0.50, sum(rate(pyroscope_distributor_received_samples_bucket[1m])) + by (le, job, type, cluster)) + record: job_type_cluster:pyroscope_distributor_received_samples:50quantile + - expr: sum(rate(pyroscope_distributor_received_samples_sum[1m])) by (job, type, + cluster) / sum(rate(pyroscope_distributor_received_samples_count[1m])) by + (job, type, cluster) + record: job_type_cluster:pyroscope_distributor_received_samples:avg + - expr: sum(rate(pyroscope_distributor_received_samples_bucket[1m])) by (le, job, + type, cluster) + record: job_type_cluster:pyroscope_distributor_received_samples_bucket:sum_rate + - expr: sum(rate(pyroscope_distributor_received_samples_sum[1m])) by (job, type, + cluster) + record: job_type_cluster:pyroscope_distributor_received_samples_sum:sum_rate + - expr: sum(rate(pyroscope_distributor_received_samples_count[1m])) by (job, type, + cluster) + record: job_type_cluster:pyroscope_distributor_received_samples_count:sum_rate