diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json
index b1304ffa..59dd5286 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json
@@ -416,7 +416,7 @@
"span": 6,
"targets": [
{
- "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))",
+ "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Index Entries",
"legendLink": null
@@ -981,19 +981,19 @@
"span": 12,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p90",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
@@ -1052,19 +1052,19 @@
"span": 12,
"targets": [
{
- "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))",
+ "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
- "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))",
+ "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "avg",
"legendLink": null
diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json
index 939f37e4..2db2b7cb 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json
@@ -579,7 +579,7 @@
"span": 6,
"targets": [
{
- "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)",
+ "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval])) by (user)",
"format": "time_series",
"legendFormat": "{{user}}",
"legendLink": null
diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json
index 90691632..32b8e52a 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json
@@ -114,6 +114,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -236,7 +241,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))",
+ "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -287,6 +292,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "bytes"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -373,6 +383,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -408,7 +423,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -459,6 +474,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -494,7 +514,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -632,6 +652,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -667,7 +692,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)",
+ "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])) by (level)",
"legendFormat": "{{level}}",
"refId": "A"
}
@@ -719,6 +744,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$loki_datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -771,7 +801,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)",
"intervalFactor": 3,
"legendFormat": "{{level}}",
"refId": "A"
diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json
index 1234065b..f1f6c215 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json
@@ -300,7 +300,8 @@
"value": 80
}
]
- }
+ },
+ "unit": "s"
},
"overrides": [ ]
},
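The recording-rules change is one instance of a pattern repeated across this patch: adding fieldConfig.defaults.unit so axes and tooltips render with real units instead of the unitless default ("s" for seconds, "bytes", "binBps" for binary bytes per second, "ops" for operations per second, "MBs" for megabytes per second). Each affected panel gains a fragment of this shape:

    "fieldConfig": {
      "defaults": {
        "unit": "s"
      }
    }
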
diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json
index 7d25fc64..a54d3daa 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json
@@ -87,7 +87,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -183,7 +183,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -278,7 +278,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))",
+ "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant))",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -332,7 +332,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "MBs"
},
"overrides": [ ]
},
@@ -374,7 +375,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024",
+ "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant)) / 1024 / 1024",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -524,7 +525,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -630,7 +632,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -834,7 +837,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1040,7 +1044,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1148,7 +1153,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1353,7 +1359,8 @@
"description": "",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1602,7 +1609,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))",
+ "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))",
"interval": "",
"legendFormat": "{{ tenant }} - {{ reason }}",
"refId": "A"
@@ -1727,7 +1734,7 @@
],
"targets": [
{
- "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))",
+ "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])[$__range:$__rate_interval])))",
"format": "table",
"instant": true,
"interval": "",
@@ -1852,6 +1859,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -1985,7 +1997,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -2153,6 +2165,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2189,7 +2206,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2242,6 +2259,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2278,7 +2300,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2331,6 +2353,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2367,7 +2394,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2525,6 +2552,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2658,7 +2690,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -2971,7 +3003,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]) > 0))",
+ "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]) > 0))",
"interval": "",
"legendFormat": "{{ tenant }}",
"refId": "A"
@@ -3081,13 +3113,13 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]))",
+ "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "Chunks",
"refId": "A"
},
{
- "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m])) < 1",
+ "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) < 1",
"interval": "",
"legendFormat": "De-Dupe Ratio",
"refId": "B"
@@ -3165,7 +3197,7 @@
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m])) by (le)",
+ "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -3320,7 +3352,7 @@
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))",
+ "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -3465,6 +3497,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -3598,7 +3635,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -3782,6 +3819,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -3821,19 +3863,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"intervalFactor": 1,
"legendFormat": "{{container}}: .99-{{method}}-{{name}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .9-{{method}}-{{name}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .5-{{method}}-{{name}}",
"refId": "C"
@@ -3925,7 +3967,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)",
+ "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, method, name, container)",
"intervalFactor": 1,
"legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}",
"refId": "A"
@@ -3994,6 +4036,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4033,19 +4080,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -4098,6 +4145,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4137,7 +4189,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)",
+ "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, status_code, method)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -4206,6 +4258,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4243,17 +4300,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -4304,6 +4361,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4341,20 +4403,20 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -4407,6 +4469,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4444,20 +4511,20 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -4510,6 +4577,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4547,17 +4619,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -4608,6 +4680,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4645,7 +4722,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4698,6 +4775,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4735,7 +4817,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4788,6 +4870,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4825,7 +4912,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4878,6 +4965,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4915,7 +5007,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4984,6 +5076,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5023,19 +5120,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5127,7 +5224,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -5231,7 +5328,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5317,7 +5414,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5403,7 +5500,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5489,7 +5586,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5575,17 +5672,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".99",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".9",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".5",
"refId": "C"
}
@@ -5637,6 +5734,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5675,19 +5777,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5778,7 +5880,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -5847,6 +5949,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5885,19 +5992,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5988,7 +6095,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6057,6 +6164,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6095,19 +6207,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6198,7 +6310,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6267,6 +6379,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6305,19 +6422,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6408,7 +6525,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json
index 7141e403..ed3965e5 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json
@@ -1364,7 +1364,7 @@
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"datasource": "$datasource",
@@ -1429,6 +1429,7 @@
}
]
},
+ "gridPos": { },
"id": 19,
"links": [ ],
"options": {
@@ -1440,6 +1441,411 @@
"sort": "none"
}
},
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "CPU",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": { },
+ "id": 20,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (workingset)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 21,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (go heap inuse)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "Bps"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 22,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+ "format": "time_series",
+ "legendFormat": "{{pod}} - {{device}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Writes",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "Bps"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 23,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+ "format": "time_series",
+ "legendFormat": "{{pod}} - {{device}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Reads",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 24,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-gateway.*\"})",
+ "format": "time_series",
+ "legendFormat": "{{persistentvolumeclaim}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Space Utilization",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bloom Gateway",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "id": 25,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
"span": 4,
"targets": [
{
@@ -1530,7 +1936,7 @@
}
]
},
- "id": 20,
+ "id": 26,
"links": [ ],
"options": {
"legend": {
@@ -1592,7 +1998,7 @@
},
"overrides": [ ]
},
- "id": 21,
+ "id": 27,
"links": [ ],
"options": {
"legend": {
@@ -1655,7 +2061,7 @@
"overrides": [ ]
},
"gridPos": { },
- "id": 22,
+ "id": 28,
"links": [ ],
"options": {
"legend": {
@@ -1741,7 +2147,7 @@
]
},
"gridPos": { },
- "id": 23,
+ "id": 29,
"links": [ ],
"options": {
"legend": {
@@ -1842,7 +2248,7 @@
]
},
"gridPos": { },
- "id": 24,
+ "id": 30,
"links": [ ],
"options": {
"legend": {
@@ -1904,7 +2310,7 @@
"overrides": [ ]
},
"gridPos": { },
- "id": 25,
+ "id": 31,
"links": [ ],
"options": {
"legend": {
diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json
index c8cda840..99692299 100644
--- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json
+++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json
@@ -215,7 +215,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -263,19 +263,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -341,7 +341,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -554,7 +554,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -602,19 +602,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -680,7 +680,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -893,7 +893,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -941,19 +941,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -1019,7 +1019,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -1232,7 +1232,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -1280,19 +1280,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -1358,7 +1358,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -1569,6 +1569,684 @@
},
"span": 4,
"stack": true,
+ "targets": [
+ {
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "format": "time_series",
+ "legendFormat": "{{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "QPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 14,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+        "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 99th percentile",
+ "refId": "A"
+ },
+ {
+        "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 50th percentile",
+ "refId": "B"
+ },
+ {
+        "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
+ "format": "time_series",
+ "legendFormat": "{{ route }} Average",
+ "refId": "C"
+ }
+ ],
+ "title": "Latency",
+ "type": "timeseries",
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 15,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "__auto",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Per Pod Latency (p99)",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Index Gateway",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "1xx": "#EAB839",
+ "2xx": "#7EB26D",
+ "3xx": "#6ED0E0",
+ "4xx": "#EF843C",
+ "5xx": "#E24D42",
+ "OK": "#7EB26D",
+ "cancel": "#A9A9A9",
+ "error": "#E24D42",
+ "success": "#7EB26D"
+ },
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "1xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EAB839",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "2xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "3xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#6ED0E0",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "4xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EF843C",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "5xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "OK"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cancel"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#A9A9A9",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "error"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "fill": 10,
+ "id": 16,
+ "linewidth": 0,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "stack": true,
+ "targets": [
+ {
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "format": "time_series",
+ "legendFormat": "{{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "QPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 17,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 99th percentile",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 50th percentile",
+ "refId": "B"
+ },
+ {
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
+ "format": "time_series",
+ "legendFormat": "{{ route }} Average",
+ "refId": "C"
+ }
+ ],
+ "title": "Latency",
+ "type": "timeseries",
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 18,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "__auto",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Per Pod Latency (p99)",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bloom Gateway",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "1xx": "#EAB839",
+ "2xx": "#7EB26D",
+ "3xx": "#6ED0E0",
+ "4xx": "#EF843C",
+ "5xx": "#E24D42",
+ "OK": "#7EB26D",
+ "cancel": "#A9A9A9",
+ "error": "#E24D42",
+ "success": "#7EB26D"
+ },
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "1xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EAB839",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "2xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "3xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#6ED0E0",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "4xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EF843C",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "5xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "OK"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cancel"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#A9A9A9",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "error"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "fill": 10,
+ "id": 19,
+ "linewidth": 0,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
@@ -1604,7 +2282,7 @@
},
"overrides": [ ]
},
- "id": 14,
+ "id": 20,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -1682,7 +2360,7 @@
},
"overrides": [ ]
},
- "id": 15,
+ "id": 21,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -1714,7 +2392,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Index",
+      "title": "TSDB Index",
"titleSize": "h6"
},
{
@@ -1894,7 +2572,7 @@
]
},
"fill": 10,
- "id": 16,
+ "id": 22,
"linewidth": 0,
"links": [ ],
"options": {
@@ -1943,7 +2621,7 @@
},
"overrides": [ ]
},
- "id": 17,
+ "id": 23,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -2021,7 +2699,7 @@
},
"overrides": [ ]
},
- "id": 18,
+ "id": 24,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -2053,7 +2731,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "BoltDB Shipper",
+ "title": "BoltDB Index",
"titleSize": "h6"
}
],
diff --git a/monitoring-mixins/loki-mixin/deploy/loki-mixin-alerts.yaml b/monitoring-mixins/loki-mixin/deploy/loki-mixin-alerts.yaml
index af06880c..7c0825d8 100644
--- a/monitoring-mixins/loki-mixin/deploy/loki-mixin-alerts.yaml
+++ b/monitoring-mixins/loki-mixin/deploy/loki-mixin-alerts.yaml
@@ -3,8 +3,9 @@ groups:
rules:
- alert: LokiRequestErrors
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+ summary: Loki request error rate is high.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
/
@@ -15,16 +16,18 @@ groups:
severity: critical
- alert: LokiRequestPanics
annotations:
- message: |
+ description: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+ summary: Loki requests are causing code panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
- alert: LokiRequestLatency
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+      summary: Loki request latency is high.
expr: |
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
for: 15m
@@ -32,8 +35,9 @@ groups:
severity: critical
- alert: LokiTooManyCompactorsRunning
annotations:
- message: |
+ description: |
{{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
+ summary: Loki deployment is running more than one compactor.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
for: 5m
diff --git a/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml b/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml
index d77ef2c9..5f60f007 100644
--- a/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml
+++ b/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml
@@ -419,7 +419,7 @@ data:
"span": 6,
"targets": [
{
- "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))",
+ "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Index Entries",
"legendLink": null
@@ -984,19 +984,19 @@ data:
"span": 12,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p90",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
@@ -1055,19 +1055,19 @@ data:
"span": 12,
"targets": [
{
- "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))",
+ "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
- "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))",
+ "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "avg",
"legendLink": null
@@ -1781,7 +1781,7 @@ data:
"span": 6,
"targets": [
{
- "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)",
+ "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval])) by (user)",
"format": "time_series",
"legendFormat": "{{user}}",
"legendLink": null
@@ -2068,6 +2068,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2190,7 +2195,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))",
+ "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -2241,6 +2246,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "bytes"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2327,6 +2337,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2362,7 +2377,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -2413,6 +2428,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2448,7 +2468,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -2586,6 +2606,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2621,7 +2646,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)",
+ "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])) by (level)",
"legendFormat": "{{level}}",
"refId": "A"
}
@@ -2673,6 +2698,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$loki_datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2725,7 +2755,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)",
"intervalFactor": 3,
"legendFormat": "{{level}}",
"refId": "A"
@@ -3338,7 +3368,8 @@ data:
"value": 80
}
]
- }
+ },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -3860,7 +3891,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -3956,7 +3987,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -4051,7 +4082,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))",
+ "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant))",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -4105,7 +4136,8 @@ data:
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "MBs"
},
"overrides": [ ]
},
@@ -4147,7 +4179,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024",
+ "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant)) / 1024 / 1024",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -4297,7 +4329,8 @@ data:
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -4403,7 +4436,8 @@ data:
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -4607,7 +4641,8 @@ data:
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -4813,7 +4848,8 @@ data:
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -4921,7 +4957,8 @@ data:
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -5126,7 +5163,8 @@ data:
"description": "",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -5375,7 +5413,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))",
+ "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))",
"interval": "",
"legendFormat": "{{ tenant }} - {{ reason }}",
"refId": "A"
@@ -5500,7 +5538,7 @@ data:
],
"targets": [
{
- "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))",
+ "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])[$__range:$__rate_interval])))",
"format": "table",
"instant": true,
"interval": "",
@@ -5625,6 +5663,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5758,7 +5801,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -5926,6 +5969,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5962,7 +6010,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -6015,6 +6063,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6051,7 +6104,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -6104,6 +6157,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6140,7 +6198,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -6298,6 +6356,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6431,7 +6494,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -6744,7 +6807,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]) > 0))",
+ "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]) > 0))",
"interval": "",
"legendFormat": "{{ tenant }}",
"refId": "A"
@@ -6854,13 +6917,13 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]))",
+ "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "Chunks",
"refId": "A"
},
{
- "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m])) < 1",
+ "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) < 1",
"interval": "",
"legendFormat": "De-Dupe Ratio",
"refId": "B"
@@ -6938,7 +7001,7 @@ data:
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m])) by (le)",
+ "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -7093,7 +7156,7 @@ data:
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))",
+ "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -7238,6 +7301,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -7371,7 +7439,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -7555,6 +7623,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -7594,19 +7667,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"intervalFactor": 1,
"legendFormat": "{{container}}: .99-{{method}}-{{name}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .9-{{method}}-{{name}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .5-{{method}}-{{name}}",
"refId": "C"
@@ -7698,7 +7771,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)",
+ "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, method, name, container)",
"intervalFactor": 1,
"legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}",
"refId": "A"
@@ -7767,6 +7840,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -7806,19 +7884,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -7871,6 +7949,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -7910,7 +7993,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)",
+ "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, status_code, method)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -7979,6 +8062,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8016,17 +8104,17 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -8077,6 +8165,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8114,20 +8207,20 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -8180,6 +8273,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8217,20 +8315,20 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -8283,6 +8381,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8320,17 +8423,17 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -8381,6 +8484,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8418,7 +8526,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -8471,6 +8579,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8508,7 +8621,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -8561,6 +8674,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8598,7 +8716,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -8651,6 +8769,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8688,7 +8811,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -8757,6 +8880,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -8796,19 +8924,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -8900,7 +9028,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -9004,7 +9132,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -9090,7 +9218,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -9176,7 +9304,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -9262,7 +9390,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -9348,17 +9476,17 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".99",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".9",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".5",
"refId": "C"
}
@@ -9410,6 +9538,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -9448,19 +9581,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -9551,7 +9684,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -9620,6 +9753,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -9658,19 +9796,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -9761,7 +9899,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -9830,6 +9968,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -9868,19 +10011,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -9971,7 +10114,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -10040,6 +10183,11 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -10078,19 +10226,19 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -10181,7 +10329,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -11727,7 +11875,7 @@ data:
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"datasource": "$datasource",
@@ -11792,6 +11940,7 @@ data:
}
]
},
+ "gridPos": { },
"id": 19,
"links": [ ],
"options": {
@@ -11803,22 +11952,21 @@ data:
"sort": "none"
}
},
- "span": 4,
"targets": [
{
- "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))",
+ "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
- "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)",
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
- "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})",
+ "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
@@ -11893,6 +12041,7 @@ data:
}
]
},
+ "gridPos": { },
"id": 20,
"links": [ ],
"options": {
@@ -11904,22 +12053,21 @@ data:
"sort": "none"
}
},
- "span": 4,
"targets": [
{
- "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})",
+ "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
},
{
- "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)",
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)",
"format": "time_series",
"legendFormat": "request",
"legendLink": null
},
{
- "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)",
+ "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)",
"format": "time_series",
"legendFormat": "limit",
"legendLink": null
@@ -11955,6 +12103,7 @@ data:
},
"overrides": [ ]
},
+ "gridPos": { },
"id": 21,
"links": [ ],
"options": {
@@ -11966,10 +12115,9 @@ data:
"sort": "none"
}
},
- "span": 4,
"targets": [
{
- "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})",
+ "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\"})",
"format": "time_series",
"legendFormat": "{{pod}}",
"legendLink": null
@@ -11980,40 +12128,28 @@ data:
"sort": 2
},
"type": "timeseries"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Ingester",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
- "fillOpacity": 10,
- "lineWidth": 1,
+ "fillOpacity": 100,
+ "lineWidth": 0,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
- "mode": "none"
+ "mode": "normal"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
- "unit": "short"
+ "unit": "Bps"
},
"overrides": [ ]
},
@@ -12031,13 +12167,13 @@ data:
},
"targets": [
{
- "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
+ "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
"format": "time_series",
- "legendFormat": "{{pod}}",
+ "legendFormat": "{{pod}} - {{device}}",
"legendLink": null
}
],
- "title": "Rules",
+ "title": "Disk Writes",
"type": "timeseries"
},
{
@@ -12046,62 +12182,23 @@ data:
"defaults": {
"custom": {
"drawStyle": "line",
- "fillOpacity": 10,
- "lineWidth": 1,
+ "fillOpacity": 100,
+ "lineWidth": 0,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
- "mode": "none"
+ "mode": "normal"
}
},
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
- "unit": "short"
+ "unit": "Bps"
},
- "overrides": [
- {
- "matcher": {
- "id": "byName",
- "options": "request"
- },
- "properties": [
- {
- "id": "color",
- "value": {
- "fixedColor": "#FFC000",
- "mode": "fixed"
- }
- },
- {
- "id": "custom.fillOpacity",
- "value": 0
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "limit"
- },
- "properties": [
- {
- "id": "color",
- "value": {
- "fixedColor": "#E02F44",
- "mode": "fixed"
- }
- },
- {
- "id": "custom.fillOpacity",
- "value": 0
- }
- ]
- }
- ]
+ "overrides": [ ]
},
"gridPos": { },
"id": 23,
@@ -12117,28 +12214,13 @@ data:
},
"targets": [
{
- "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))",
- "format": "time_series",
- "legendFormat": "{{pod}}",
- "legendLink": null
- },
- {
- "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)",
- "format": "time_series",
- "legendFormat": "request",
- "legendLink": null
- },
- {
- "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+ "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
"format": "time_series",
- "legendFormat": "limit",
+ "legendFormat": "{{pod}} - {{device}}",
"legendLink": null
}
],
- "title": "CPU",
- "tooltip": {
- "sort": 2
- },
+ "title": "Disk Reads",
"type": "timeseries"
},
{
@@ -12161,18 +12243,78 @@ data:
"mode": "absolute",
"steps": [ ]
},
- "unit": "bytes"
+ "unit": "percentunit"
},
- "overrides": [
- {
- "matcher": {
- "id": "byName",
- "options": "request"
- },
- "properties": [
- {
- "id": "color",
- "value": {
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 24,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-gateway.*\"})",
+ "format": "time_series",
+ "legendFormat": "{{persistentvolumeclaim}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Space Utilization",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bloom Gateway",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
"fixedColor": "#FFC000",
"mode": "fixed"
}
@@ -12204,9 +12346,1223 @@ data:
}
]
},
- "gridPos": { },
- "id": 24,
+ "id": 25,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "CPU",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "id": 26,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (workingset)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [ ]
+ },
+ "id": 27,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (go heap inuse)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Ingester",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 28,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Rules",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": { },
+ "id": 29,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "CPU",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": { },
+ "id": 30,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (workingset)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 31,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (go heap inuse)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Ruler",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "loki"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "prod",
+ "value": "prod"
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(loki_build_info, cluster)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 2,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "prod",
+ "value": "prod"
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "namespace",
+ "multi": false,
+ "name": "namespace",
+ "options": [ ],
+ "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 2,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Loki / Reads Resources",
+ "uid": "reads-resources",
+ "version": 0
+ }
+kind: ConfigMap
+metadata:
+ annotations:
+ grafana_dashboard_folder: /dashboards/Loki Mixin
+ labels:
+ grafana_dashboard: "1"
+ name: loki-reads-resources.json
+ namespace: monitoring-system
+---
+apiVersion: v1
+data:
+ loki-reads.json: |-
+ {
+ "annotations": {
+ "list": [ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "links": [
+ {
+ "asDropdown": true,
+ "icon": "external link",
+ "includeVars": true,
+ "keepTime": true,
+ "tags": [
+ "loki"
+ ],
+ "targetBlank": false,
+ "title": "Loki Dashboards",
+ "type": "dashboards"
+ }
+ ],
+ "refresh": "10s",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "1xx": "#EAB839",
+ "2xx": "#7EB26D",
+ "3xx": "#6ED0E0",
+ "4xx": "#EF843C",
+ "5xx": "#E24D42",
+ "OK": "#7EB26D",
+ "cancel": "#A9A9A9",
+ "error": "#E24D42",
+ "success": "#7EB26D"
+ },
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "1xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EAB839",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "2xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "3xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#6ED0E0",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "4xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EF843C",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "5xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "OK"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cancel"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#A9A9A9",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "error"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "fill": 10,
+ "id": 1,
+ "linewidth": 0,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "stack": true,
+ "targets": [
+ {
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "format": "time_series",
+ "legendFormat": "{{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "QPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 2,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 99th percentile",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 50th percentile",
+ "refId": "B"
+ },
+ {
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ",
+ "format": "time_series",
+ "legendFormat": "{{ route }} Average",
+ "refId": "C"
+ }
+ ],
+ "title": "Latency",
+ "type": "timeseries",
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 3,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "__auto",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Per Pod Latency (p99)",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Frontend (query-frontend)",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "1xx": "#EAB839",
+ "2xx": "#7EB26D",
+ "3xx": "#6ED0E0",
+ "4xx": "#EF843C",
+ "5xx": "#E24D42",
+ "OK": "#7EB26D",
+ "cancel": "#A9A9A9",
+ "error": "#E24D42",
+ "success": "#7EB26D"
+ },
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "1xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EAB839",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "2xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "3xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#6ED0E0",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "4xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EF843C",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "5xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "OK"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cancel"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#A9A9A9",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "error"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "fill": 10,
+ "id": 4,
+ "linewidth": 0,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "stack": true,
+ "targets": [
+ {
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "format": "time_series",
+ "legendFormat": "{{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "QPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 5,
"links": [ ],
+ "nullPointMode": "null as zero",
"options": {
"legend": {
"showLegend": true
@@ -12216,31 +13572,47 @@ data:
"sort": "none"
}
},
+ "span": 4,
"targets": [
{
- "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
- "legendFormat": "{{pod}}",
- "legendLink": null
+ "legendFormat": "{{ route }} 99th percentile",
+ "refId": "A"
},
{
- "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
- "legendFormat": "request",
- "legendLink": null
+ "legendFormat": "{{ route }} 50th percentile",
+ "refId": "B"
},
{
- "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ",
"format": "time_series",
- "legendFormat": "limit",
- "legendLink": null
+ "legendFormat": "{{ route }} Average",
+ "refId": "C"
}
],
- "title": "Memory (workingset)",
- "tooltip": {
- "sort": 2
- },
- "type": "timeseries"
+ "title": "Latency",
+ "type": "timeseries",
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
},
{
"datasource": "$datasource",
@@ -12262,13 +13634,13 @@ data:
"mode": "absolute",
"steps": [ ]
},
- "unit": "bytes"
+ "unit": "ms"
},
"overrides": [ ]
},
- "gridPos": { },
- "id": 25,
+ "id": 6,
"links": [ ],
+ "nullPointMode": "null as zero",
"options": {
"legend": {
"showLegend": true
@@ -12278,18 +13650,19 @@ data:
"sort": "none"
}
},
+ "span": 4,
"targets": [
{
- "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
- "legendFormat": "{{pod}}",
- "legendLink": null
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "__auto",
+ "refId": "A",
+ "step": 10
}
],
- "title": "Memory (go heap inuse)",
- "tooltip": {
- "sort": 2
- },
+ "title": "Per Pod Latency (p99)",
"type": "timeseries"
}
],
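Aside: the latency panels above read from cluster_job_route:loki_request_duration_seconds_bucket:sum_rate rather than the raw histogram. The colon-delimited name follows the Prometheus recording-rule convention (aggregation level : metric : operations). A hedged sketch of the rule such a name implies — the actual rule ships with the mixin's recording rules, so treat the window and label set as assumptions:

    # Illustrative reconstruction only; the shipped rule may differ.
    # record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate
    sum by (cluster, job, route, le) (
      rate(loki_request_duration_seconds_bucket[1m])
    )

Pre-aggregating the buckets this way keeps histogram_quantile cheap on panels that would otherwise fan out across every pod's bucket series.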
@@ -12297,150 +13670,9 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Ruler",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
- "loki"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": "Data source",
- "name": "datasource",
- "options": [ ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
- "text": "prod",
- "value": "prod"
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "cluster",
- "multi": false,
- "name": "cluster",
- "options": [ ],
- "query": "label_values(loki_build_info, cluster)",
- "refresh": 1,
- "regex": "",
- "sort": 2,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "prod",
- "value": "prod"
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "namespace",
- "multi": false,
- "name": "namespace",
- "options": [ ],
- "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
- "refresh": 1,
- "regex": "",
- "sort": 2,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "utc",
- "title": "Loki / Reads Resources",
- "uid": "reads-resources",
- "version": 0
- }
-kind: ConfigMap
-metadata:
- annotations:
- grafana_dashboard_folder: /dashboards/Loki Mixin
- labels:
- grafana_dashboard: "1"
- name: loki-reads-resources.json
- namespace: monitoring-system
----
-apiVersion: v1
-data:
- loki-reads.json: |-
- {
- "annotations": {
- "list": [ ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
- {
- "asDropdown": true,
- "icon": "external link",
- "includeVars": true,
- "keepTime": true,
- "tags": [
- "loki"
- ],
- "targetBlank": false,
- "title": "Loki Dashboards",
- "type": "dashboards"
- }
- ],
- "refresh": "10s",
- "rows": [
+ "title": "Querier",
+ "titleSize": "h6"
+ },
{
"collapse": false,
"height": "250px",
@@ -12618,7 +13850,7 @@ data:
]
},
"fill": 10,
- "id": 1,
+ "id": 7,
"linewidth": 0,
"links": [ ],
"options": {
@@ -12634,7 +13866,7 @@ data:
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -12667,7 +13899,7 @@ data:
},
"overrides": [ ]
},
- "id": 2,
+ "id": 8,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -12682,19 +13914,19 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -12745,7 +13977,7 @@ data:
},
"overrides": [ ]
},
- "id": 3,
+ "id": 9,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -12760,7 +13992,7 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -12777,7 +14009,7 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Frontend (query-frontend)",
+ "title": "Ingester",
"titleSize": "h6"
},
{
@@ -12957,7 +14189,7 @@ data:
]
},
"fill": 10,
- "id": 4,
+ "id": 10,
"linewidth": 0,
"links": [ ],
"options": {
@@ -12973,7 +14205,7 @@ data:
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -13006,7 +14238,7 @@ data:
},
"overrides": [ ]
},
- "id": 5,
+ "id": 11,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -13021,19 +14253,19 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -13084,7 +14316,7 @@ data:
},
"overrides": [ ]
},
- "id": 6,
+ "id": 12,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -13099,7 +14331,7 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -13116,7 +14348,7 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Querier",
+ "title": "Ingester - Zone Aware",
"titleSize": "h6"
},
{
@@ -13296,7 +14528,7 @@ data:
]
},
"fill": 10,
- "id": 7,
+ "id": 13,
"linewidth": 0,
"links": [ ],
"options": {
@@ -13312,7 +14544,7 @@ data:
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -13345,7 +14577,7 @@ data:
},
"overrides": [ ]
},
- "id": 8,
+ "id": 14,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -13360,19 +14592,19 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -13423,7 +14655,7 @@ data:
},
"overrides": [ ]
},
- "id": 9,
+ "id": 15,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -13438,7 +14670,7 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -13455,7 +14687,7 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Ingester",
+ "title": "Index Gateway",
"titleSize": "h6"
},
{
@@ -13635,7 +14867,7 @@ data:
]
},
"fill": 10,
- "id": 10,
+ "id": 16,
"linewidth": 0,
"links": [ ],
"options": {
@@ -13651,7 +14883,7 @@ data:
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -13684,7 +14916,7 @@ data:
},
"overrides": [ ]
},
- "id": 11,
+ "id": 17,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -13699,19 +14931,19 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -13762,7 +14994,7 @@ data:
},
"overrides": [ ]
},
- "id": 12,
+ "id": 18,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -13777,7 +15009,7 @@ data:
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -13794,7 +15026,7 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Ingester - Zone Aware",
+ "title": "Bloom Gateway",
"titleSize": "h6"
},
{
@@ -13974,7 +15206,7 @@ data:
]
},
"fill": 10,
- "id": 13,
+ "id": 19,
"linewidth": 0,
"links": [ ],
"options": {
@@ -14023,7 +15255,7 @@ data:
},
"overrides": [ ]
},
- "id": 14,
+ "id": 20,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -14101,7 +15333,7 @@ data:
},
"overrides": [ ]
},
- "id": 15,
+ "id": 21,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -14133,7 +15365,7 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Index",
+ "title": "TSBD Index",
"titleSize": "h6"
},
{
@@ -14313,7 +15545,7 @@ data:
]
},
"fill": 10,
- "id": 16,
+ "id": 22,
"linewidth": 0,
"links": [ ],
"options": {
@@ -14362,7 +15594,7 @@ data:
},
"overrides": [ ]
},
- "id": 17,
+ "id": 23,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -14440,7 +15672,7 @@ data:
},
"overrides": [ ]
},
- "id": 18,
+ "id": 24,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -14472,7 +15704,7 @@ data:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "BoltDB Shipper",
+ "title": "BoltDB Index",
"titleSize": "h6"
}
],
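Note: every QPS panel in these dashboards leans on the same nested label_replace idiom to fold raw status_code values into the classes the color overrides expect. A minimal standalone sketch, with the cluster/job/route matchers trimmed for readability (the selector is illustrative):

    # Sketch only; not part of the rendered dashboard JSON.
    sum by (status) (
      label_replace(
        label_replace(
          rate(loki_request_duration_seconds_count[$__rate_interval]),
          # numeric codes: keep the first digit, so "500" -> "5xx", "204" -> "2xx"
          "status", "${1}xx", "status_code", "([0-9]).."
        ),
        # word codes ("success", "error", "cancel") pass through unchanged
        "status", "${1}", "status_code", "([a-zA-Z]+)"
      )
    )

Grafana documents $__rate_interval as resolving to at least four scrape intervals, which is why it is preferred over fixed windows like [1m] that can span too few samples at coarse zoom levels.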
@@ -18674,8 +19906,9 @@ spec:
rules:
- alert: LokiRequestErrors
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+ summary: Loki request error rate is high.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
/
@@ -18686,16 +19919,18 @@ spec:
severity: critical
- alert: LokiRequestPanics
annotations:
- message: |
+ description: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+ summary: Loki requests are causing code panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
- alert: LokiRequestLatency
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+ summary: Loki request latency is high.
expr: |
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
for: 15m
@@ -18703,8 +19938,9 @@ spec:
severity: critical
- alert: LokiTooManyCompactorsRunning
annotations:
- message: |
+ description: |
{{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
+ summary: Loki deployment is running more than one compactor.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
for: 5m
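For reference, the per-pod latency panels above derive their p99 from a classic Prometheus histogram. A minimal sketch of the pattern, with the route matcher abbreviated for readability:

```yaml
# Sketch of the per-pod p99 construction used above (abbreviated matchers).
# histogram_quantile interpolates within the le buckets; the trailing * 1e3
# converts the seconds-based histogram into milliseconds for the panel unit.
expr: >-
  histogram_quantile(0.99,
    sum by (le, pod) (
      rate(loki_request_duration_seconds_bucket{cluster=~"$cluster", job=~"($namespace)/bloom-gateway"}[$__rate_interval])))
  * 1e3
```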
diff --git a/monitoring-mixins/loki-mixin/deploy/prometheus-alerts.yaml b/monitoring-mixins/loki-mixin/deploy/prometheus-alerts.yaml
index 5f6576e7..7e99cfaf 100644
--- a/monitoring-mixins/loki-mixin/deploy/prometheus-alerts.yaml
+++ b/monitoring-mixins/loki-mixin/deploy/prometheus-alerts.yaml
@@ -10,8 +10,9 @@ spec:
rules:
- alert: LokiRequestErrors
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+ summary: Loki request error rate is high.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
/
@@ -22,16 +23,18 @@ spec:
severity: critical
- alert: LokiRequestPanics
annotations:
- message: |
+ description: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+ summary: Loki requests are causing code panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
- alert: LokiRequestLatency
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+ summary: Loki request latency is high.
expr: |
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
for: 15m
@@ -39,8 +42,9 @@ spec:
severity: critical
- alert: LokiTooManyCompactorsRunning
annotations:
- message: |
+ description: |
{{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
+ summary: Loki deployment is running more than one compactor.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
for: 5m
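Both alert files make the same annotation change: the free-form message annotation is split into a templated description plus a short, static summary, the pair that Alertmanager and Grafana notification templates expect by default. A minimal sketch of the resulting rule shape; the 10% threshold and 15m hold shown here are illustrative placeholders, not taken from the mixin:

```yaml
# Sketch of a rule after the message -> description/summary split.
groups:
  - name: loki_alerts
    rules:
      - alert: LokiRequestErrors
        expr: |
          100 * sum by (namespace, job, route) (rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m]))
            /
          sum by (namespace, job, route) (rate(loki_request_duration_seconds_count[2m]))
            > 10
        for: 15m
        labels:
          severity: critical
        annotations:
          # short, static line for notification titles
          summary: Loki request error rate is high.
          # templated body carrying the firing series' labels and value
          description: |
            {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
```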
diff --git a/monitoring-mixins/loki-mixin/jsonnetfile.lock.json b/monitoring-mixins/loki-mixin/jsonnetfile.lock.json
index f2675b4a..86296aed 100644
--- a/monitoring-mixins/loki-mixin/jsonnetfile.lock.json
+++ b/monitoring-mixins/loki-mixin/jsonnetfile.lock.json
@@ -18,8 +18,8 @@
"subdir": "grafana-builder"
}
},
- "version": "abf0830008f0a61f3a7f5782738b3569eb6b0203",
- "sum": "+z5VY+bPBNqXcmNAV8xbJcbsRA+pro1R3IM7aIY8OlU="
+ "version": "33d0c06eadc2558b339903771425c26b4e157919",
+ "sum": "5ku1Hd8UPnjmn8nWyaTFzMpT3Pa+VylBnmposMSVEuU="
},
{
"source": {
@@ -28,8 +28,8 @@
"subdir": "mixin-utils"
}
},
- "version": "abf0830008f0a61f3a7f5782738b3569eb6b0203",
- "sum": "0jg7qc3N8FtMnnQbunYCGSNcjHr9Y1krZW9OSTmWcEQ="
+ "version": "33d0c06eadc2558b339903771425c26b4e157919",
+ "sum": "A0f0G3aJEkdu5sqHXtizHDyU1jOSx6VuEXLRlI9Psp8="
},
{
"source": {
@@ -38,8 +38,8 @@
"subdir": "production/loki-mixin"
}
},
- "version": "82f6548183233da36a18019739c6223846f7baba",
- "sum": "poponbHA5aiE9loWWsqmgd8zqGASayqJr3IxHwcjkdU="
+ "version": "2d62fca05d6ec82196b46c956733c89439660754",
+ "sum": "hMxvpcgKvJqMAK9kooRu2RvD3ctX3mNk7/yij6RWiVs="
},
{
"source": {
@@ -48,8 +48,8 @@
"subdir": "operations/mimir-mixin"
}
},
- "version": "cd13a1b0509f877f3d84e3de5c884680563e6ab3",
- "sum": "eNgijCGmvYx1X4yXMLBsdo7jo3HbDknbzSYHGt/I/MY="
+ "version": "748a726fa073c7fa3ca1ea79d6ac1265656a9e01",
+ "sum": "Qx7y7DJqEtiHer20VwAULlh+67Ae2OGanw5pdABUvWg="
},
{
"source": {
@@ -58,7 +58,7 @@
"subdir": "jsonnet/kube-prometheus/lib"
}
},
- "version": "76f2e1ef95be0df752037baa040781c5219e1fb3",
+ "version": "407142589d5b38663e70f80c6c3b493e5b5e6578",
"sum": "QKRgrgEZ3k9nLmLCrDBaeIGVqQZf+AvZTcnhdLk3TrA="
}
],
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-chunks.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-chunks.json
index b1304ffa..59dd5286 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-chunks.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-chunks.json
@@ -416,7 +416,7 @@
"span": 6,
"targets": [
{
- "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))",
+ "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Index Entries",
"legendLink": null
@@ -981,19 +981,19 @@
"span": 12,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p90",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
@@ -1052,19 +1052,19 @@
"span": 12,
"targets": [
{
- "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))",
+ "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p50",
"legendLink": null
},
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le))",
"format": "time_series",
"legendFormat": "p99",
"legendLink": null
},
{
- "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))",
+ "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "avg",
"legendLink": null
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-deletion.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-deletion.json
index 939f37e4..2db2b7cb 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-deletion.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-deletion.json
@@ -579,7 +579,7 @@
"span": 6,
"targets": [
{
- "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)",
+ "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval])) by (user)",
"format": "time_series",
"legendFormat": "{{user}}",
"legendLink": null
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-logs.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-logs.json
index 90691632..32b8e52a 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-logs.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-logs.json
@@ -114,6 +114,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -236,7 +241,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))",
+ "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -287,6 +292,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "bytes"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -373,6 +383,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -408,7 +423,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -459,6 +474,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -494,7 +514,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -632,6 +652,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -667,7 +692,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)",
+ "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])) by (level)",
"legendFormat": "{{level}}",
"refId": "A"
}
@@ -719,6 +744,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$loki_datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -771,7 +801,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)",
"intervalFactor": 3,
"legendFormat": "{{level}}",
"refId": "A"
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-mixin-recording-rules.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-mixin-recording-rules.json
index 1234065b..f1f6c215 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-mixin-recording-rules.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-mixin-recording-rules.json
@@ -300,7 +300,8 @@
"value": 80
}
]
- }
+ },
+ "unit": "s"
},
"overrides": [ ]
},
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-operational.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-operational.json
index 7d25fc64..a54d3daa 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-operational.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-operational.json
@@ -87,7 +87,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -183,7 +183,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -278,7 +278,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))",
+ "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant))",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -332,7 +332,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "MBs"
},
"overrides": [ ]
},
@@ -374,7 +375,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024",
+ "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant)) / 1024 / 1024",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -524,7 +525,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -630,7 +632,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -834,7 +837,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1040,7 +1044,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1148,7 +1153,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1353,7 +1359,8 @@
"description": "",
"fieldConfig": {
"defaults": {
- "custom": { }
+ "custom": { },
+ "unit": "s"
},
"overrides": [ ]
},
@@ -1602,7 +1609,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))",
+ "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))",
"interval": "",
"legendFormat": "{{ tenant }} - {{ reason }}",
"refId": "A"
@@ -1727,7 +1734,7 @@
],
"targets": [
{
- "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))",
+ "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])[$__range:$__rate_interval])))",
"format": "table",
"instant": true,
"interval": "",
@@ -1852,6 +1859,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -1985,7 +1997,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -2153,6 +2165,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2189,7 +2206,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2242,6 +2259,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2278,7 +2300,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2331,6 +2353,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2367,7 +2394,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2525,6 +2552,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2658,7 +2690,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/ingester.*\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -2971,7 +3003,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]) > 0))",
+ "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]) > 0))",
"interval": "",
"legendFormat": "{{ tenant }}",
"refId": "A"
@@ -3081,13 +3113,13 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m]))",
+ "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "Chunks",
"refId": "A"
},
{
- "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m])) < 1",
+ "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval])) < 1",
"interval": "",
"legendFormat": "De-Dupe Ratio",
"refId": "B"
@@ -3165,7 +3197,7 @@
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[1m])) by (le)",
+ "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=~\"($namespace)/ingester.*\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -3320,7 +3352,7 @@
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[1m]))",
+ "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"($namespace)/ingester.*\"}[$__rate_interval]))",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -3465,6 +3497,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -3598,7 +3635,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/querier\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -3782,6 +3819,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -3821,19 +3863,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"intervalFactor": 1,
"legendFormat": "{{container}}: .99-{{method}}-{{name}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .9-{{method}}-{{name}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .5-{{method}}-{{name}}",
"refId": "C"
@@ -3925,7 +3967,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)",
+ "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, method, name, container)",
"intervalFactor": 1,
"legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}",
"refId": "A"
@@ -3994,6 +4036,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4033,19 +4080,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -4098,6 +4145,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4137,7 +4189,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)",
+ "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, status_code, method)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -4206,6 +4258,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4243,17 +4300,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -4304,6 +4361,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4341,20 +4403,20 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -4407,6 +4469,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4444,20 +4511,20 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -4510,6 +4577,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4547,17 +4619,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -4608,6 +4680,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4645,7 +4722,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4698,6 +4775,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4735,7 +4817,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4788,6 +4870,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4825,7 +4912,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4878,6 +4965,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4915,7 +5007,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4984,6 +5076,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5023,19 +5120,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5127,7 +5224,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -5231,7 +5328,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5317,7 +5414,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5403,7 +5500,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5489,7 +5586,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5575,17 +5672,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".99",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".9",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".5",
"refId": "C"
}
@@ -5637,6 +5734,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5675,19 +5777,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5778,7 +5880,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -5847,6 +5949,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5885,19 +5992,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5988,7 +6095,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6057,6 +6164,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6095,19 +6207,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6198,7 +6310,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6267,6 +6379,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6305,19 +6422,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6408,7 +6525,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads-resources.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads-resources.json
index 7141e403..ed3965e5 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads-resources.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads-resources.json
@@ -1364,7 +1364,7 @@
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"datasource": "$datasource",
@@ -1429,6 +1429,7 @@
}
]
},
+ "gridPos": { },
"id": 19,
"links": [ ],
"options": {
@@ -1440,6 +1441,411 @@
"sort": "none"
}
},
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"cpu\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "CPU",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": { },
+ "id": 20,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ },
+ {
+ "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\", resource=\"memory\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "request",
+ "legendLink": null
+ },
+ {
+ "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"} > 0)",
+ "format": "time_series",
+ "legendFormat": "limit",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (workingset)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 21,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\"})",
+ "format": "time_series",
+ "legendFormat": "{{pod}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Memory (go heap inuse)",
+ "tooltip": {
+ "sort": 2
+ },
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "Bps"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 22,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+ "format": "time_series",
+ "legendFormat": "{{pod}} - {{device}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Writes",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "Bps"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 23,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
+ "format": "time_series",
+ "legendFormat": "{{pod}} - {{device}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Reads",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [ ]
+ },
+ "gridPos": { },
+ "id": 24,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-gateway.*\"})",
+ "format": "time_series",
+ "legendFormat": "{{persistentvolumeclaim}}",
+ "legendLink": null
+ }
+ ],
+ "title": "Disk Space Utilization",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bloom Gateway",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "request"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#FFC000",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "limit"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E02F44",
+ "mode": "fixed"
+ }
+ },
+ {
+ "id": "custom.fillOpacity",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "id": 25,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
"span": 4,
"targets": [
{
@@ -1530,7 +1936,7 @@
}
]
},
- "id": 20,
+ "id": 26,
"links": [ ],
"options": {
"legend": {
@@ -1592,7 +1998,7 @@
},
"overrides": [ ]
},
- "id": 21,
+ "id": 27,
"links": [ ],
"options": {
"legend": {
@@ -1655,7 +2061,7 @@
"overrides": [ ]
},
"gridPos": { },
- "id": 22,
+ "id": 28,
"links": [ ],
"options": {
"legend": {
@@ -1741,7 +2147,7 @@
]
},
"gridPos": { },
- "id": 23,
+ "id": 29,
"links": [ ],
"options": {
"legend": {
@@ -1842,7 +2248,7 @@
]
},
"gridPos": { },
- "id": 24,
+ "id": 30,
"links": [ ],
"options": {
"legend": {
@@ -1904,7 +2310,7 @@
"overrides": [ ]
},
"gridPos": { },
- "id": 25,
+ "id": 31,
"links": [ ],
"options": {
"legend": {
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads.json b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads.json
index c8cda840..99692299 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads.json
+++ b/monitoring-mixins/loki-mixin/microservices-mode/dashboards_out/loki-reads.json
@@ -215,7 +215,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -263,19 +263,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -341,7 +341,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -554,7 +554,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -602,19 +602,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -680,7 +680,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"(api_prom_rules|api_prom_rules_namespace_groupname|api_v1_rules|loki_api_v1_delete|loki_api_v1_detected_labels|loki_api_v1_index_stats|loki_api_v1_index_volume|loki_api_v1_index_volume_range|loki_api_v1_label_name_values|loki_api_v1_label_values|loki_api_v1_labels|loki_api_v1_patterns|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_series|otlp_v1_logs|prometheus_api_v1_rules)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -893,7 +893,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -941,19 +941,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -1019,7 +1019,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -1232,7 +1232,7 @@
"stack": true,
"targets": [
{
- "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
"format": "time_series",
"legendFormat": "{{status}}",
"refId": "A"
@@ -1280,19 +1280,19 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 99th percentile",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3",
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
"format": "time_series",
"legendFormat": "{{ route }} 50th percentile",
"refId": "B"
},
{
- "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ",
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
"format": "time_series",
"legendFormat": "{{ route }} Average",
"refId": "C"
@@ -1358,7 +1358,7 @@
"span": 4,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
@@ -1569,6 +1569,684 @@
},
"span": 4,
"stack": true,
+ "targets": [
+ {
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "format": "time_series",
+ "legendFormat": "{{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "QPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 14,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 99th percentile",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 50th percentile",
+ "refId": "B"
+ },
+ {
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
+ "format": "time_series",
+ "legendFormat": "{{ route }} Average",
+ "refId": "C"
+ }
+ ],
+ "title": "Latency",
+ "type": "timeseries",
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 15,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "__auto",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Per Pod Latency (p99)",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Index Gateway",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "1xx": "#EAB839",
+ "2xx": "#7EB26D",
+ "3xx": "#6ED0E0",
+ "4xx": "#EF843C",
+ "5xx": "#E24D42",
+ "OK": "#7EB26D",
+ "cancel": "#A9A9A9",
+ "error": "#E24D42",
+ "success": "#7EB26D"
+ },
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "1xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EAB839",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "2xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "3xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#6ED0E0",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "4xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EF843C",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "5xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "OK"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cancel"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#A9A9A9",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "error"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "fill": 10,
+ "id": 16,
+ "linewidth": 0,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "stack": true,
+ "targets": [
+ {
+ "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
+ "format": "time_series",
+ "legendFormat": "{{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "QPS",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 17,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 99th percentile",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"})) * 1e3",
+ "format": "time_series",
+ "legendFormat": "{{ route }} 50th percentile",
+ "refId": "B"
+ },
+ {
+ "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}) by (route) ",
+ "format": "time_series",
+ "legendFormat": "{{ route }} Average",
+ "refId": "C"
+ }
+ ],
+ "title": "Latency",
+ "type": "timeseries",
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ }
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": [ ]
+ },
+ "id": 18,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\", route=~\"(/base.Ruler/Rules|/indexgatewaypb.IndexGateway/GetChunkRef|/indexgatewaypb.IndexGateway/GetSeries|/indexgatewaypb.IndexGateway/GetShards|/indexgatewaypb.IndexGateway/GetStats|/indexgatewaypb.IndexGateway/GetVolume|/indexgatewaypb.IndexGateway/LabelNamesForMetricName|/indexgatewaypb.IndexGateway/LabelValuesForMetricName|/indexgatewaypb.IndexGateway/QueryIndex|/logproto.BloomGateway/FilterChunkRefs|/logproto.Pattern/Query|/logproto.Querier/GetChunkIDs|/logproto.Querier/GetDetectedLabels|/logproto.Querier/GetStats|/logproto.Querier/GetVolume|/logproto.Querier/Label|/logproto.Querier/Query|/logproto.Querier/QuerySample|/logproto.Querier/Series|/logproto.StreamData/GetStreamRates)\"}[$__rate_interval])) by (le,pod)) * 1e3",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "__auto",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Per Pod Latency (p99)",
+ "type": "timeseries"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bloom Gateway",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "1xx": "#EAB839",
+ "2xx": "#7EB26D",
+ "3xx": "#6ED0E0",
+ "4xx": "#EF843C",
+ "5xx": "#E24D42",
+ "OK": "#7EB26D",
+ "cancel": "#A9A9A9",
+ "error": "#E24D42",
+ "success": "#7EB26D"
+ },
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "drawStyle": "line",
+ "fillOpacity": 100,
+ "lineWidth": 0,
+ "pointSize": 5,
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "normal"
+ }
+ },
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [ ]
+ },
+ "unit": "short"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "1xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EAB839",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "2xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "3xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#6ED0E0",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "4xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#EF843C",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "5xx"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "OK"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cancel"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#A9A9A9",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "error"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#E24D42",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "success"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "#7EB26D",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "fill": 10,
+ "id": 19,
+ "linewidth": 0,
+ "links": [ ],
+ "options": {
+ "legend": {
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "span": 4,
+ "stack": true,
"targets": [
{
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n",
@@ -1604,7 +2282,7 @@
},
"overrides": [ ]
},
- "id": 14,
+ "id": 20,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -1682,7 +2360,7 @@
},
"overrides": [ ]
},
- "id": 15,
+ "id": 21,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -1714,7 +2392,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Index",
+ "title": "TSBD Index",
"titleSize": "h6"
},
{
@@ -1894,7 +2572,7 @@
]
},
"fill": 10,
- "id": 16,
+ "id": 22,
"linewidth": 0,
"links": [ ],
"options": {
@@ -1943,7 +2621,7 @@
},
"overrides": [ ]
},
- "id": 17,
+ "id": 23,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -2021,7 +2699,7 @@
},
"overrides": [ ]
},
- "id": 18,
+ "id": 24,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
@@ -2053,7 +2731,7 @@
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "BoltDB Shipper",
+ "title": "BoltDB Index",
"titleSize": "h6"
}
],
diff --git a/monitoring-mixins/loki-mixin/microservices-mode/loki-mixin-alerts.yaml b/monitoring-mixins/loki-mixin/microservices-mode/loki-mixin-alerts.yaml
index af06880c..7c0825d8 100644
--- a/monitoring-mixins/loki-mixin/microservices-mode/loki-mixin-alerts.yaml
+++ b/monitoring-mixins/loki-mixin/microservices-mode/loki-mixin-alerts.yaml
@@ -3,8 +3,9 @@ groups:
rules:
- alert: LokiRequestErrors
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+ summary: Loki request error rate is high.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
/
@@ -15,16 +16,18 @@ groups:
severity: critical
- alert: LokiRequestPanics
annotations:
- message: |
+ description: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+ summary: Loki requests are causing code panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
- alert: LokiRequestLatency
annotations:
- message: |
+ description: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+ summary: Loki request latency is high.
expr: |
cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
for: 15m
@@ -32,8 +35,9 @@ groups:
severity: critical
- alert: LokiTooManyCompactorsRunning
annotations:
- message: |
+ description: |
{{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
+ summary: Loki deployment is running more than one compactor.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
for: 5m
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
index cc43f483..f4a9745a 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
@@ -456,18 +456,20 @@ local utils = import 'mixin-utils/utils.libsonnet';
},
],
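+ // Shared status-code palette so qpsPanel and qpsPanelNativeHistogram (below) color the
+ // same status series identically.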
+ httpStatusColors:: {
+ '1xx': '#EAB839',
+ '2xx': '#7EB26D',
+ '3xx': '#6ED0E0',
+ '4xx': '#EF843C',
+ '5xx': '#E24D42',
+ OK: '#7EB26D',
+ success: '#7EB26D',
+ 'error': '#E24D42',
+ cancel: '#A9A9A9',
+ },
+
qpsPanel(selector, statusLabelName='status_code'):: {
- aliasColors: {
- '1xx': '#EAB839',
- '2xx': '#7EB26D',
- '3xx': '#6ED0E0',
- '4xx': '#EF843C',
- '5xx': '#E24D42',
- OK: '#7EB26D',
- success: '#7EB26D',
- 'error': '#E24D42',
- cancel: '#A9A9A9',
- },
+ aliasColors: $.httpStatusColors,
targets: [
{
expr:
@@ -484,6 +486,65 @@ local utils = import 'mixin-utils/utils.libsonnet';
],
} + $.stack,
+ // Assumes that the metricName is for a histogram (as opposed to qpsPanel above)
+ // Assumes that there is a dashboard variable named latency_metrics, values are -1 (native) or 1 (classic)
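+ // Illustrative call (arguments are hypothetical, not taken from this repo):
+ //   $.qpsPanelNativeHistogram('loki_request_duration_seconds', 'cluster=~"$cluster", job=~"($namespace)/querier"')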
+ qpsPanelNativeHistogram(metricName, selector, statusLabelName='status_code'):: {
+ local sumByStatus(nativeClassicQuery) = {
+ local template =
+ |||
+ sum by (status) (
+ label_replace(label_replace(%(metricQuery)s,
+ "status", "${1}xx", "%(label)s", "([0-9]).."),
+ "status", "${1}", "%(label)s", "([a-zA-Z]+)"))
+ |||,
+ native: template % { metricQuery: nativeClassicQuery.native, label: statusLabelName },
+ classic: template % { metricQuery: nativeClassicQuery.classic, label: statusLabelName },
+ },
+ fieldConfig+: {
+ defaults+: {
+ custom+: {
+ lineWidth: 0,
+ fillOpacity: 100, // Get solid fill.
+ stacking: {
+ mode: 'normal',
+ group: 'A',
+ },
+ },
+ unit: 'reqps',
+ min: 0,
+ },
+ overrides+: [{
+ matcher: {
+ id: 'byName',
+ options: status,
+ },
+ properties: [
+ {
+ id: 'color',
+ value: {
+ mode: 'fixed',
+ fixedColor: $.httpStatusColors[status],
+ },
+ },
+ ],
+ } for status in std.objectFieldsAll($.httpStatusColors)],
+ },
+ targets: [
+ {
+ expr: utils.showClassicHistogramQuery(sumByStatus(utils.nativeClassicHistogramCountRate(metricName, selector))),
+ format: 'time_series',
+ legendFormat: '{{status}}',
+ refId: 'A_classic',
+ },
+ {
+ expr: utils.showNativeHistogramQuery(sumByStatus(utils.nativeClassicHistogramCountRate(metricName, selector))),
+ format: 'time_series',
+ legendFormat: '{{status}}',
+ refId: 'A',
+ },
+ ],
+ } + $.stack,
+
latencyPanel(metricName, selector, multiplier='1e3'):: {
nullPointMode: 'null as zero',
targets: [
@@ -509,6 +570,58 @@ local utils = import 'mixin-utils/utils.libsonnet';
yaxes: $.yaxes('ms'),
},
+  // Assumes that there is a dashboard variable named latency_metrics, whose values are -1 (native) or 1 (classic)
+ latencyPanelNativeHistogram(metricName, selector, multiplier='1e3'):: {
+ nullPointMode: 'null as zero',
+ fieldConfig+: {
+ defaults+: {
+ custom+: {
+ fillOpacity: 10,
+ },
+ unit: 'ms',
+ },
+ },
+ targets: [
+ {
+ expr: utils.showNativeHistogramQuery(utils.nativeClassicHistogramQuantile('0.99', metricName, selector, multiplier=multiplier)),
+ format: 'time_series',
+ legendFormat: '99th percentile',
+ refId: 'A',
+ },
+ {
+ expr: utils.showClassicHistogramQuery(utils.nativeClassicHistogramQuantile('0.99', metricName, selector, multiplier=multiplier)),
+ format: 'time_series',
+ legendFormat: '99th percentile',
+ refId: 'A_classic',
+ },
+ {
+ expr: utils.showNativeHistogramQuery(utils.nativeClassicHistogramQuantile('0.50', metricName, selector, multiplier=multiplier)),
+ format: 'time_series',
+ legendFormat: '50th percentile',
+ refId: 'B',
+ },
+ {
+ expr: utils.showClassicHistogramQuery(utils.nativeClassicHistogramQuantile('0.50', metricName, selector, multiplier=multiplier)),
+ format: 'time_series',
+ legendFormat: '50th percentile',
+ refId: 'B_classic',
+ },
+ {
+ expr: utils.showNativeHistogramQuery(utils.nativeClassicHistogramAverageRate(metricName, selector, multiplier=multiplier)),
+ format: 'time_series',
+ legendFormat: 'Average',
+ refId: 'C',
+ },
+ {
+ expr: utils.showClassicHistogramQuery(utils.nativeClassicHistogramAverageRate(metricName, selector, multiplier=multiplier)),
+ format: 'time_series',
+ legendFormat: 'Average',
+ refId: 'C_classic',
+ },
+ ],
+ yaxes: $.yaxes('ms'),
+ },
+
selector:: {
eq(label, value):: { label: label, op: '=', value: value },
neq(label, value):: { label: label, op: '!=', value: value },
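The two panel builders added above are intended as native-histogram-aware counterparts of qpsPanel and latencyPanel. A minimal sketch of how a dashboard could wire them up, assuming the usual grafana-builder import; the dashboard title, metric, and selector are illustrative, not taken from this mixin:

local g = import 'grafana-builder/grafana.libsonnet';

// Sketch only. Both builders take the histogram base name (no _bucket/_count/_sum
// suffix) plus a brace-less label selector, and both expect a dashboard variable
// named latency_metrics (-1 = native, 1 = classic) to toggle the query flavour.
g.dashboard('Example / Writes')
.addRow(
  g.row('Push path')
  .addPanel(
    g.panel('QPS') +
    g.qpsPanelNativeHistogram('loki_request_duration_seconds', 'job="example"')
  )
  .addPanel(
    g.panel('Latency') +
    g.latencyPanelNativeHistogram('loki_request_duration_seconds', 'job="example"')
  )
)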
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet
index d669aa55..7d042989 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet
@@ -3,84 +3,129 @@ local g = import 'grafana-builder/grafana.libsonnet';
{
// The classicNativeHistogramQuantile function is used to calculate histogram quantiles from native histograms or classic histograms.
// Metric name should be provided without _bucket suffix.
- nativeClassicHistogramQuantile(percentile, metric, selector, sum_by=[], rate_interval='$__rate_interval', multiplier='')::
+ // If from_recording is true, the function will assume :sum_rate metric suffix and no rate needed.
+ nativeClassicHistogramQuantile(percentile, metric, selector, sum_by=[], rate_interval='$__rate_interval', multiplier='', from_recording=false)::
local classicSumBy = if std.length(sum_by) > 0 then ' by (%(lbls)s) ' % { lbls: std.join(',', ['le'] + sum_by) } else ' by (le) ';
local nativeSumBy = if std.length(sum_by) > 0 then ' by (%(lbls)s) ' % { lbls: std.join(',', sum_by) } else ' ';
local multiplierStr = if multiplier == '' then '' else ' * %s' % multiplier;
+ local rateOpen = if from_recording then '' else 'rate(';
+ local rateClose = if from_recording then '' else '[%s])' % rate_interval;
{
- classic: 'histogram_quantile(%(percentile)s, sum%(classicSumBy)s(rate(%(metric)s_bucket{%(selector)s}[%(rateInterval)s])))%(multiplierStr)s' % {
+ classic: 'histogram_quantile(%(percentile)s, sum%(classicSumBy)s(%(rateOpen)s%(metric)s_bucket%(suffix)s{%(selector)s}%(rateClose)s))%(multiplierStr)s' % {
classicSumBy: classicSumBy,
metric: metric,
multiplierStr: multiplierStr,
percentile: percentile,
rateInterval: rate_interval,
+ rateOpen: rateOpen,
+ rateClose: rateClose,
selector: selector,
+ suffix: if from_recording then ':sum_rate' else '',
},
- native: 'histogram_quantile(%(percentile)s, sum%(nativeSumBy)s(rate(%(metric)s{%(selector)s}[%(rateInterval)s])))%(multiplierStr)s' % {
+ native: 'histogram_quantile(%(percentile)s, sum%(nativeSumBy)s(%(rateOpen)s%(metric)s%(suffix)s{%(selector)s}%(rateClose)s))%(multiplierStr)s' % {
metric: metric,
multiplierStr: multiplierStr,
nativeSumBy: nativeSumBy,
percentile: percentile,
rateInterval: rate_interval,
+ rateOpen: rateOpen,
+ rateClose: rateClose,
selector: selector,
+ suffix: if from_recording then ':sum_rate' else '',
},
},
// The classicNativeHistogramSumRate function is used to calculate the histogram sum of rate from native histograms or classic histograms.
// Metric name should be provided without _sum suffix.
- nativeClassicHistogramSumRate(metric, selector, rate_interval='$__rate_interval')::
+ // If from_recording is true, the function will assume :sum_rate metric suffix and no rate needed.
+ nativeClassicHistogramSumRate(metric, selector, rate_interval='$__rate_interval', from_recording=false)::
+ local rateOpen = if from_recording then '' else 'rate(';
+ local rateClose = if from_recording then '' else '[%s])' % rate_interval;
{
- classic: 'rate(%(metric)s_sum{%(selector)s}[%(rateInterval)s])' % {
+ classic: '%(rateOpen)s%(metric)s_sum%(suffix)s{%(selector)s}%(rateClose)s' % {
metric: metric,
rateInterval: rate_interval,
+ rateOpen: rateOpen,
+ rateClose: rateClose,
selector: selector,
+ suffix: if from_recording then ':sum_rate' else '',
},
- native: 'histogram_sum(rate(%(metric)s{%(selector)s}[%(rateInterval)s]))' % {
+ native: 'histogram_sum(%(rateOpen)s%(metric)s%(suffix)s{%(selector)s}%(rateClose)s)' % {
metric: metric,
rateInterval: rate_interval,
+ rateOpen: rateOpen,
+ rateClose: rateClose,
selector: selector,
+ suffix: if from_recording then ':sum_rate' else '',
},
},
// The classicNativeHistogramCountRate function is used to calculate the histogram count of rate from native histograms or classic histograms.
// Metric name should be provided without _count suffix.
- nativeClassicHistogramCountRate(metric, selector, rate_interval='$__rate_interval')::
+ // If from_recording is true, the function will assume :sum_rate metric suffix and no rate needed.
+ nativeClassicHistogramCountRate(metric, selector, rate_interval='$__rate_interval', from_recording=false)::
+ local rateOpen = if from_recording then '' else 'rate(';
+ local rateClose = if from_recording then '' else '[%s])' % rate_interval;
{
- classic: 'rate(%(metric)s_count{%(selector)s}[%(rateInterval)s])' % {
+ classic: '%(rateOpen)s%(metric)s_count%(suffix)s{%(selector)s}%(rateClose)s' % {
metric: metric,
rateInterval: rate_interval,
+ rateOpen: rateOpen,
+ rateClose: rateClose,
selector: selector,
+ suffix: if from_recording then ':sum_rate' else '',
},
- native: 'histogram_count(rate(%(metric)s{%(selector)s}[%(rateInterval)s]))' % {
+ native: 'histogram_count(%(rateOpen)s%(metric)s%(suffix)s{%(selector)s}%(rateClose)s)' % {
metric: metric,
rateInterval: rate_interval,
+ rateOpen: rateOpen,
+ rateClose: rateClose,
selector: selector,
+ suffix: if from_recording then ':sum_rate' else '',
},
},
// TODO(krajorama) Switch to histogram_avg function for native histograms later.
- nativeClassicHistogramAverageRate(metric, selector, rate_interval='$__rate_interval', multiplier='')::
+ // nativeClassicHistogramAverageRate function is used to calculate the histogram average rate from native histograms or classic histograms.
+ // If from_recording is true, the function will assume :sum_rate metric suffix and no rate needed.
+ nativeClassicHistogramAverageRate(metric, selector, rate_interval='$__rate_interval', multiplier='', from_recording=false)::
local multiplierStr = if multiplier == '' then '' else '%s * ' % multiplier;
{
classic: |||
%(multiplier)ssum(%(sumMetricQuery)s) /
sum(%(countMetricQuery)s)
||| % {
- sumMetricQuery: $.nativeClassicHistogramSumRate(metric, selector, rate_interval).classic,
- countMetricQuery: $.nativeClassicHistogramCountRate(metric, selector, rate_interval).classic,
+ sumMetricQuery: $.nativeClassicHistogramSumRate(metric, selector, rate_interval, from_recording).classic,
+ countMetricQuery: $.nativeClassicHistogramCountRate(metric, selector, rate_interval, from_recording).classic,
multiplier: multiplierStr,
},
native: |||
%(multiplier)ssum(%(sumMetricQuery)s) /
sum(%(countMetricQuery)s)
||| % {
- sumMetricQuery: $.nativeClassicHistogramSumRate(metric, selector, rate_interval).native,
- countMetricQuery: $.nativeClassicHistogramCountRate(metric, selector, rate_interval).native,
+ sumMetricQuery: $.nativeClassicHistogramSumRate(metric, selector, rate_interval, from_recording).native,
+ countMetricQuery: $.nativeClassicHistogramCountRate(metric, selector, rate_interval, from_recording).native,
multiplier: multiplierStr,
},
},
+ nativeClassicSumBy(query, sum_by=[], multiplier='')::
+ local sumBy = if std.length(sum_by) > 0 then ' by (%(lbls)s) ' % { lbls: std.join(', ', sum_by) } else ' ';
+ local multiplierStr = if multiplier == '' then '' else ' * %s' % multiplier;
+ {
+ classic: 'sum%(sumBy)s(%(query)s)%(multiplierStr)s' % {
+ multiplierStr: multiplierStr,
+ query: query.classic,
+ sumBy: sumBy,
+ },
+ native: 'sum%(sumBy)s(%(query)s)%(multiplierStr)s' % {
+ multiplierStr: multiplierStr,
+ query: query.native,
+ sumBy: sumBy,
+ },
+ },
+
// showClassicHistogramQuery wraps a query defined as map {classic: q, native: q}, and compares the classic query
// to dashboard variable which should take -1 or +1 as values in order to hide or show the classic query.
showClassicHistogramQuery(query, dashboard_variable='latency_metrics'):: '%s < ($%s * +Inf)' % [query.classic, dashboard_variable],
@@ -106,7 +151,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
},
{
record: '%(labels_underscore)s:%(metric)s:avg' % vars,
- expr: 'sum(rate(%(metric)s_sum[1m])) by (%(labels_comma)s) / sum(rate(%(metric)s_count[%(interval)s])) by (%(labels_comma)s)' % vars,
+ expr: 'sum(rate(%(metric)s_sum[%(interval)s])) by (%(labels_comma)s) / sum(rate(%(metric)s_count[%(interval)s])) by (%(labels_comma)s)' % vars,
},
{
record: '%(labels_underscore)s:%(metric)s_bucket:sum_rate' % vars,
@@ -205,12 +250,72 @@ local g = import 'grafana-builder/grafana.libsonnet';
noop(label):: { label: label, op: 'nop' },
},
- toPrometheusSelector(selector)::
+ // latencyRecordingRulePanelNativeHistogram - build a latency panel for a recording rule.
+ // - metric: the base metric name (middle part of recording rule name)
+  // - selectors: list of selectors which will be added to the first part of
+  //   the recording rule name, and to the query selector itself.
+  // - extra_selectors (optional): list of selectors which will be added to the
+  //   query selector, but not to the beginning of the recording rule name.
+ // Useful for external labels.
+ // - multiplier (optional): assumes results are in seconds, will multiply
+ // by 1e3 to get ms. Can be turned off.
+ // - sum_by (optional): additional labels to use in the sum by clause, will also be used in the legend
+ latencyRecordingRulePanelNativeHistogram(metric, selectors, extra_selectors=[], multiplier='1e3', sum_by=[])::
+ local labels = std.join('_', [matcher.label for matcher in selectors]);
+ local legend = std.join('', ['{{ %(lb)s }} ' % lb for lb in sum_by]);
+ local metricStr = '%(labels)s:%(metric)s' % { labels: labels, metric: metric };
+ local selectorStr = $.toPrometheusSelectorNaked(selectors + extra_selectors);
+ {
+ nullPointMode: 'null as zero',
+ yaxes: g.yaxes('ms'),
+ targets: [
+ {
+ expr: $.showClassicHistogramQuery($.nativeClassicHistogramQuantile('0.99', metricStr, selectorStr, sum_by=sum_by, multiplier=multiplier, from_recording=true)),
+ format: 'time_series',
+ legendFormat: '%(legend)s99th percentile' % legend,
+ refId: 'A_classic',
+ },
+ {
+ expr: $.showNativeHistogramQuery($.nativeClassicHistogramQuantile('0.99', metricStr, selectorStr, sum_by=sum_by, multiplier=multiplier, from_recording=true)),
+ format: 'time_series',
+ legendFormat: '%(legend)s99th percentile' % legend,
+ refId: 'A_native',
+ },
+ {
+ expr: $.showClassicHistogramQuery($.nativeClassicHistogramQuantile('0.50', metricStr, selectorStr, sum_by=sum_by, multiplier=multiplier, from_recording=true)),
+ format: 'time_series',
+ legendFormat: '%(legend)s50th percentile' % legend,
+ refId: 'B_classic',
+ },
+ {
+ expr: $.showNativeHistogramQuery($.nativeClassicHistogramQuantile('0.50', metricStr, selectorStr, sum_by=sum_by, multiplier=multiplier, from_recording=true)),
+ format: 'time_series',
+ legendFormat: '%(legend)s50th percentile' % legend,
+ refId: 'B_native',
+ },
+ {
+ expr: $.showClassicHistogramQuery($.nativeClassicHistogramAverageRate(metricStr, selectorStr, multiplier=multiplier, from_recording=true)),
+ format: 'time_series',
+ legendFormat: '%(legend)sAverage' % legend,
+ refId: 'C_classic',
+ },
+ {
+ expr: $.showNativeHistogramQuery($.nativeClassicHistogramAverageRate(metricStr, selectorStr, multiplier=multiplier, from_recording=true)),
+ format: 'time_series',
+ legendFormat: '%(legend)sAverage' % legend,
+ refId: 'C_native',
+ },
+ ],
+ },
+
+ toPrometheusSelectorNaked(selector)::
local pairs = [
'%(label)s%(op)s"%(value)s"' % matcher
for matcher in std.filter(function(matcher) matcher.op != 'nop', selector)
];
- '{%s}' % std.join(', ', pairs),
+ '%s' % std.join(', ', pairs),
+
+ toPrometheusSelector(selector):: '{%s}' % $.toPrometheusSelectorNaked(selector),
// withRunbookURL - Add/Override the runbook_url annotations for all alerts inside a list of rule groups.
// - url_format: an URL format for the runbook, the alert name will be substituted in the URL.
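To make the new from_recording switch concrete, here is roughly what nativeClassicHistogramQuantile renders when pointed at a recording rule; the metric name and selector below are illustrative:

local utils = import 'mixin-utils/utils.libsonnet';

// Sketch only. With from_recording=true the helper appends the :sum_rate suffix
// and skips the rate(...)[...] wrapper, since the recording rule already rated it.
local q = utils.nativeClassicHistogramQuantile(
  '0.99',
  'cluster_job:loki_request_duration_seconds',
  'cluster="$cluster"',
  multiplier='1e3',
  from_recording=true
);

{
  // Renders as:
  //   histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster="$cluster"})) * 1e3
  classic: q.classic,
  // Renders as:
  //   histogram_quantile(0.99, sum (cluster_job:loki_request_duration_seconds:sum_rate{cluster="$cluster"})) * 1e3
  native: q.native,
  // showClassicHistogramQuery/showNativeHistogramQuery then gate each variant on
  // the $latency_metrics dashboard variable so only one is visible at a time.
  shown: utils.showClassicHistogramQuery(q),
}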
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/alerts.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/alerts.libsonnet
index 0045cc19..5bff18e7 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/alerts.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/alerts.libsonnet
@@ -17,7 +17,8 @@
severity: 'critical',
},
annotations: {
- message: |||
+ summary: 'Loki request error rate is high.',
+ description: |||
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
|||,
},
@@ -31,7 +32,8 @@
severity: 'critical',
},
annotations: {
- message: |||
+ summary: 'Loki requests are causing code panics.',
+ description: |||
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
|||,
},
@@ -46,7 +48,8 @@
severity: 'critical',
},
annotations: {
- message: |||
+ summary: 'Loki request error latency is high.',
+ description: |||
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
|||,
},
@@ -54,16 +57,17 @@
{
alert: 'LokiTooManyCompactorsRunning',
expr: |||
- sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
- |||,
+ sum(loki_boltdb_shipper_compactor_running) by (namespace, %s) > 1
+ ||| % $._config.per_cluster_label,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
- message: |||
+ summary: 'Loki deployment is running more than one compactor.',
+ description: std.strReplace(|||
{{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
- |||,
+ |||, 'cluster', $._config.per_cluster_label),
},
},
],
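Deriving the grouping label from $._config.per_cluster_label keeps the compactor alert correct when a deployment renames the cluster label. A sketch of such an override (the import path is hypothetical):

// Sketch: rename the cluster label across the mixin.
(import 'mixin.libsonnet') + {
  _config+:: {
    per_cluster_label: 'k8s_cluster',
  },
}
// The alert expression then renders as
//   sum(loki_boltdb_shipper_compactor_running) by (namespace, k8s_cluster) > 1
// and std.strReplace rewrites the description template to reference
//   {{ $labels.k8s_cluster }} instead of {{ $labels.cluster }}.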
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet
index 1fa22f56..48e75865 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet
@@ -31,5 +31,10 @@
// The prefix used to match the write and read pods on SSD mode.
pod_prefix_matcher: '(loki|enterprise-logs)',
},
+
+ // Meta-monitoring related configuration
+ meta_monitoring: {
+ enabled: false,
+ },
},
}
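The new meta_monitoring block ships disabled; enabling it is a one-line config override in the consuming environment (import path illustrative):

// Sketch: opt in to the meta-monitoring additions.
(import 'loki-mixin/mixin.libsonnet') + {
  _config+:: {
    meta_monitoring+: {
      enabled: true,
    },
  },
}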
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json
index bcb5737a..72b8565e 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json
@@ -116,6 +116,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -238,7 +243,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))",
+ "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -289,6 +294,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "bytes"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -375,6 +385,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -410,7 +425,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -461,6 +476,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -496,7 +516,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))",
+ "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -634,6 +654,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -669,7 +694,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)",
+ "expr": "sum(rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[$__rate_interval])) by (level)",
"legendFormat": "{{level}}",
"refId": "A"
}
@@ -721,6 +746,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$loki_datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -773,7 +803,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" | __error__=\"\" [$__auto])) by (level)",
"intervalFactor": 3,
"legendFormat": "{{level}}",
"refId": "A"
@@ -862,8 +892,8 @@
"10s",
"30s",
"1m",
       "5m",
       "15m",
"30m",
"1h",
"2h",
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json
index 2dd944c2..3f215c2e 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json
@@ -90,7 +90,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -185,7 +185,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
+ "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_push|loki_api_v1_push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -239,7 +239,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "ops"
},
"overrides": []
},
@@ -374,7 +375,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))",
+ "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant))",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -428,7 +429,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "MBs"
},
"overrides": []
},
@@ -469,7 +471,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024",
+ "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant)) / 1024 / 1024",
"legendFormat": "{{tenant}}",
"refId": "A"
}
@@ -618,7 +620,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "s"
},
"overrides": []
},
@@ -723,7 +726,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "s"
},
"overrides": []
},
@@ -925,7 +929,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "s"
},
"overrides": []
},
@@ -1129,7 +1134,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "s"
},
"overrides": []
},
@@ -1236,7 +1242,8 @@
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "s"
},
"overrides": []
},
@@ -1439,7 +1446,8 @@
"description": "",
"fieldConfig": {
"defaults": {
- "custom": {}
+ "custom": {},
+ "unit": "s"
},
"overrides": []
},
@@ -1685,7 +1693,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))",
+ "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))",
"interval": "",
"legendFormat": "{{ tenant }} - {{ reason }}",
"refId": "A"
@@ -1809,7 +1817,7 @@
],
"targets": [
{
- "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))",
+ "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])[$__range:$__rate_interval])))",
"format": "table",
"instant": true,
"interval": "",
@@ -1844,6 +1852,7 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -1932,6 +1941,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2063,7 +2077,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -2229,6 +2243,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2264,7 +2283,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2317,6 +2336,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2352,7 +2376,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2405,6 +2429,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2440,7 +2469,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)",
+ "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
@@ -2596,6 +2625,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -2727,7 +2761,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -3035,7 +3069,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=\"$namespace/ingester\"}[1m]) > 0))",
+ "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=\"$namespace/ingester\"}[$__rate_interval]) > 0))",
"interval": "",
"legendFormat": "{{ tenant }}",
"refId": "A"
@@ -3143,13 +3177,13 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=\"$namespace/ingester\"}[1m]))",
+ "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=\"$namespace/ingester\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "Chunks",
"refId": "A"
},
{
- "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=\"$namespace/ingester\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=\"$namespace/ingester\"}[1m])) < 1",
+ "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=\"$namespace/ingester\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=\"$namespace/ingester\"}[$__rate_interval])) < 1",
"interval": "",
"legendFormat": "De-Dupe Ratio",
"refId": "B"
@@ -3226,7 +3260,7 @@
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=\"$namespace/ingester\"}[1m])) by (le)",
+ "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=\"$namespace/ingester\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -3379,7 +3413,7 @@
"reverseYBuckets": false,
"targets": [
{
- "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=\"$namespace/ingester\"}[1m]))",
+ "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=\"$namespace/ingester\"}[$__rate_interval]))",
"format": "heatmap",
"instant": false,
"interval": "",
@@ -3522,6 +3556,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "binBps"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -3653,7 +3692,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"} | logfmt | level=\"error\"[1m]))",
+ "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"} | logfmt | level=\"error\"[$__auto]))",
"refId": "A"
}
],
@@ -3834,6 +3873,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -3872,19 +3916,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"intervalFactor": 1,
"legendFormat": "{{container}}: .99-{{method}}-{{name}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .9-{{method}}-{{name}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))",
"hide": false,
"legendFormat": "{{container}}: .5-{{method}}-{{name}}",
"refId": "C"
@@ -3975,7 +4019,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)",
+ "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, method, name, container)",
"intervalFactor": 1,
"legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}",
"refId": "A"
@@ -4043,6 +4087,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4081,19 +4130,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -4146,6 +4195,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4184,7 +4238,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)",
+ "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, status_code, method)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -4252,6 +4306,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4288,17 +4347,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -4349,6 +4408,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4385,20 +4449,20 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -4451,6 +4515,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4487,20 +4556,20 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "90%",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))",
"interval": "",
"legendFormat": "50%",
"refId": "C"
@@ -4553,6 +4622,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4589,17 +4663,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".9",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))",
"refId": "C"
}
],
@@ -4650,6 +4724,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4686,7 +4765,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4739,6 +4818,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4775,7 +4859,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4828,6 +4912,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4864,7 +4953,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -4917,6 +5006,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "ops"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -4953,7 +5047,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)",
+ "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (status_code)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}",
"refId": "A"
@@ -5021,6 +5115,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5059,19 +5158,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5162,7 +5261,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -5264,7 +5363,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5349,7 +5448,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5434,7 +5533,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5519,7 +5618,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))",
+ "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"refId": "A"
}
],
@@ -5604,17 +5703,17 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".99",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".9",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"legendFormat": ".5",
"refId": "C"
}
@@ -5666,6 +5765,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5703,19 +5807,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -5805,7 +5909,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -5873,6 +5977,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -5910,19 +6019,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6012,7 +6121,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6080,6 +6189,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6117,19 +6231,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6219,7 +6333,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6287,6 +6401,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6324,19 +6443,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6426,7 +6545,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6494,6 +6613,11 @@
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "unit": "s"
+ }
+ },
"fill": 1,
"fillGradient": 0,
"gridPos": {
@@ -6531,19 +6655,19 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"intervalFactor": 1,
"legendFormat": ".99-{{operation}}",
"refId": "A"
},
{
- "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".9-{{operation}}",
"refId": "B"
},
{
- "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))",
+ "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))",
"hide": false,
"legendFormat": ".5-{{operation}}",
"refId": "C"
@@ -6633,7 +6757,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)",
+ "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)",
"intervalFactor": 1,
"legendFormat": "{{status_code}}-{{operation}}",
"refId": "A"
@@ -6697,9 +6821,9 @@
"refresh_intervals": [
"10s",
"30s",
- "1m",
- "5m",
- "15m",
+ "$__rate_interval",
+ "$__rate_interval",
+ "1$__rate_interval",
"30m",
"1h",
"2h",
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-recording-rules.json b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-recording-rules.json
index 2861e871..d94f131f 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-recording-rules.json
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-recording-rules.json
@@ -273,6 +273,7 @@
"color": {
"mode": "palette-classic"
},
+ "unit": "s",
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-canary-dashboard.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-canary-dashboard.libsonnet
index 6539a34d..94e07deb 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-canary-dashboard.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-canary-dashboard.libsonnet
@@ -24,8 +24,8 @@ local grafana = import 'grafonnet/grafana.libsonnet';
// This logic is inherited from mimir-mixin.
dashboard.dashboard('Canary')
// We can't make use of simplified template selectors from the loki dashboard utils until we port the cortex dashboard utils panel/grid functionality.
- .addTemplate('cluster', 'loki_build_info', 'cluster')
- .addTemplate('namespace', 'loki_build_info{cluster=~"$cluster"}', 'namespace')
+ .addTemplate('cluster', 'loki_build_info', $._config.per_cluster_label)
+ .addTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', 'namespace')
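+ // Illustrative only: with the default per_cluster_label of 'cluster', these
+ // templates render identically to the hard-coded selectors they replace.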
+ {
// This dashboard uses the new grid system in order to place panels (using gridPos).
// Because of this we can't use the mixin's addRow() and addPanel().
@@ -33,7 +33,7 @@ local grafana = import 'grafonnet/grafana.libsonnet';
rows: null,
// ugly hack, copy pasta the tag/link
// code from the loki-mixin
- tags: ['loki'],
+ tags: $._config.tags,
links: [
{
asDropdown: true,
@@ -49,60 +49,60 @@ local grafana = import 'grafonnet/grafana.libsonnet';
panels: [
// grid row 1
dashboard.panel('Canary Entries Total') +
- dashboard.newStatPanel('sum(count(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}))', unit='short') +
+ dashboard.newStatPanel('sum(count(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster", namespace=~"$namespace"}))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 0 } },
dashboard.panel('Canary Logs Total') +
- dashboard.newStatPanel('sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
+ dashboard.newStatPanel('sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 3, y: 0 } },
dashboard.panel('Missing') +
- dashboard.newStatPanel('sum(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
+ dashboard.newStatPanel('sum(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 6, y: 0 } },
dashboard.panel('Spotcheck Missing') +
- dashboard.newStatPanel('sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
+ dashboard.newStatPanel('sum(increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 9, y: 0 } },
// grid row 2
dashboard.panel('Spotcheck Total') +
- dashboard.newStatPanel('sum(increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
+ dashboard.newStatPanel('sum(increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 4 } },
dashboard.panel('Metric Test Error %') +
- dashboard.newStatPanel('((sum(loki_canary_metric_test_expected{cluster=~"$cluster",namespace=~"$namespace"}) - sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"}))/(sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"}))) * 100') +
+ dashboard.newStatPanel('((sum(loki_canary_metric_test_expected{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}) - sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}))/(sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}))) * 100') +
{ gridPos: { h: 4, w: 3, x: 3, y: 4 } },
dashboard.panel('Missing %') +
- dashboard.newStatPanel('(sum(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
+ dashboard.newStatPanel('(sum(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
{ gridPos: { h: 4, w: 3, x: 6, y: 4 } },
dashboard.panel('Spotcheck Missing %') +
- dashboard.newStatPanel('(sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))) * 100') +
+ dashboard.newStatPanel('(sum(increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))) * 100') +
{ gridPos: { h: 4, w: 3, x: 9, y: 4 } },
// grid row 3
dashboard.panel('Metric Test Expected') +
- dashboard.newStatPanel('sum(loki_canary_metric_test_expected{cluster=~"$cluster",namespace=~"$namespace"})', unit='short') +
+ dashboard.newStatPanel('sum(loki_canary_metric_test_expected{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"})', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 8 } },
dashboard.panel('Metric Test Actual') +
- dashboard.newStatPanel('sum(loki_canary_metric_test_actual{cluster=~"$cluster",namespace=~"$namespace"})', unit='short') +
+ dashboard.newStatPanel('sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"})', unit='short') +
{ gridPos: { h: 4, w: 3, x: 3, y: 8 } },
dashboard.panel('Websocket Missing') +
- dashboard.newStatPanel('sum(increase(loki_canary_websocket_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
+ dashboard.newStatPanel('sum(increase(loki_canary_websocket_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 6, y: 8 } },
dashboard.panel('Websocket Missing %') +
- dashboard.newStatPanel('(sum(increase(loki_canary_websocket_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
+ dashboard.newStatPanel('(sum(increase(loki_canary_websocket_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
{ gridPos: { h: 4, w: 3, x: 9, y: 8 } },
// end of grid
dashboard.panel('Log Write to read Latency Percentiles') +
dashboard.queryPanel([
- 'histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
- 'histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
+ 'histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
+ 'histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
], ['p95', 'p50']) +
{ gridPos: { h: 6, w: 12, x: 12, y: 0 } },
@@ -115,7 +115,7 @@ local grafana = import 'grafonnet/grafana.libsonnet';
).addTargets(
[
grafana.prometheus.target(
- 'sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le)',
+ 'sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le)',
legendFormat='{{le}}',
format='heatmap',
),
@@ -125,24 +125,24 @@ local grafana = import 'grafonnet/grafana.libsonnet';
dashboard.panel('Spot Check Query') +
dashboard.queryPanel([
- 'histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
- 'histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
+ 'histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
+ 'histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
], ['p99', 'p50']) +
{ gridPos: { h: 6, w: 12, x: 0, y: 14 } },
dashboard.panel('Metric Test Query') +
dashboard.queryPanel([
- 'histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
- 'histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
+ 'histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
+ 'histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
], ['p99', 'p50']) +
{ gridPos: { h: 6, w: 12, x: 12, y: 14 } },
dashboard.panel('Spot Check Missing %') +
- dashboard.queryPanel('topk(20, (sum by (cluster, pod) (increase(loki_canary_spot_check_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (cluster, pod) (increase(loki_canary_spot_check_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) * 100)) > 0', '') +
+ dashboard.queryPanel('topk(20, (sum by (' + $._config.per_cluster_label + ', pod) (increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (' + $._config.per_cluster_label + ', pod) (increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) * 100)) > 0', '') +
{ gridPos: { h: 6, w: 12, x: 0, y: 20 } },
g.panel('Missing logs') +
- g.queryPanel('topk(20,(sum by (cluster, pod)(increase(loki_canary_missing_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (cluster, pod)(increase(loki_canary_entries_total{cluster=~"$cluster",namespace=~"$namespace"}[$__rate_interval])))*100) > 0', 'Missing {{ cluster }} {{ pod }}') +
+ g.queryPanel('topk(20,(sum by (' + $._config.per_cluster_label + ', pod)(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (' + $._config.per_cluster_label + ', pod)(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])))*100) > 0', 'Missing {{ ' + $._config.per_cluster_label + ' }} {{ pod }}') +
{ gridPos: { h: 6, w: 12, x: 12, y: 20 } },
],
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet
index 87dfff7a..a048dadf 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet
@@ -6,7 +6,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
local dashboards = self,
'loki-chunks.json': {
local cfg = self,
- labelsSelector:: $._config.per_cluster_label + '="$cluster", job=~"$namespace/%s"' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*'),
+ labelsSelector:: $._config.per_cluster_label + '="$cluster", job=~"$namespace/%s"' % (
+ if $._config.meta_monitoring.enabled
+ then '(ingester.*|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*'
+ ),
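+ // Illustrative only: assuming the defaults per_cluster_label='cluster' and
+ // ssd.pod_prefix_matcher='loki', the meta-monitoring branch renders as:
+ //   cluster="$cluster", job=~"$namespace/(ingester.*|loki-write|loki-single-binary)"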
} +
$.dashboard('Loki / Chunks', uid='chunks')
.addCluster()
@@ -49,7 +53,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
.addPanel(
$.newQueryPanel('Index Entries Per Chunk') +
$.queryPanel(
- 'sum(rate(loki_chunk_store_index_entries_per_chunk_sum{%s}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{%s}[5m]))' % [
+ 'sum(rate(loki_chunk_store_index_entries_per_chunk_sum{%s}[$__rate_interval])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{%s}[$__rate_interval]))' % [
dashboards['loki-chunks.json'].labelsSelector,
dashboards['loki-chunks.json'].labelsSelector,
],
@@ -139,9 +143,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.newQueryPanel('Chunk Size Quantiles', 'bytes') +
$.queryPanel(
[
- 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[1m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
- 'histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[1m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
- 'histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[1m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
+ 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[$__rate_interval])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
+ 'histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[$__rate_interval])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
+ 'histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[$__rate_interval])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
],
[
'p99',
@@ -157,9 +161,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.newQueryPanel('Chunk Duration hours (end-start)') +
$.queryPanel(
[
- 'histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
- 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
- 'sum(rate(loki_ingester_chunk_bounds_hours_sum{%s}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{%s}[5m]))' % [
+ 'histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[$__rate_interval])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
+ 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[$__rate_interval])) by (le))' % dashboards['loki-chunks.json'].labelsSelector,
+ 'sum(rate(loki_ingester_chunk_bounds_hours_sum{%s}[$__rate_interval])) / sum(rate(loki_ingester_chunk_bounds_hours_count{%s}[$__rate_interval]))' % [
dashboards['loki-chunks.json'].labelsSelector,
dashboards['loki-chunks.json'].labelsSelector,
],
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-deletion.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-deletion.libsonnet
index 5fdbcc76..5b8ef5d5 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-deletion.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-deletion.libsonnet
@@ -2,7 +2,9 @@ local g = import 'grafana-builder/grafana.libsonnet';
local utils = import 'mixin-utils/utils.libsonnet';
(import 'dashboard-utils.libsonnet') {
- local compactor_matcher = if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'compactor',
+ local compactor_matcher = if $._config.meta_monitoring.enabled
+ then 'pod=~"(compactor|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then 'container="loki", pod=~"%s-backend.*"' % $._config.ssd.pod_prefix_matcher else 'container="compactor"',
grafanaDashboards+::
{
'loki-deletion.json':
@@ -61,15 +63,15 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addPanel(
$.newQueryPanel('Lines Deleted / Sec') +
- g.queryPanel('sum(rate(loki_compactor_deleted_lines{' + $._config.per_cluster_label + '=~"$cluster",job=~"$namespace/%s"}[$__rate_interval])) by (user)' % compactor_matcher, '{{user}}'),
+ g.queryPanel('sum(rate(loki_compactor_deleted_lines{' + $.namespaceMatcher() + ', ' + compactor_matcher + '}[$__rate_interval])) by (user)', '{{user}}'),
)
).addRow(
g.row('List of deletion requests')
.addPanel(
- $.logPanel('In progress/finished', '{%s, container="compactor"} |~ "Started processing delete request|delete request for user marked as processed" | logfmt | line_format "{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}" ' % $.namespaceMatcher()),
+ $.logPanel('In progress/finished', '{%s, %s} |~ "Started processing delete request|delete request for user marked as processed" | logfmt | line_format "{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}" ' % [$.namespaceMatcher(), compactor_matcher]),
)
.addPanel(
- $.logPanel('Requests', '{%s, container="compactor"} |~ "delete request for user added" | logfmt | line_format "{{.ts}} user={{.user}} query=\'{{.query}}\'"' % $.namespaceMatcher()),
+ $.logPanel('Requests', '{%s, %s} |~ "delete request for user added" | logfmt | line_format "{{.ts}} user={{.user}} query=\'{{.query}}\'"' % [$.namespaceMatcher(), compactor_matcher]),
)
),
},
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-logs.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-logs.libsonnet
index 9fd6eee5..b28d74e9 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-logs.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-logs.libsonnet
@@ -48,7 +48,6 @@ local template = import 'grafonnet/template.libsonnet';
local cfg = self,
showMultiCluster:: true,
- clusterLabel:: $._config.per_cluster_label,
} + lokiLogs +
$.dashboard('Loki / Logs', uid='logs')
@@ -61,8 +60,9 @@ local template = import 'grafonnet/template.libsonnet';
p {
targets: [
e {
- expr: if dashboards['loki-logs.json'].showMultiCluster then super.expr
- else std.strReplace(super.expr, $._config.per_cluster_label + '="$cluster", ', ''),
+ expr: if dashboards['loki-logs.json'].showMultiCluster
+ then std.strReplace(super.expr, 'cluster="$cluster"', $._config.per_cluster_label + '="$cluster"')
+ else std.strReplace(super.expr, 'cluster="$cluster", ', ''),
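+ // Illustrative only: in multi-cluster mode the hard-coded cluster="$cluster"
+ // matcher is rewritten to use $._config.per_cluster_label; otherwise the
+ // matcher (including its trailing comma) is dropped from the expression.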
}
for e in p.targets
],
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-operational.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-operational.libsonnet
index e8f5d982..27152ff6 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-operational.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-operational.libsonnet
@@ -11,7 +11,6 @@ local utils = import 'mixin-utils/utils.libsonnet';
showAnnotations:: true,
showLinks:: true,
showMultiCluster:: true,
- clusterLabel:: $._config.per_cluster_label,
hiddenRows:: [
'Cassandra',
@@ -25,17 +24,31 @@ local utils = import 'mixin-utils/utils.libsonnet';
jobMatchers:: {
cortexgateway: [utils.selector.re('job', '($namespace)/cortex-gw(-internal)?')],
- distributor: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'distributor'))],
- ingester: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*'))],
- querier: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'querier'))],
- queryFrontend: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'query-frontend'))],
+ distributor: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(distributor|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'distributor'))],
+ ingester: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(ingester|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*'))],
+ querier: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(querier|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'querier'))],
+ queryFrontend: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(query-frontend|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'query-frontend'))],
},
podMatchers:: {
cortexgateway: [utils.selector.re('pod', 'cortex-gw')],
- distributor: [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-write.*' % $._config.ssd.pod_prefix_matcher else 'distributor.*'))],
- ingester: [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-write.*' % $._config.ssd.pod_prefix_matcher else 'ingester.*'))],
- querier: [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-read.*' % $._config.ssd.pod_prefix_matcher else 'querier.*'))],
+ distributor: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('pod', '(distributor|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-write.*' % $._config.ssd.pod_prefix_matcher else 'distributor.*'))],
+ ingester: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('pod', '(ingester|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-write.*' % $._config.ssd.pod_prefix_matcher else 'ingester.*'))],
+ querier: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('pod', '(querier|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-read.*' % $._config.ssd.pod_prefix_matcher else 'querier.*'))],
},
}
+ lokiOperational + {
@@ -62,7 +75,22 @@ local utils = import 'mixin-utils/utils.libsonnet';
local replaceClusterMatchers(expr) =
if dashboards['loki-operational.json'].showMultiCluster
- then expr
+ // Replace the recording rules cluster label with the per-cluster label
+ then std.strReplace(
+ // Replace the cluster label for equality matchers with the per-cluster label
+ std.strReplace(
+ // Replace the cluster label for regex matchers with the per-cluster label
+ std.strReplace(
+ expr,
+ 'cluster=~"$cluster"',
+ $._config.per_cluster_label + '=~"$cluster"'
+ ),
+ 'cluster="$cluster"',
+ $._config.per_cluster_label + '="$cluster"'
+ ),
+ 'cluster_',
+ $._config.per_cluster_label + '_'
+ )
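+ // Illustrative only: with a hypothetical per_cluster_label of 'k8s_cluster',
+ // 'cluster=~"$cluster"' becomes 'k8s_cluster=~"$cluster"', 'cluster="$cluster"'
+ // becomes 'k8s_cluster="$cluster"', and recording-rule prefixes such as
+ // 'cluster_job:' become 'k8s_cluster_job:'.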
else
std.strReplace(
std.strReplace(
@@ -143,7 +171,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
local replaceAllMatchers(expr) =
- replaceMatchers(replaceClusterMatchers(expr)),
+ replaceMatchers(expr),
local selectDatasource(ds) =
if ds == null || ds == '' then ds
@@ -179,7 +207,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
datasource: selectDatasource(super.datasource),
targets: if std.objectHas(p, 'targets') then [
e {
- expr: removeInternalComponents(p.title, e.expr),
+ expr: removeInternalComponents(p.title, replaceClusterMatchers(e.expr)),
}
for e in p.targets
] else [],
@@ -188,7 +216,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
datasource: selectDatasource(super.datasource),
targets: if std.objectHas(sp, 'targets') then [
e {
- expr: removeInternalComponents(p.title, e.expr),
+ expr: removeInternalComponents(p.title, replaceClusterMatchers(e.expr)),
}
for e in sp.targets
] else [],
@@ -197,7 +225,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
datasource: selectDatasource(super.datasource),
targets: if std.objectHas(ssp, 'targets') then [
e {
- expr: removeInternalComponents(p.title, e.expr),
+ expr: removeInternalComponents(p.title, replaceClusterMatchers(e.expr)),
}
for e in ssp.targets
] else [],
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet
index 83c1f0ea..21db04ea 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet
@@ -2,11 +2,19 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local utils = import 'mixin-utils/utils.libsonnet';
(import 'dashboard-utils.libsonnet') {
- local index_gateway_pod_matcher = if $._config.ssd.enabled then 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher else 'container="index-gateway"',
- local index_gateway_job_matcher = if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'index-gateway',
+ local index_gateway_pod_matcher = if $._config.meta_monitoring.enabled
+ then 'container=~"loki|index-gateway", pod=~"(index-gateway.*|%s-read.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher else 'container="index-gateway"',
+ local index_gateway_job_matcher = if $._config.meta_monitoring.enabled
+ then '(index-gateway.*|%s-read.*|loki-single-binary)' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'index-gateway',
- local ingester_pod_matcher = if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"',
- local ingester_job_matcher = if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.+',
+ local ingester_pod_matcher = if $._config.meta_monitoring.enabled
+ then 'container=~"loki|ingester", pod=~"(ingester.*|%s-write.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"',
+ local ingester_job_matcher = if $._config.meta_monitoring.enabled
+ then '(ingester.+|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.+',
grafanaDashboards+::
{
@@ -117,6 +125,38 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.containerDiskSpaceUtilizationPanel('Disk Space Utilization', index_gateway_job_matcher),
)
)
+ .addRowIf(
+ !$._config.ssd.enabled,
+ grafana.row.new('Bloom Gateway')
+ .addPanel(
+ $.containerCPUUsagePanel('CPU', 'bloom-gateway'),
+ )
+ .addPanel(
+ $.containerMemoryWorkingSetPanel('Memory (workingset)', 'bloom-gateway'),
+ )
+ .addPanel(
+ $.goHeapInUsePanel('Memory (go heap inuse)', 'bloom-gateway'),
+ )
+ .addPanel(
+ $.newQueryPanel('Disk Writes', 'Bps') +
+ $.queryPanel(
+ 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('bloom-gateway')],
+ '{{%s}} - {{device}}' % $._config.per_instance_label
+ ) +
+ $.withStacking,
+ )
+ .addPanel(
+ $.newQueryPanel('Disk Reads', 'Bps') +
+ $.queryPanel(
+ 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('bloom-gateway')],
+ '{{%s}} - {{device}}' % $._config.per_instance_label
+ ) +
+ $.withStacking,
+ )
+ .addPanel(
+ $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', 'bloom-gateway'),
+ )
+ )
.addRow(
$.row('Ingester')
.addPanel(
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet
index 3da4e200..6d75993d 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet
@@ -5,8 +5,56 @@ local utils = import 'mixin-utils/utils.libsonnet';
local dashboards = self,
local showBigTable = false,
- local http_routes = 'loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values',
- local grpc_routes = '/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs',
+ // Available HTTP routes can be collected with the following instant query:
+ // count by (route) (loki_request_duration_seconds_count{route!~"/.*"})
+ local http_routes = '(%s)' % std.join(
+ '|', [
+ 'api_prom_rules',
+ 'api_prom_rules_namespace_groupname',
+ 'api_v1_rules',
+ 'loki_api_v1_delete',
+ 'loki_api_v1_detected_labels',
+ 'loki_api_v1_index_stats',
+ 'loki_api_v1_index_volume',
+ 'loki_api_v1_index_volume_range',
+ 'loki_api_v1_label_name_values',
+ 'loki_api_v1_label_values',
+ 'loki_api_v1_labels',
+ 'loki_api_v1_patterns',
+ 'loki_api_v1_query',
+ 'loki_api_v1_query_range',
+ 'loki_api_v1_series',
+ 'otlp_v1_logs',
+ 'prometheus_api_v1_rules',
+ ]
+ ),
+
+ // Available GRPC routes can be collected with the following instant query:
+ // count by (route) (loki_request_duration_seconds_count{route=~"/.*"})
+ local grpc_routes = '(%s)' % std.join(
+ '|', [
+ '/base.Ruler/Rules',
+ '/indexgatewaypb.IndexGateway/GetChunkRef',
+ '/indexgatewaypb.IndexGateway/GetSeries',
+ '/indexgatewaypb.IndexGateway/GetShards',
+ '/indexgatewaypb.IndexGateway/GetStats',
+ '/indexgatewaypb.IndexGateway/GetVolume',
+ '/indexgatewaypb.IndexGateway/LabelNamesForMetricName',
+ '/indexgatewaypb.IndexGateway/LabelValuesForMetricName',
+ '/indexgatewaypb.IndexGateway/QueryIndex',
+ '/logproto.BloomGateway/FilterChunkRefs',
+ '/logproto.Pattern/Query',
+ '/logproto.Querier/GetChunkIDs',
+ '/logproto.Querier/GetDetectedLabels',
+ '/logproto.Querier/GetStats',
+ '/logproto.Querier/GetVolume',
+ '/logproto.Querier/Label',
+ '/logproto.Querier/Query',
+ '/logproto.Querier/QuerySample',
+ '/logproto.Querier/Series',
+ '/logproto.StreamData/GetStreamRates',
+ ]
+ ),
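+ // Illustrative only: both lists render to a single anchored alternation
+ // regex, consumed by selectors further down such as
+ //   'loki_request_duration_seconds_count{%s route=~"%s"}' % [selector, grpc_routes]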
local latencyPanelWithExtraGrouping(metricName, selector, multiplier='1e3', extra_grouping='') = {
nullPointMode: 'null as zero',
@@ -31,20 +79,35 @@ local utils = import 'mixin-utils/utils.libsonnet';
local cfg = self,
showMultiCluster:: true,
- clusterLabel:: $._config.per_cluster_label,
clusterMatchers::
if cfg.showMultiCluster then
- [utils.selector.re(cfg.clusterLabel, '$cluster')]
+ [utils.selector.re($._config.per_cluster_label, '$cluster')]
else
[],
matchers:: {
cortexgateway: [utils.selector.re('job', '($namespace)/cortex-gw(-internal)?')],
- queryFrontend: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'query-frontend'))],
- querier: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'querier'))],
- ingester: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester'))],
- ingesterZoneAware: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester-zone.*'))],
- querierOrIndexGateway: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else '(querier|index-gateway)'))],
+ queryFrontend: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(query-frontend|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'query-frontend'))],
+ querier: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(querier|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'querier'))],
+ ingester: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(ingester|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester'))],
+ ingesterZoneAware: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(ingester-zone-.*|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester-zone.*'))],
+ querierOrIndexGateway: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(querier|index-gateway|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else '(querier|index-gateway)'))],
+ indexGateway: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(index-gateway|%s-backend|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-backend' % $._config.ssd.pod_prefix_matcher else 'index-gateway'))],
+ bloomGateway: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(bloom-gateway|%s-backend|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-backend' % $._config.ssd.pod_prefix_matcher else 'bloom-gateway'))],
},
local selector(matcherId) =
@@ -59,6 +122,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
ingesterSelector:: selector('ingester'),
ingesterZoneSelector:: selector('ingesterZoneAware'),
querierOrIndexGatewaySelector:: selector('querierOrIndexGateway'),
+ indexGatewaySelector:: selector('indexGateway'),
+ bloomGatewaySelector:: selector('bloomGateway'),
} +
$.dashboard('Loki / Reads', uid='reads')
.addCluster()
@@ -194,19 +259,53 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addRowIf(
!$._config.ssd.enabled,
- $.row('Index')
+ $.row('Index Gateway')
.addPanel(
$.newQueryPanel('QPS') +
- $.newQpsPanel('loki_index_request_duration_seconds_count{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector)
+ $.newQpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].indexGatewaySelector, grpc_routes])
)
.addPanel(
$.newQueryPanel('Latency', 'ms') +
- $.latencyPanel('loki_index_request_duration_seconds', '{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector)
+ utils.latencyRecordingRulePanel(
+ 'loki_request_duration_seconds',
+ dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.indexGateway + [utils.selector.re('route', grpc_routes)],
+ sum_by=['route']
+ )
)
.addPanel(
p99LatencyByPod(
- 'loki_index_request_duration_seconds',
- '{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector
+ 'loki_request_duration_seconds',
+ $.toPrometheusSelector(
+ dashboards['loki-reads.json'].clusterMatchers +
+ dashboards['loki-reads.json'].matchers.indexGateway +
+ [utils.selector.re('route', grpc_routes)]
+ ),
+ )
+ )
+ )
+ .addRowIf(
+ !$._config.ssd.enabled,
+ $.row('Bloom Gateway')
+ .addPanel(
+ $.newQueryPanel('QPS') +
+ $.newQpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].bloomGatewaySelector, grpc_routes])
+ )
+ .addPanel(
+ $.newQueryPanel('Latency', 'ms') +
+ utils.latencyRecordingRulePanel(
+ 'loki_request_duration_seconds',
+ dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.bloomGateway + [utils.selector.re('route', grpc_routes)],
+ sum_by=['route']
+ )
+ )
+ .addPanel(
+ p99LatencyByPod(
+ 'loki_request_duration_seconds',
+ $.toPrometheusSelector(
+ dashboards['loki-reads.json'].clusterMatchers +
+ dashboards['loki-reads.json'].matchers.bloomGateway +
+ [utils.selector.re('route', grpc_routes)]
+ ),
)
)
)
@@ -225,8 +324,27 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
)
)
- .addRow(
- $.row('BoltDB Shipper')
+ .addRowIf(
+ !$._config.ssd.enabled,
+ $.row('TSDB Index')
+ .addPanel(
+ $.newQueryPanel('QPS') +
+ $.newQpsPanel('loki_index_request_duration_seconds_count{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector)
+ )
+ .addPanel(
+ $.newQueryPanel('Latency', 'ms') +
+ $.latencyPanel('loki_index_request_duration_seconds', '{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector)
+ )
+ .addPanel(
+ p99LatencyByPod(
+ 'loki_index_request_duration_seconds',
+ '{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector
+ )
+ )
+ )
+ .addRowIf(
+ !$._config.ssd.enabled,
+ $.row('BoltDB Index')
.addPanel(
$.newQueryPanel('QPS') +
$.newQpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s operation="Shipper.Query"}' % dashboards['loki-reads.json'].querierOrIndexGatewaySelector)
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet
index 9896a524..c2e461c3 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet
@@ -1,8 +1,12 @@
local utils = import 'mixin-utils/utils.libsonnet';
(import 'dashboard-utils.libsonnet') {
- local compactor_pod_matcher = if $._config.ssd.enabled then 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher else 'container="compactor"',
- local compactor_job_matcher = if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'compactor',
+ local compactor_pod_matcher = if $._config.meta_monitoring.enabled
+ then 'pod=~"(compactor.*|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher else 'container="compactor"',
+ local compactor_job_matcher = if $._config.meta_monitoring.enabled
+ then '"(compactor|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then '%s-backend' % $._config.ssd.pod_prefix_matcher else 'compactor',
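+ // Illustrative only: assuming the default pod_prefix_matcher 'loki', the job
+ // matcher is (compactor|loki-backend.*|loki-single-binary) with
+ // meta-monitoring, loki-backend in SSD mode, and compactor otherwise.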
grafanaDashboards+::
{
'loki-retention.json':
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes-resources.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes-resources.libsonnet
index f25aeb4b..1d4c693a 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes-resources.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes-resources.libsonnet
@@ -2,8 +2,12 @@ local grafana = import 'grafonnet/grafana.libsonnet';
local utils = import 'mixin-utils/utils.libsonnet';
(import 'dashboard-utils.libsonnet') {
- local ingester_pod_matcher = if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"',
- local ingester_job_matcher = if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*',
+ local ingester_pod_matcher = if $._config.meta_monitoring.enabled
+ then 'container=~"loki|ingester", pod=~"(ingester.*|%s-write.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"',
+ local ingester_job_matcher = if $._config.meta_monitoring.enabled
+ then '(ingester.*|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher
+ else if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*',
grafanaDashboards+::
{
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet
index bedb9ca1..8cde2465 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet
@@ -9,19 +9,26 @@ local utils = import 'mixin-utils/utils.libsonnet';
local cfg = self,
showMultiCluster:: true,
- clusterLabel:: $._config.per_cluster_label,
clusterMatchers::
if cfg.showMultiCluster then
- [utils.selector.re(cfg.clusterLabel, '$cluster')]
+ [utils.selector.re($._config.per_cluster_label, '$cluster')]
else
[],
matchers:: {
cortexgateway: [utils.selector.re('job', '($namespace)/cortex-gw(-internal)?')],
- distributor: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'distributor'))],
- ingester: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester'))],
- ingester_zone: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester-zone.*'))],
- any_ingester: [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*'))],
+ distributor: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(distributor|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'distributor'))],
+ ingester: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(ingester|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester'))],
+ ingester_zone: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(ingester-zone.*|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester-zone.*'))],
+ any_ingester: if $._config.meta_monitoring.enabled
+ then [utils.selector.re('job', '($namespace)/(ingester.*|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)]
+ else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*'))],
},
local selector(matcherId) =
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/recording-rules.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/recording-rules.libsonnet
index 2d943807..46618da9 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/recording-rules.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/recording-rules.libsonnet
@@ -7,7 +7,7 @@ local template = import 'grafonnet/template.libsonnet';
template.new(
'tenant',
'$datasource',
- 'query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster="$cluster",namespace="$namespace"},"id","$1","tenant","(.*)")) by(id))',
+ 'query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{' + $._config.per_cluster_label + '="$cluster",namespace="$namespace"},"id","$1","tenant","(.*)")) by(id))',
regex='/"([^"]+)"/',
sort=1,
includeAll=true,
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts.libsonnet
index 03fe7d4c..832c7843 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts.libsonnet
@@ -6,6 +6,6 @@
(import 'alerts/blocks.libsonnet') +
(import 'alerts/compactor.libsonnet') +
(import 'alerts/autoscaling.libsonnet') +
- (import 'alerts/ingest-storage.libsonnet') +
+ (if $._config.ingest_storage_enabled then import 'alerts/ingest-storage.libsonnet' else {}) +
(import 'alerts/continuous-test.libsonnet'),
}
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet
index ca67c94b..36119c5a 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet
@@ -34,10 +34,14 @@ local utils = import 'mixin-utils/utils.libsonnet';
// Note if alert_aggregation_labels is "job", this will repeat the label. But
// prometheus seems to tolerate that.
expr: |||
- 100 * sum by (%(group_by)s, %(job_label)s, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",route!~"%(excluded_routes)s"}[%(range_interval)s]))
+ # The following 5xx status codes are considered non-errors:
+ # - 529: used by distributor rate limiting (using 529 instead of 429 to let the client retry)
+ # - 598: used by GEM gateway when the client is very slow to send the request and the gateway times out reading the request body
+ (
+ sum by (%(group_by)s, %(job_label)s, route) (rate(cortex_request_duration_seconds_count{status_code=~"5..",status_code!~"529|598",route!~"%(excluded_routes)s"}[%(range_interval)s]))
/
- sum by (%(group_by)s, %(job_label)s, route) (rate(cortex_request_duration_seconds_count{route!~"%(excluded_routes)s"}[%(range_interval)s]))
- > 1
+ sum by (%(group_by)s, %(job_label)s, route) (rate(cortex_request_duration_seconds_count{route!~"%(excluded_routes)s"}[%(range_interval)s]))
+ ) * 100 > 1
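+ # e.g. 0.5 errors/s over 40 req/s on a route is 1.25%%, which exceeds 1 and fires.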
||| % {
group_by: $._config.alert_aggregation_labels,
job_label: $._config.per_job_label,
@@ -79,26 +83,6 @@ local utils = import 'mixin-utils/utils.libsonnet';
||| % $._config,
},
},
- {
- alert: $.alertName('QueriesIncorrect'),
- expr: |||
- 100 * sum by (%(group_by)s) (rate(test_exporter_test_case_result_total{result="fail"}[%(range_interval)s]))
- /
- sum by (%(group_by)s) (rate(test_exporter_test_case_result_total[%(range_interval)s])) > 1
- ||| % {
- group_by: $._config.alert_aggregation_labels,
- range_interval: $.alertRangeInterval(5),
- },
- 'for': '15m',
- labels: {
- severity: 'warning',
- },
- annotations: {
- message: |||
- The %(product)s cluster %(alert_aggregation_variables)s is experiencing {{ printf "%%.2f" $value }}%% incorrect query results.
- ||| % $._config,
- },
- },
{
alert: $.alertName('InconsistentRuntimeConfig'),
expr: |||
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet
index 8f6ed51d..eeec268f 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet
@@ -14,17 +14,18 @@
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0)
and
# Only if the ingester has ingested samples over the last 4h.
- (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0)
+ (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[4h])) > 0)
and
# Only if the ingester was ingesting samples 4h ago. This protects against the case where the ingester replica
# had ingested samples in the past, then no traffic was received for a long period and then it starts
# receiving samples again. Without this check, the alert would fire as soon as it gets back receiving
# samples, while a block shipment is expected within the next 4h.
- (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0)
+ (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[1h] offset 4h)) > 0)
||| % {
alert_aggregation_labels: $._config.alert_aggregation_labels,
per_instance_label: $._config.per_instance_label,
alert_aggregation_rule_prefix: $._config.alert_aggregation_rule_prefix,
+ recording_rules_range_interval: $._config.recording_rules_range_interval,
},
labels: {
severity: 'critical',
@@ -41,11 +42,12 @@
expr: |||
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0)
and
- (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0)
+ (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[4h])) > 0)
||| % {
alert_aggregation_labels: $._config.alert_aggregation_labels,
per_instance_label: $._config.per_instance_label,
alert_aggregation_rule_prefix: $._config.alert_aggregation_rule_prefix,
+ recording_rules_range_interval: $._config.recording_rules_range_interval,
},
labels: {
severity: 'critical',
@@ -218,10 +220,13 @@
},
},
{
- // Alert if the bucket index has not been updated for a given user.
+      // Alert if the bucket index has not been updated for a given user. The default update interval is 900 seconds,
+      // so we alert once two consecutive updates have been missed, plus a 300-second buffer to avoid false positives.
+      // It's important that this alert fires before queriers start to return errors because the bucket index is too
+      // old (3600 seconds by default).
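+      // That is (2 * 900s) + 300s = 2100s, which is the threshold used in the expression below.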
alert: $.alertName('BucketIndexNotUpdated'),
expr: |||
- min by(%(alert_aggregation_labels)s, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 7200
+ min by(%(alert_aggregation_labels)s, user) (time() - cortex_bucket_index_last_successful_update_timestamp_seconds) > 2100
||| % $._config,
labels: {
severity: 'critical',
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet
index d40c3a23..32158d6d 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet
@@ -57,11 +57,13 @@
},
// Some dashboards show panels grouping together multiple components of a given "path".
- // This mapping configures which components belong to each group.
+ // This mapping configures which components belong to each group. A component can belong
+ // to multiple groups.
local componentGroups = {
write: ['distributor', 'ingester', 'mimir_write'],
read: ['query_frontend', 'querier', 'ruler_query_frontend', 'ruler_querier', 'mimir_read'],
backend: ['query_scheduler', 'ruler_query_scheduler', 'ruler', 'store_gateway', 'compactor', 'alertmanager', 'overrides_exporter', 'mimir_backend'],
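+      // Read-path components dedicated to rule evaluation (see the "Remote ruler reads" dashboards).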
+ remote_ruler_read: ['ruler_query_frontend', 'ruler_query_scheduler', 'ruler_querier'],
},
// These are used by the dashboards and allow for the simultaneous display of
@@ -133,6 +135,7 @@
write: componentsGroupMatcher(componentGroups.write),
read: componentsGroupMatcher(componentGroups.read),
backend: componentsGroupMatcher(componentGroups.backend),
+ remote_ruler_read: componentsGroupMatcher(componentGroups.remote_ruler_read),
},
all_instances: std.join('|', std.map(function(name) componentNameRegexp[name], componentGroups.write + componentGroups.read + componentGroups.backend)),
@@ -194,6 +197,9 @@
    // Used to add extra annotations to all alerts. Careful: these take precedence over the default annotations.
alert_extra_annotations: {},
+ // Whether alerts for experimental ingest storage are enabled.
+ ingest_storage_enabled: true,
+
cortex_p99_latency_threshold_seconds: 2.5,
// Whether resources dashboards are enabled (based on cAdvisor metrics).
@@ -278,7 +284,7 @@
sum by (%(alert_aggregation_labels)s, deployment) (
label_replace(
label_replace(
- sum by (%(alert_aggregation_labels)s, %(per_instance_label)s)(rate(container_cpu_usage_seconds_total[1m])),
+ sum by (%(alert_aggregation_labels)s, %(per_instance_label)s)(rate(container_cpu_usage_seconds_total[%(recording_rules_range_interval)s])),
"deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
@@ -641,6 +647,10 @@
enabled: false,
hpa_name: $._config.autoscaling_hpa_prefix + 'cortex-gw.*',
},
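+    // Autoscaling panels for the ingester (disabled by default); the HPA name targets a single zone.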
+ ingester: {
+ enabled: false,
+ hpa_name: $._config.autoscaling_hpa_prefix + 'ingester-zone-a',
+ },
},
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards.libsonnet
index c4e9d6f9..a9fb0265 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards.libsonnet
@@ -23,6 +23,7 @@
(import 'dashboards/overview-networking.libsonnet') +
(import 'dashboards/reads-resources.libsonnet') +
(import 'dashboards/remote-ruler-reads-resources.libsonnet') +
+ (import 'dashboards/remote-ruler-reads-networking.libsonnet') +
(import 'dashboards/reads-networking.libsonnet') +
(import 'dashboards/writes-resources.libsonnet') +
(import 'dashboards/writes-networking.libsonnet') +
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-queries.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-queries.libsonnet
index 5ebf1bf8..70bbeb09 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-queries.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-queries.libsonnet
@@ -1,4 +1,30 @@
+local utils = import 'mixin-utils/utils.libsonnet';
+
{
+  // Helper function producing failure-rate percentage queries for native and classic histograms.
+ // Takes a metric name and a selector as strings and returns a dictionary with classic and native queries.
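+  // Example (the selector shown is illustrative): $.nativeClassicFailureRate('cortex_request_duration_seconds', 'job=~"gateway"').classic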
+ nativeClassicFailureRate(metric, selector):: {
+ local template = |||
+ (
+ # gRPC errors are not tracked as 5xx but "error".
+ sum(%(countFailQuery)s)
+ or
+ # Handle the case no failure has been tracked yet.
+ vector(0)
+ )
+ /
+ sum(%(countQuery)s)
+ |||,
+ classic: template % {
+ countFailQuery: utils.nativeClassicHistogramCountRate(metric, selector + ',status_code=~"5.*|error"').classic,
+ countQuery: utils.nativeClassicHistogramCountRate(metric, selector).classic,
+ },
+ native: template % {
+ countFailQuery: utils.nativeClassicHistogramCountRate(metric, selector + ',status_code=~"5.*|error"').native,
+ countQuery: utils.nativeClassicHistogramCountRate(metric, selector).native,
+ },
+ },
+
// This object contains common queries used in the Mimir dashboards.
  // These queries are NOT intended to be configurable or overridable via jsonnet,
// but they're defined in a common place just to share them between different dashboards.
@@ -25,55 +51,43 @@
query_http_routes_regex: '(prometheus|api_prom)_api_v1_query(_range)?',
gateway: {
+ // deprecated, will be removed
writeRequestsPerSecond: 'cortex_request_duration_seconds_count{%(gatewayMatcher)s, route=~"%(writeHTTPRoutesRegex)s"}' % variables,
readRequestsPerSecond: 'cortex_request_duration_seconds_count{%(gatewayMatcher)s, route=~"%(readHTTPRoutesRegex)s"}' % variables,
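+      // Metric name and selectors used to build the native/classic failure-rate queries below.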
+ local p = self,
+ requestsPerSecondMetric: 'cortex_request_duration_seconds',
+ writeRequestsPerSecondSelector: '%(gatewayMatcher)s, route=~"%(writeHTTPRoutesRegex)s"' % variables,
+ readRequestsPerSecondSelector: '%(gatewayMatcher)s, route=~"%(readHTTPRoutesRegex)s"' % variables,
+
// Write failures rate as percentage of total requests.
- writeFailuresRate: |||
- (
- sum(rate(cortex_request_duration_seconds_count{%(gatewayMatcher)s, route=~"%(writeHTTPRoutesRegex)s",status_code=~"5.*"}[$__rate_interval]))
- or
- # Handle the case no failure has been tracked yet.
- vector(0)
- )
- /
- sum(rate(cortex_request_duration_seconds_count{%(gatewayMatcher)s, route=~"%(writeHTTPRoutesRegex)s"}[$__rate_interval]))
- ||| % variables,
+ writeFailuresRate: $.nativeClassicFailureRate(p.requestsPerSecondMetric, p.writeRequestsPerSecondSelector),
// Read failures rate as percentage of total requests.
- readFailuresRate: |||
- (
- sum(rate(cortex_request_duration_seconds_count{%(gatewayMatcher)s, route=~"%(readHTTPRoutesRegex)s",status_code=~"5.*"}[$__rate_interval]))
- or
- # Handle the case no failure has been tracked yet.
- vector(0)
- )
- /
- sum(rate(cortex_request_duration_seconds_count{%(gatewayMatcher)s, route=~"%(readHTTPRoutesRegex)s"}[$__rate_interval]))
- ||| % variables,
+ readFailuresRate: $.nativeClassicFailureRate(p.requestsPerSecondMetric, p.readRequestsPerSecondSelector),
},
distributor: {
+ // deprecated, will be removed
writeRequestsPerSecond: 'cortex_request_duration_seconds_count{%(distributorMatcher)s, route=~"%(writeGRPCRoutesRegex)s|%(writeHTTPRoutesRegex)s"}' % variables,
+
+ local p = self,
+ requestsPerSecondMetric: 'cortex_request_duration_seconds',
+ writeRequestsPerSecondSelector: '%(distributorMatcher)s, route=~"%(writeGRPCRoutesRegex)s|%(writeHTTPRoutesRegex)s"' % variables,
samplesPerSecond: 'sum(%(groupPrefixJobs)s:cortex_distributor_received_samples:rate5m{%(distributorMatcher)s})' % variables,
exemplarsPerSecond: 'sum(%(groupPrefixJobs)s:cortex_distributor_received_exemplars:rate5m{%(distributorMatcher)s})' % variables,
// Write failures rate as percentage of total requests.
- writeFailuresRate: |||
- (
- # gRPC errors are not tracked as 5xx but "error".
- sum(rate(cortex_request_duration_seconds_count{%(distributorMatcher)s, route=~"%(writeGRPCRoutesRegex)s|%(writeHTTPRoutesRegex)s",status_code=~"5.*|error"}[$__rate_interval]))
- or
- # Handle the case no failure has been tracked yet.
- vector(0)
- )
- /
- sum(rate(cortex_request_duration_seconds_count{%(distributorMatcher)s, route=~"%(writeGRPCRoutesRegex)s|%(writeHTTPRoutesRegex)s"}[$__rate_interval]))
- ||| % variables,
+ writeFailuresRate: $.nativeClassicFailureRate(p.requestsPerSecondMetric, p.writeRequestsPerSecondSelector),
},
query_frontend: {
+ // deprecated, will be removed
readRequestsPerSecond: 'cortex_request_duration_seconds_count{%(queryFrontendMatcher)s, route=~"%(readHTTPRoutesRegex)s"}' % variables,
+
+ local p = self,
+ readRequestsPerSecondMetric: 'cortex_request_duration_seconds',
+ readRequestsPerSecondSelector: '%(queryFrontendMatcher)s, route=~"%(readHTTPRoutesRegex)s"' % variables,
      // These query routes are used in the overview and other dashboards; everything else is considered "other" queries.
// Has to be a list to keep the same colors as before, see overridesNonErrorColorsPalette.
local overviewRoutes = [
@@ -124,16 +138,7 @@
labelValuesCardinalityQueriesPerSecond: queryPerSecond('labelValuesCardinality'),
// Read failures rate as percentage of total requests.
- readFailuresRate: |||
- (
- sum(rate(cortex_request_duration_seconds_count{%(queryFrontendMatcher)s, route=~"%(readHTTPRoutesRegex)s",status_code=~"5.*"}[$__rate_interval]))
- or
- # Handle the case no failure has been tracked yet.
- vector(0)
- )
- /
- sum(rate(cortex_request_duration_seconds_count{%(queryFrontendMatcher)s, route=~"%(readHTTPRoutesRegex)s"}[$__rate_interval]))
- ||| % variables,
+ readFailuresRate: $.nativeClassicFailureRate(p.readRequestsPerSecondMetric, p.readRequestsPerSecondSelector),
},
ruler: {
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet
index 47a347cb..a5df3935 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet
@@ -596,179 +596,160 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.latencyPanel('cortex_kv_request_duration_seconds', '{%s, kv_name=~"%s"}' % [$.jobMatcher($._config.job_names[jobName]), kvName])
),
- cpuAndMemoryBasedAutoScalingRow(componentTitle)::
- local component = std.asciiLower(componentTitle);
- local field = std.strReplace(component, '-', '_');
- super.row('%s - autoscaling' % [componentTitle])
- .addPanel(
- local title = 'Replicas';
- $.timeseriesPanel(title) +
- $.queryPanel(
- [
- |||
- max by (scaletargetref_name) (
- kube_horizontalpodautoscaler_spec_max_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
- # Add the scaletargetref_name label for readability
- + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name)
- 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
- )
- ||| % {
- namespace_matcher: $.namespaceMatcher(),
- hpa_name: $._config.autoscaling[field].hpa_name,
- cluster_labels: std.join(', ', $._config.cluster_labels),
- },
- |||
- max by (scaletargetref_name) (
- kube_horizontalpodautoscaler_status_current_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
- # HPA doesn't go to 0 replicas, so we multiply by 0 if the HPA is not active
- * on (%(cluster_labels)s, horizontalpodautoscaler)
- kube_horizontalpodautoscaler_status_condition{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s", condition="ScalingActive", status="true"}
- # Add the scaletargetref_name label for readability
- + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name)
- 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
- )
- ||| % {
- namespace_matcher: $.namespaceMatcher(),
- hpa_name: $._config.autoscaling[field].hpa_name,
- cluster_labels: std.join(', ', $._config.cluster_labels),
- },
- |||
- max by (scaletargetref_name) (
- kube_horizontalpodautoscaler_spec_min_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
- # Add the scaletargetref_name label for readability
- + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name)
- 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
- )
- ||| % {
- namespace_matcher: $.namespaceMatcher(),
- hpa_name: $._config.autoscaling[field].hpa_name,
- cluster_labels: std.join(', ', $._config.cluster_labels),
- },
- ],
- [
- 'Max {{ scaletargetref_name }}',
- 'Current {{ scaletargetref_name }}',
- 'Min {{ scaletargetref_name }}',
- ],
- ) +
- $.panelDescription(
- title,
+ // The provided componentName should be the name of a component among the ones defined in $._config.autoscaling.
+ autoScalingActualReplicas(componentName)::
+ local title = 'Replicas';
+ local componentTitle = std.strReplace(componentName, '_', '-');
+
+ $.timeseriesPanel(title) +
+ $.queryPanel(
+ [
|||
- The maximum and current number of %s replicas.
- Note: The current number of replicas can still show 1 replica even when scaled to 0.
- Because HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.
- ||| % [component]
- ) +
- {
- fieldConfig+: {
- overrides: [
- $.overrideField('byRegexp', '/Max .+/', [
- $.overrideProperty('custom.fillOpacity', 0),
- $.overrideProperty('custom.lineStyle', { fill: 'dash' }),
- ]),
- $.overrideField('byRegexp', '/Current .+/', [
- $.overrideProperty('custom.fillOpacity', 0),
- ]),
- $.overrideField('byRegexp', '/Min .+/', [
- $.overrideProperty('custom.fillOpacity', 0),
- $.overrideProperty('custom.lineStyle', { fill: 'dash' }),
- ]),
- ],
+ max by (scaletargetref_name) (
+ kube_horizontalpodautoscaler_spec_max_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
+ # Add the scaletargetref_name label for readability
+ + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name)
+ 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
+ )
+ ||| % {
+ namespace_matcher: $.namespaceMatcher(),
+ hpa_name: $._config.autoscaling[componentName].hpa_name,
+ cluster_labels: std.join(', ', $._config.cluster_labels),
},
+ |||
+ max by (scaletargetref_name) (
+ kube_horizontalpodautoscaler_status_current_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
+ # HPA doesn't go to 0 replicas, so we multiply by 0 if the HPA is not active
+ * on (%(cluster_labels)s, horizontalpodautoscaler)
+ kube_horizontalpodautoscaler_status_condition{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s", condition="ScalingActive", status="true"}
+ # Add the scaletargetref_name label for readability
+ + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name)
+ 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
+ )
+ ||| % {
+ namespace_matcher: $.namespaceMatcher(),
+ hpa_name: $._config.autoscaling[componentName].hpa_name,
+ cluster_labels: std.join(', ', $._config.cluster_labels),
+ },
+ |||
+ max by (scaletargetref_name) (
+ kube_horizontalpodautoscaler_spec_min_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
+ # Add the scaletargetref_name label for readability
+ + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name)
+ 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"}
+ )
+ ||| % {
+ namespace_matcher: $.namespaceMatcher(),
+ hpa_name: $._config.autoscaling[componentName].hpa_name,
+ cluster_labels: std.join(', ', $._config.cluster_labels),
+ },
+ ],
+ [
+ 'Max {{ scaletargetref_name }}',
+ 'Current {{ scaletargetref_name }}',
+ 'Min {{ scaletargetref_name }}',
+ ],
+ ) +
+ $.panelDescription(
+ title,
+ |||
+ The maximum and current number of %s replicas.
+ Note: The current number of replicas can still show 1 replica even when scaled to 0.
+ Because HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.
+ ||| % [componentTitle]
+ ) +
+ {
+ fieldConfig+: {
+ overrides: [
+ $.overrideField('byRegexp', '/Max .+/', [
+ $.overrideProperty('custom.fillOpacity', 0),
+ $.overrideProperty('custom.lineStyle', { fill: 'dash' }),
+ ]),
+ $.overrideField('byRegexp', '/Current .+/', [
+ $.overrideProperty('custom.fillOpacity', 0),
+ ]),
+ $.overrideField('byRegexp', '/Min .+/', [
+ $.overrideProperty('custom.fillOpacity', 0),
+ $.overrideProperty('custom.lineStyle', { fill: 'dash' }),
+ ]),
+ ],
},
- )
- .addPanel(
- local title = 'Scaling metric (CPU): Desired replicas';
- $.timeseriesPanel(title) +
- $.queryPanel(
- [
- |||
- sum by (scaler) (
+ },
+
+ // The provided componentName should be the name of a component among the ones defined in $._config.autoscaling.
+ autoScalingDesiredReplicasByScalingMetricPanel(componentName, scalingMetricName, scalingMetricID)::
+ local title = if scalingMetricName != '' then 'Scaling metric (%s): Desired replicas' % scalingMetricName else 'Desired replicas';
+ local scalerSelector = if scalingMetricID != '' then ('.*%s.*' % scalingMetricID) else '.+';
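+    // scalingMetricID is matched as a substring of the KEDA scaler name, e.g. "cpu" yields the selector ".*cpu.*".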
+
+ $.timeseriesPanel(title) +
+ $.queryPanel(
+ [
+ |||
+ sum by (scaler) (
+ label_replace(
+ keda_scaler_metrics_value{%(cluster_label)s=~"$cluster", exported_namespace=~"$namespace", scaler=~"%(scaler_selector)s"},
+ "namespace", "$1", "exported_namespace", "(.*)"
+ )
+ /
+ on(%(aggregation_labels)s, scaledObject, metric) group_left label_replace(
label_replace(
- keda_scaler_metrics_value{%(cluster_label)s=~"$cluster", exported_namespace=~"$namespace", scaler=~".*cpu.*"},
- "namespace", "$1", "exported_namespace", "(.*)"
- )
- /
- on(%(aggregation_labels)s, scaledObject, metric) group_left label_replace(
- label_replace(
- kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"},
- "metric", "$1", "metric_name", "(.+)"
- ),
- "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)"
- )
+ kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"},
+ "metric", "$1", "metric_name", "(.+)"
+ ),
+ "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)"
)
- ||| % {
- aggregation_labels: $._config.alert_aggregation_labels,
- cluster_label: $._config.per_cluster_label,
- hpa_prefix: $._config.autoscaling_hpa_prefix,
- hpa_name: $._config.autoscaling[field].hpa_name,
- namespace: $.namespaceMatcher(),
- },
- ], [
- '{{ scaler }}',
- ]
- ) +
- $.panelDescription(
- title,
- |||
- This panel shows the scaling metric exposed by KEDA divided by the target/threshold used.
- It should represent the desired number of replicas, ignoring the min/max constraints applied later.
- |||
- ),
+ )
+ ||| % {
+ aggregation_labels: $._config.alert_aggregation_labels,
+ cluster_label: $._config.per_cluster_label,
+ hpa_prefix: $._config.autoscaling_hpa_prefix,
+ hpa_name: $._config.autoscaling[componentName].hpa_name,
+ namespace: $.namespaceMatcher(),
+ scaler_selector: scalerSelector,
+ },
+ ], [
+ '{{ scaler }}',
+ ]
+ ) +
+ $.panelDescription(
+ title,
+ |||
+ This panel shows the scaling metric exposed by KEDA divided by the target/threshold used.
+ It should represent the desired number of replicas, ignoring the min/max constraints applied later.
+ |||
+ ),
+
+ // The provided componentName should be the name of a component among the ones defined in $._config.autoscaling.
+ autoScalingFailuresPanel(componentName)::
+ local title = 'Autoscaler failures rate';
+
+ $.timeseriesPanel(title) +
+ $.queryPanel(
+ $.filterKedaScalerErrorsByHPA($._config.autoscaling[componentName].hpa_name),
+ '{{scaler}} failures'
+ ) +
+ $.panelDescription(
+ title,
+ |||
+ The rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom
+        metrics server is unable to query the scaling metric from Prometheus, so the autoscaler wouldn't work properly.
+ |||
+ ),
+
+ cpuAndMemoryBasedAutoScalingRow(componentTitle)::
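+    // Derive the $._config.autoscaling key from the display title, e.g. "Ruler-Querier" becomes "ruler_querier".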
+ local componentName = std.strReplace(std.asciiLower(componentTitle), '-', '_');
+ super.row('%s – autoscaling' % [componentTitle])
+ .addPanel(
+ $.autoScalingActualReplicas(componentName)
)
.addPanel(
- local title = 'Scaling metric (memory): Desired replicas';
- $.timeseriesPanel(title) +
- $.queryPanel(
- [
- |||
- sum by (scaler) (
- label_replace(
- keda_scaler_metrics_value{%(cluster_label)s=~"$cluster", exported_namespace=~"$namespace", scaler=~".*memory.*"},
- "namespace", "$1", "exported_namespace", "(.*)"
- )
- /
- on(%(aggregation_labels)s, scaledObject, metric) group_left label_replace(
- label_replace(
- kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"},
- "metric", "$1", "metric_name", "(.+)"
- ),
- "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)"
- )
- )
- ||| % {
- aggregation_labels: $._config.alert_aggregation_labels,
- cluster_label: $._config.per_cluster_label,
- hpa_prefix: $._config.autoscaling_hpa_prefix,
- hpa_name: $._config.autoscaling[field].hpa_name,
- namespace: $.namespaceMatcher(),
- },
- ], [
- '{{ scaler }}',
- ]
- ) +
- $.panelDescription(
- title,
- |||
- This panel shows the scaling metric exposed by KEDA divided by the target/threshold used.
- It should represent the desired number of replicas, ignoring the min/max constraints applied later.
- |||
- ),
+ $.autoScalingDesiredReplicasByScalingMetricPanel(componentName, 'CPU', 'cpu')
)
.addPanel(
- local title = 'Autoscaler failures rate';
- $.timeseriesPanel(title) +
- $.queryPanel(
- $.filterKedaScalerErrorsByHPA($._config.autoscaling[field].hpa_name),
- '{{scaler}} failures'
- ) +
- $.panelDescription(
- title,
- |||
- The rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom
- metrics server is unable to query the scaling metric from Prometheus so the autoscaler woudln't work properly.
- |||
- ),
+ $.autoScalingDesiredReplicasByScalingMetricPanel(componentName, 'memory', 'memory')
+ )
+ .addPanel(
+ $.autoScalingFailuresPanel(componentName)
),
newStatPanel(queries, legends='', unit='percentunit', decimals=1, thresholds=[], instant=false, novalue='')::
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/overview.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/overview.libsonnet
index bbd038ca..233fd6e5 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/overview.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/overview.libsonnet
@@ -33,6 +33,7 @@ local filename = 'mimir-overview.json';
assert std.md5(filename) == 'ffcd83628d7d4b5a03d1cafd159e6c9c' : 'UID of the dashboard has changed, please update references to dashboard.';
($.dashboard('Overview') + { uid: std.md5(filename) })
.addClusterSelectorTemplates()
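+  // Adds a template variable that toggles the failure-rate panels between native- and classic-histogram queries.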
+ .addShowNativeLatencyVariable()
.addRow(
$.row('%(product)s cluster health' % $._config)
@@ -53,9 +54,21 @@ local filename = 'mimir-overview.json';
'Status',
[
// Write failures.
- if $._config.gateway_enabled then $.queries.gateway.writeFailuresRate else $.queries.distributor.writeFailuresRate,
+ utils.showNativeHistogramQuery(
+ if $._config.gateway_enabled then $.queries.gateway.writeFailuresRate else $.queries.distributor.writeFailuresRate
+ ),
+ // Write failures but from classic histograms.
+ utils.showClassicHistogramQuery(
+ if $._config.gateway_enabled then $.queries.gateway.writeFailuresRate else $.queries.distributor.writeFailuresRate
+ ),
// Read failures.
- if $._config.gateway_enabled then $.queries.gateway.readFailuresRate else $.queries.query_frontend.readFailuresRate,
+ utils.showNativeHistogramQuery(
+ if $._config.gateway_enabled then $.queries.gateway.readFailuresRate else $.queries.query_frontend.readFailuresRate,
+ ),
+ // Read failures but from classic histograms.
+ utils.showClassicHistogramQuery(
+ if $._config.gateway_enabled then $.queries.gateway.readFailuresRate else $.queries.query_frontend.readFailuresRate,
+ ),
// Rule evaluation failures.
$.queries.ruler.evaluations.failuresRate,
// Alerting notifications.
@@ -84,7 +97,7 @@ local filename = 'mimir-overview.json';
// Object storage failures.
$.queries.storage.failuresRate,
],
- ['Writes', 'Reads', 'Rule evaluations', 'Alerting notifications', 'Object storage']
+ ['Writes', 'Writes', 'Reads', 'Reads', 'Rule evaluations', 'Alerting notifications', 'Object storage']
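+      // Writes and Reads are each listed twice because every failure rate is queried twice: once from native histograms and once from classic ones.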
)
)
.addPanel(
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads-resources.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads-resources.libsonnet
index ee82ed22..df04952f 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads-resources.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads-resources.libsonnet
@@ -7,7 +7,7 @@ local filename = 'mimir-remote-ruler-reads-resources.json';
($.dashboard('Remote ruler reads resources') + { uid: std.md5(filename) })
.addClusterSelectorTemplates(false)
.addRow(
- $.row('Query-frontend (dedicated to ruler)')
+ $.row('Ruler-query-frontend')
.addPanel(
$.containerCPUUsagePanelByComponent('ruler_query_frontend'),
)
@@ -19,7 +19,7 @@ local filename = 'mimir-remote-ruler-reads-resources.json';
)
)
.addRow(
- $.row('Query-scheduler (dedicated to ruler)')
+ $.row('Ruler-query-scheduler')
.addPanel(
$.containerCPUUsagePanelByComponent('ruler_query_scheduler'),
)
@@ -31,7 +31,7 @@ local filename = 'mimir-remote-ruler-reads-resources.json';
)
)
.addRow(
- $.row('Querier (dedicated to ruler)')
+ $.row('Ruler-querier')
.addPanel(
$.containerCPUUsagePanelByComponent('ruler_querier'),
)
diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet
index df1f48f4..87164d27 100644
--- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet
+++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet
@@ -54,7 +54,7 @@ local filename = 'mimir-remote-ruler-reads.json';
)
)
.addRow(
- $.row('Query-frontend (dedicated to ruler)')
+ $.row('Ruler-query-frontend')
.addPanel(
$.timeseriesPanel('Requests / sec') +
$.qpsPanel('cortex_request_duration_seconds_count{%s, route=~"%s"}' % [$.jobMatcher($._config.job_names.ruler_query_frontend), rulerRoutesRegex])
@@ -80,7 +80,7 @@ local filename = 'mimir-remote-ruler-reads.json';
these panels will show "No data."