From 8d423d17980a617fa6ddec2cc5f8bf3bd59aa787 Mon Sep 17 00:00:00 2001 From: QuentinBisson Date: Thu, 23 May 2024 14:52:16 +0200 Subject: [PATCH] Add backend resource mixin for single scalable Signed-off-by: QuentinBisson --- .../dashboards/loki-backends-resources.json | 534 ++++++++++++++++++ production/loki-mixin/dashboards.libsonnet | 3 +- .../loki-backends-resources.libsonnet | 46 ++ 3 files changed, 582 insertions(+), 1 deletion(-) create mode 100644 production/loki-mixin-compiled-ssd/dashboards/loki-backends-resources.json create mode 100644 production/loki-mixin/dashboards/loki-backends-resources.libsonnet diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-backends-resources.json b/production/loki-mixin-compiled-ssd/dashboards/loki-backends-resources.json new file mode 100644 index 0000000000000..dd03d6d8b0d2b --- /dev/null +++ b/production/loki-mixin-compiled-ssd/dashboards/loki-backends-resources.json @@ -0,0 +1,534 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "gridPos": { }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk Writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "gridPos": { }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk Reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "gridPos": { }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki.*|enterprise-logs)-backend.*\"})", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk Space Utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend path", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Backend Resources", + "uid": "backends-resources", + "version": 0 + } \ No newline at end of file diff --git a/production/loki-mixin/dashboards.libsonnet b/production/loki-mixin/dashboards.libsonnet index 33f3b136d5ad6..eb6ae4ef658ba 100644 --- a/production/loki-mixin/dashboards.libsonnet +++ b/production/loki-mixin/dashboards.libsonnet @@ -3,10 +3,11 @@ (import 'dashboards/loki-chunks.libsonnet') + (import 'dashboards/loki-logs.libsonnet') + (import 'dashboards/loki-operational.libsonnet') + +(import 'dashboards/loki-backends-resources.libsonnet') + (import 'dashboards/loki-reads.libsonnet') + +(import 'dashboards/loki-reads-resources.libsonnet') + (import 'dashboards/loki-writes.libsonnet') + (import 'dashboards/loki-writes-resources.libsonnet') + -(import 'dashboards/loki-reads-resources.libsonnet') + (import 'dashboards/loki-deletion.libsonnet') + (import 'dashboards/loki-canary-dashboard.libsonnet') + (import 'dashboards/recording-rules.libsonnet') + diff --git a/production/loki-mixin/dashboards/loki-backends-resources.libsonnet b/production/loki-mixin/dashboards/loki-backends-resources.libsonnet new file mode 100644 index 0000000000000..a2a64a28f197d --- /dev/null +++ b/production/loki-mixin/dashboards/loki-backends-resources.libsonnet @@ -0,0 +1,46 @@ +local grafana = import 'grafonnet/grafana.libsonnet'; +local utils = import 'mixin-utils/utils.libsonnet'; + +(import 'dashboard-utils.libsonnet') { + local backend_pod_matcher = 'container="loki", pod=~"%s-backend.*"' % $._config.ssd.pod_prefix_matcher, + local backend_job_matcher = '%s-backend' % $._config.ssd.pod_prefix_matcher, + + grafanaDashboards+:: if !$._config.ssd.enabled then {} else { + 'loki-backends-resources.json': + $.dashboard('Loki / Backend Resources', uid='backends-resources') + .addCluster() + .addNamespace() + .addTag() + .addRow( + grafana.row.new('Backend path') + .addPanel( + $.CPUUsagePanel('CPU', backend_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', backend_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', backend_job_matcher), + ) + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(backend_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(backend_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', backend_job_matcher), + ) + ), + }, +}