diff --git a/artifacts/grafana-dashboards/eks-fargate/infrastructure/cluster.json b/artifacts/grafana-dashboards/eks-fargate/infrastructure/cluster.json new file mode 100644 index 0000000..15c1b26 --- /dev/null +++ b/artifacts/grafana-dashboards/eks-fargate/infrastructure/cluster.json @@ -0,0 +1,1707 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 26, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Memory", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 9, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "Memory Usage (w/o cache)", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 27, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Memory Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Pods" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/a87fb0d919ec0ea5f6543124e16c42a5/kubernetes-compute-resources-namespace-workloads?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields[1]}" + } + ] + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Workloads" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/a87fb0d919ec0ea5f6543124e16c42a5/kubernetes-compute-resources-namespace-workloads?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields[1]}" + } + ] + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Memory Usage" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Memory Requests" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Memory Requests %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Memory Limits" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "displayName", + "value": "Memory Limits %" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "namespace" + }, + "properties": [ + { + "id": "displayName", + "value": "Namespace" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/a87fb0d919ec0ea5f6543124e16c42a5/kubernetes-compute-resources-namespace-workloads?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields[1]}" + } + ] + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 10, + "interval": "1m", + "links": [], + "options": { + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + } + ], + "title": "Requests by Namespace", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 28, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Current Network Usage", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Current Receive Bandwidth" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Current Transmit Bandwidth" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Rate of Received Packets" + }, + { + "id": "unit", + "value": "pps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Rate of Transmitted Packets" + }, + { + "id": "unit", + "value": "pps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Rate of Received Packets Dropped" + }, + { + "id": "unit", + "value": "pps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Rate of Transmitted Packets Dropped" + }, + { + "id": "unit", + "value": "pps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "namespace" + }, + "properties": [ + { + "id": "displayName", + "value": "Namespace" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down", + "url": "/d/a87fb0d919ec0ea5f6543124e16c42a5/kubernetes-compute-resources-namespace-workloads?var-datasource=$datasource&var-cluster=$cluster&var-namespace=${__data.fields[1]}" + } + ] + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 11, + "interval": "1m", + "links": [], + "options": { + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "title": "Current Network Usage", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 29, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Bandwidth by Namespace", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 12, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 13, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 32, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Rate of Packets Dropped", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 18, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 19, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 33, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Storage IO", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 20, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", container!=\"\", device=~\"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "IOPS(Reads+Writes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 21, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", container!=\"\", device=~\"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "title": "ThroughPut(Read+Write)", + "type": "timeseries" + } + ], + "refresh": "10s", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [ + "infrastructure" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "single-new-eks-fargate-opensource-observability-accelerator-12", + "value": "single-new-eks-fargate-opensource-observability-accelerator-12" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(up{job=\"kubelet\"}, cluster)", + "refId": "Prometheus-cluster-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 2484, + "weekStart": "" +} diff --git a/artifacts/grafana-dashboards/eks-fargate/infrastructure/kubelet.json b/artifacts/grafana-dashboards/eks-fargate/infrastructure/kubelet.json new file mode 100644 index 0000000..ee281ff --- /dev/null +++ b/artifacts/grafana-dashboards/eks-fargate/infrastructure/kubelet.json @@ -0,0 +1,2060 @@ +{ + "__inputs": [], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.4.7" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubernetes-kubelet\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Running Kubelets", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 3, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Running Pods", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 4, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Running Containers", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 5, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Actual Volume Count", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 6, + "links": [], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Desired Volume Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 8, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "title": "Operation Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 10, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "title": "Operation duration 99th quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 11, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "title": "Pod Start Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 12, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "title": "Pod Start Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 13, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "title": "Storage Operation Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 15, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "title": "Storage Operation Duration 99th quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 16, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{operation_type}}", + "refId": "A" + } + ], + "title": "Cgroup manager operation rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 17, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "title": "Cgroup manager 99th quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Pod lifecycle event generator", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 18, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubernetes-kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "PLEG relist rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 19, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "PLEG relist interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 20, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "PLEG relist duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 63 + }, + "id": 21, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubernetes-kubelet\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubernetes-kubelet\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubernetes-kubelet\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubernetes-kubelet\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "title": "RPC Rate", + "type": "timeseries" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 70 + }, + "hiddenSeries": false, + "id": 22, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubernetes-kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Request duration 99th quantile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "s", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 77 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 77 + }, + "id": 24, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 77 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubernetes-kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Goroutines", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "10s", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [ + "infrastructure" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(up{job=\"kubernetes-kubelet\"}, cluster)", + "refId": "Prometheus-cluster-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up{job=\"kubernetes-kubelet\",cluster=\"$cluster\"}, instance)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", + "version": 2483, + "weekStart": "" +} diff --git a/artifacts/grafana-dashboards/eks/infrastructure/cluster.json b/artifacts/grafana-dashboards/eks/infrastructure/cluster.json index 5fe5534..5c87c34 100644 --- a/artifacts/grafana-dashboards/eks/infrastructure/cluster.json +++ b/artifacts/grafana-dashboards/eks/infrastructure/cluster.json @@ -1,4 +1,38 @@ { + "__inputs": [], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.4.7" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], "annotations": { "list": [ { @@ -25,7 +59,6 @@ "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, - "iteration": 1679681754059, "links": [], "liveNow": false, "panels": [ @@ -105,7 +138,7 @@ }, "textMode": "auto" }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -171,7 +204,7 @@ }, "textMode": "auto" }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -237,7 +270,7 @@ }, "textMode": "auto" }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -303,7 +336,7 @@ }, "textMode": "auto" }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -369,7 +402,7 @@ }, "textMode": "auto" }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -435,7 +468,7 @@ }, "textMode": "auto" }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -488,6 +521,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -550,7 +585,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -614,7 +650,10 @@ }, "custom": { "align": "auto", - "displayMode": "auto" + "cellOptions": { + "type": "auto" + }, + "inspect": false }, "decimals": 2, "displayName": "", @@ -869,6 +908,7 @@ "links": [], "options": { "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -877,7 +917,7 @@ }, "showHeader": true }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -1026,6 +1066,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -1085,7 +1127,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -1149,7 +1192,10 @@ }, "custom": { "align": "auto", - "displayMode": "auto" + "cellOptions": { + "type": "auto" + }, + "inspect": false }, "decimals": 2, "displayName": "", @@ -1404,6 +1450,7 @@ "links": [], "options": { "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -1412,7 +1459,7 @@ }, "showHeader": true }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -1562,7 +1609,10 @@ }, "custom": { "align": "auto", - "displayMode": "auto" + "cellOptions": { + "type": "auto" + }, + "inspect": false }, "decimals": 2, "displayName": "", @@ -1778,10 +1828,11 @@ "y": 37 }, "id": 11, - "interval": "1m", + "interval": "2m", "links": [], "options": { "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -1790,14 +1841,15 @@ }, "showHeader": true }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", "format": "table", "instant": true, "interval": "", @@ -1810,8 +1862,9 @@ "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", "format": "table", "instant": true, "interval": "", @@ -1824,8 +1877,9 @@ "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", "format": "table", "instant": true, "interval": "", @@ -1838,8 +1892,9 @@ "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", "format": "table", "instant": true, "interval": "", @@ -1852,8 +1907,9 @@ "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", "format": "table", "instant": true, "interval": "", @@ -1866,8 +1922,9 @@ "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", "format": "table", "instant": true, "interval": "", @@ -1925,6 +1982,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -1984,7 +2043,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -2021,6 +2081,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -2080,7 +2142,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -2143,6 +2206,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -2202,7 +2267,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -2239,6 +2305,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -2298,7 +2366,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -2361,6 +2430,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -2420,7 +2491,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -2457,6 +2529,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -2516,7 +2590,8 @@ "legend": { "calcs": [], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "single", @@ -2580,7 +2655,10 @@ }, "custom": { "align": "auto", - "displayMode": "auto" + "cellOptions": { + "type": "auto" + }, + "inspect": false }, "decimals": 2, "displayName": "", @@ -2632,7 +2710,7 @@ }, { "id": "decimals", - "value": -1 + "value": 2 }, { "id": "custom.align" @@ -2655,7 +2733,7 @@ }, { "id": "decimals", - "value": -1 + "value": 2 }, { "id": "custom.align" @@ -2678,7 +2756,7 @@ }, { "id": "decimals", - "value": -1 + "value": 2 }, { "id": "custom.align" @@ -2796,10 +2874,11 @@ "y": 69 }, "id": 22, - "interval": "1m", + "interval": "2m", "links": [], "options": { "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -2808,12 +2887,13 @@ }, "showHeader": true }, - "pluginVersion": "8.4.7", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", @@ -2821,6 +2901,7 @@ "interval": "", "intervalFactor": 2, "legendFormat": "", + "range": false, "refId": "A", "step": 10 }, @@ -2842,6 +2923,7 @@ "datasource": { "uid": "$datasource" }, + "editorMode": "code", "exemplar": false, "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", device=~\"mmcblk.p.+|.*nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", @@ -2908,7 +2990,8 @@ } ], "refresh": "10s", - "schemaVersion": 35, + "revision": 1, + "schemaVersion": 38, "style": "dark", "tags": [ "infrastructure" @@ -2935,11 +3018,7 @@ "type": "datasource" }, { - "current": { - "selected": false, - "text": "eks-cluster-with-vpc", - "value": "eks-cluster-with-vpc" - }, + "current": {}, "datasource": { "type": "prometheus", "uid": "$datasource" @@ -2997,6 +3076,6 @@ "timezone": "utc", "title": "Kubernetes / Compute Resources / Cluster", "uid": "efa86fd1d0c121a26444b636a3f509a8", - "version": 23, + "version": 7395, "weekStart": "" } diff --git a/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-cw-datasource.yaml b/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-cw-datasource.yaml index 06496ca..c81bd00 100644 --- a/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-cw-datasource.yaml +++ b/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-cw-datasource.yaml @@ -12,6 +12,7 @@ spec: type: cloudwatch access: server isDefault: false + url: "" jsonData: 'tlsSkipVerify': false 'timeInterval': "5s" diff --git a/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-xray-datasource.yaml b/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-xray-datasource.yaml index 11b7cd7..afeedb6 100644 --- a/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-xray-datasource.yaml +++ b/artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-xray-datasource.yaml @@ -11,6 +11,7 @@ spec: name: aws-observability-accelerator-xray type: grafana-x-ray-datasource access: server + url: "" isDefault: false jsonData: "authType": "ec2_iam_role" diff --git a/artifacts/k8s-deployment-manifest-templates/nginx/nginx-traffic-sample.yaml b/artifacts/k8s-deployment-manifest-templates/nginx/nginx-traffic-sample.yaml new file mode 100644 index 0000000..a2f706e --- /dev/null +++ b/artifacts/k8s-deployment-manifest-templates/nginx/nginx-traffic-sample.yaml @@ -0,0 +1,125 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{namespace}} + labels: + name: {{namespace}} + +--- + +kind: Pod +apiVersion: v1 +metadata: + name: banana-app + namespace: {{namespace}} + labels: + app: banana +spec: + containers: + - name: banana-app + image: hashicorp/http-echo + args: + - "-text=banana" + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 50m + memory: 50Mi +--- + +kind: Service +apiVersion: v1 +metadata: + name: banana-service + namespace: {{namespace}} +spec: + selector: + app: banana + ports: + - port: 5678 # Default port for image + +--- + +kind: Pod +apiVersion: v1 +metadata: + name: apple-app + namespace: {{namespace}} + labels: + app: apple +spec: + containers: + - name: apple-app + image: hashicorp/http-echo + args: + - "-text=apple" + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 50m + memory: 50Mi +--- + +kind: Service +apiVersion: v1 +metadata: + name: apple-service + namespace: {{namespace}} +spec: + selector: + app: apple + ports: + - port: 5678 # Default port for image + +--- + +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ingress-nginx-demo + namespace: {{namespace}} +spec: + rules: + - host: {{external_ip}} + http: + paths: + - path: /apple + pathType: Prefix + backend: + service: + name: apple-service + port: + number: 5678 + - path: /banana + pathType: Prefix + backend: + service: + name: banana-service + port: + number: 5678 + ingressClassName: nginx + +--- + +apiVersion: v1 +kind: Pod +metadata: + name: traffic-generator + namespace: {{namespace}} +spec: + containers: + - name: traffic-generator + image: ellerbrock/alpine-bash-curl-ssl + command: ["/bin/bash"] + args: ["-c", "while :; do curl http://{{external_ip}}/apple > /dev/null 2>&1; curl http://{{external_ip}}/banana > /dev/null 2>&1; sleep 0.05; done"] + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 50m + memory: 50Mi