From 1d8c06e35a74c8522f84c638dfada94c5694857a Mon Sep 17 00:00:00 2001 From: Stephen Lang Date: Wed, 24 Apr 2024 15:11:18 +0100 Subject: [PATCH 1/3] fix(dashboards): Port compute/namespaces (pods) dashboard to new grafonnet --- .../network-usage/cluster-total.libsonnet | 1 + dashboards/resources/cluster.libsonnet | 4 + dashboards/resources/multi-cluster.libsonnet | 2 + dashboards/resources/namespace.libsonnet | 1082 +++++++++++------ 4 files changed, 750 insertions(+), 339 deletions(-) diff --git a/dashboards/network-usage/cluster-total.libsonnet b/dashboards/network-usage/cluster-total.libsonnet index ef95bd93d..9d2670388 100644 --- a/dashboards/network-usage/cluster-total.libsonnet +++ b/dashboards/network-usage/cluster-total.libsonnet @@ -127,6 +127,7 @@ local var = g.dashboard.variable; g.panel.table.queryOptions.transformation.withId('organize') + g.panel.table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index ac1e86143..bb8229ad3 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -175,6 +175,7 @@ local var = g.dashboard.variable; table.queryOptions.transformation.withId('organize') + table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, @@ -291,6 +292,7 @@ local var = g.dashboard.variable; table.queryOptions.transformation.withId('organize') + table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, @@ -429,6 +431,7 @@ local var = g.dashboard.variable; table.queryOptions.transformation.withId('organize') + table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, @@ -639,6 +642,7 @@ local var = g.dashboard.variable; table.queryOptions.transformation.withId('organize') + table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index 7137a3181..50c8d8817 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -134,6 +134,7 @@ local var = g.dashboard.variable; g.panel.table.queryOptions.transformation.withId('organize') + g.panel.table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, @@ -232,6 +233,7 @@ local var = g.dashboard.variable; g.panel.table.queryOptions.transformation.withId('organize') + g.panel.table.queryOptions.transformation.withOptions({ excludeByName: { + Time: true, 'Time 1': true, 'Time 2': true, 'Time 3': true, diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index c7a1fc4c9..b1273aacc 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -1,374 +1,778 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; -local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet'; -local template = grafana.template; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local prometheus = g.query.prometheus; +local stat = g.panel.stat; +local table = g.panel.table; +local timeSeries = g.panel.timeSeries; +local var = g.dashboard.variable; { + local statPanel(title, unit, query) = + stat.new(title) + + stat.options.withColorMode('none') + + stat.standardOptions.withUnit(unit) + + stat.queryOptions.withInterval($._config.grafanaK8s.minimumTimeInterval) + + stat.queryOptions.withTargets([ + prometheus.new('${datasource}', query) + + prometheus.withInstant(true), + ]), + + local tsPanel = + timeSeries { + new(title): + timeSeries.new(title) + + timeSeries.options.legend.withShowLegend() + + timeSeries.options.legend.withAsTable() + + timeSeries.options.legend.withDisplayMode('table') + + timeSeries.options.legend.withPlacement('right') + + timeSeries.options.legend.withCalcs(['lastNotNull']) + + timeSeries.options.tooltip.withMode('single') + + timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + timeSeries.fieldConfig.defaults.custom.withSpanNulls(true) + + timeSeries.queryOptions.withInterval($._config.grafanaK8s.minimumTimeInterval), + }, + grafanaDashboards+:: { - local clusterTemplate = - template.new( - name='cluster', - datasource='$datasource', - query='label_values(up{%(kubeStateMetricsSelector)s}, %(clusterLabel)s)' % $._config, - current='', - hide=if $._config.showMultiCluster then '' else '2', - refresh=2, - includeAll=false, - sort=1 - ), - - local namespaceTemplate = - template.new( - name='namespace', - datasource='$datasource', - query='label_values(kube_namespace_status_phase{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}, namespace)' % $._config, - current='', - hide='', - refresh=2, - includeAll=false, - multi=false, - sort=1 - ), 'k8s-resources-namespace.json': - local tableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - }, - }; + local variables = { + datasource: + var.datasource.new('datasource', 'prometheus') + + var.datasource.withRegex($._config.datasourceFilterRegex) + + var.datasource.generalOptions.showOnDashboard.withLabelAndValue() + + var.datasource.generalOptions.withLabel('Data source') + + { + current: { + selected: true, + text: $._config.datasourceName, + value: $._config.datasourceName, + }, + }, - local networkColumns = [ - 'sum(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, - 'sum(irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, - 'sum(irate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, - 'sum(irate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, - 'sum(irate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, - 'sum(irate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, - ]; + cluster: + var.query.new('cluster') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + $._config.clusterLabel, + 'up{%(kubeStateMetricsSelector)s}' % $._config, + ) + + var.query.generalOptions.withLabel('cluster') + + var.query.refresh.onTime() + + ( + if $._config.showMultiCluster + then var.query.generalOptions.showOnDashboard.withLabelAndValue() + else var.query.generalOptions.showOnDashboard.withNothing() + ) + + var.query.withSort(type='alphabetical'), - local networkTableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - linkTooltip: 'Drill down to pods', - }, - 'Value #A': { - alias: 'Current Receive Bandwidth', - unit: 'Bps', - }, - 'Value #B': { - alias: 'Current Transmit Bandwidth', - unit: 'Bps', - }, - 'Value #C': { - alias: 'Rate of Received Packets', - unit: 'pps', - }, - 'Value #D': { - alias: 'Rate of Transmitted Packets', - unit: 'pps', - }, - 'Value #E': { - alias: 'Rate of Received Packets Dropped', - unit: 'pps', - }, - 'Value #F': { - alias: 'Rate of Transmitted Packets Dropped', - unit: 'pps', - }, + namespace: + var.query.new('namespace') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'namespace', + 'kube_namespace_status_phase{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}' % $._config, + ) + + var.query.generalOptions.withLabel('namespace') + + var.query.refresh.onTime() + + var.query.generalOptions.showOnDashboard.withLabelAndValue() + + var.query.withSort(type='alphabetical'), }; - local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config; - local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""}) by (pod)' % $._config; - - local cpuQuotaRequestsQuery = 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config; - local cpuQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.cpu'); - local memoryQuotaRequestsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'requests.memory'); - local memoryQuotaLimitsQuery = std.strReplace(cpuQuotaRequestsQuery, 'requests.cpu', 'limits.memory'); - - local storageIOColumns = [ - 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, - 'sum by(pod) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, - 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, - 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, - 'sum by(pod) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, - 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, - ]; - - local storageIOTableStyles = { + local links = { pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') }, - linkTooltip: 'Drill down to pods', - }, - 'Value #A': { - alias: 'IOPS(Reads)', - unit: 'iops', - decimals: 3, - }, - 'Value #B': { - alias: 'IOPS(Writes)', - unit: 'iops', - decimals: 3, - }, - 'Value #C': { - alias: 'IOPS(Reads + Writes)', - unit: 'iops', - decimals: 3, - }, - 'Value #D': { - alias: 'Throughput(Read)', - unit: 'Bps', - }, - 'Value #E': { - alias: 'Throughput(Write)', - unit: 'Bps', - }, - 'Value #F': { - alias: 'Throughput(Read + Write)', - unit: 'Bps', + title: 'Drill down to pods', + url: '%(prefix)s/d/%(uid)s/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}' % { + uid: $._config.grafanaDashboardIDs['k8s-resources-pod.json'], + prefix: $._config.grafanaK8s.linkPrefix, + }, }, }; - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']), - datasource_regex=$._config.datasourceFilterRegex, - datasource=$._config.datasourceName, - ) - .addRow( - (g.row('Headlines') + - { - height: '100px', - showTitle: false, - }) - .addPanel( - g.panel('CPU Utilisation (from requests)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + local panels = [ + statPanel( + 'CPU Utilisation (from requests)', + 'percentunit', + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) - .addPanel( - g.panel('CPU Utilisation (from limits)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + + stat.gridPos.withW(6) + + stat.gridPos.withH(3), + + statPanel( + 'CPU Utilisation (from limits)', + 'percentunit', + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) - .addPanel( - g.panel('Memory Utilisation (from requests)') + - g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) + + stat.gridPos.withW(6) + + stat.gridPos.withH(3), + + statPanel( + 'Memory Utilisation (from requests)', + 'percentunit', + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) - .addPanel( - g.panel('Memory Utilisation (from limits)') + - g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) + + stat.gridPos.withW(6) + + stat.gridPos.withH(3), + + statPanel( + 'Memory Utilisation (from limits)', + 'percentunit', + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) - ) - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel([ - cpuUsageQuery, - cpuQuotaRequestsQuery, - cpuQuotaLimitsQuery, - ], ['{{pod}}', 'quota - requests', 'quota - limits']) + - g.stack + { - seriesOverrides: [ + + stat.gridPos.withW(6) + + stat.gridPos.withH(3), + + tsPanel.new('CPU Usage') + + tsPanel.standardOptions.withUnit('ops') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + + prometheus.new( + '${datasource}', + 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})' % $._config + ) + + prometheus.withLegendFormat('quota - requests'), + + prometheus.new( + '${datasource}', + 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"})' % $._config + ) + + prometheus.withLegendFormat('quota - limits'), + ]) + + tsPanel.standardOptions.withOverrides([ + { + matcher: { + id: 'byFrameRefID', + options: 'B', + }, + properties: [ + { + id: 'custom.lineStyle', + value: { + fill: 'dash', + }, + }, { - alias: 'quota - requests', - color: '#F2495C', - dashes: true, - fill: 0, - hideTooltip: true, - legend: true, - linewidth: 2, - stack: false, - hiddenSeries: true, + id: 'custom.lineWidth', + value: 2, }, { - alias: 'quota - limits', - color: '#FF9830', - dashes: true, - fill: 0, - hideTooltip: true, - legend: true, - linewidth: 2, - stack: false, - hiddenSeries: true, + id: 'color', + value: { + mode: 'fixed', + fixedColor: 'red', + }, }, ], }, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage (w/o cache)') + - // Like above, without page cache - g.queryPanel([ - memoryUsageQuery, - memoryQuotaRequestsQuery, - memoryQuotaLimitsQuery, - ], ['{{pod}}', 'quota - requests', 'quota - limits']) + - g.stack + { - yaxes: g.yaxes('bytes'), - seriesOverrides: [ + matcher: { + id: 'byFrameRefID', + options: 'C', + }, + properties: [ + { + id: 'custom.lineStyle', + value: { + fill: 'dash', + }, + }, { - alias: 'quota - requests', - color: '#F2495C', - dashes: true, - fill: 0, - hideTooltip: true, - legend: true, - linewidth: 2, - stack: false, - hiddenSeries: true, + id: 'custom.lineWidth', + value: 2, }, { - alias: 'quota - limits', - color: '#FF9830', - dashes: true, - fill: 0, - hideTooltip: true, - legend: true, - linewidth: 2, - stack: false, - hiddenSeries: true, + id: 'color', + value: { + mode: 'fixed', + fixedColor: 'orange', + }, }, ], }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - 'sum(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - 'sum(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, - 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, - }) - ) - ) - .addRow( - g.row('Current Network Usage') - .addPanel( - g.panel('Current Network Usage') + - g.tablePanel( - networkColumns, - networkTableStyles - ), - ) - ) - .addRow( - g.row('Bandwidth') - .addPanel( - g.panel('Receive Bandwidth') + - g.queryPanel('sum(irate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - .addPanel( - g.panel('Transmit Bandwidth') + - g.queryPanel('sum(irate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Rate of Packets') - .addPanel( - g.panel('Rate of Received Packets') + - g.queryPanel('sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('pps') }, - ) - .addPanel( - g.panel('Rate of Transmitted Packets') + - g.queryPanel('sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('pps') }, - ) - ) - .addRow( - g.row('Rate of Packets Dropped') - .addPanel( - g.panel('Rate of Received Packets Dropped') + - g.queryPanel('sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('pps') }, - ) - .addPanel( - g.panel('Rate of Transmitted Packets Dropped') + - g.queryPanel('sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('pps') }, - ) - ) - .addRow( - g.row('Storage IO') - .addPanel( - g.panel('IOPS(Reads+Writes)') + - g.queryPanel('ceil(sum by(pod) (rate(container_fs_reads_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s])))' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('short'), decimals: -1 }, + ]), - ) - .addPanel( - g.panel('ThroughPut(Read+Write)') + - g.queryPanel('sum by(pod) (rate(container_fs_reads_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Storage IO - Distribution') - .addPanel( - g.panel('Current Storage IO') + - g.tablePanel( - storageIOColumns, - storageIOTableStyles - ) + + table.new('CPU Quota') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + pod: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + pod: 'Pod', + 'Value #A': 'CPU Usage', + 'Value #B': 'CPU Requests', + 'Value #C': 'CPU Requests %', + 'Value #D': 'CPU Limits', + 'Value #E': 'CPU Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ { - sort: { - col: 4, - desc: true, + matcher: { + id: 'byRegexp', + options: '/%/', }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], }, - ) - ) + { - templating+: { - list+: [clusterTemplate, namespaceTemplate], - }, - }, + { + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ + { + id: 'links', + value: [links.pod], + }, + ], + }, + ]), + + tsPanel.new('Memory Usage (w/o cache)') + + tsPanel.standardOptions.withUnit('bytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""}) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + + prometheus.new( + '${datasource}', + 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})' % $._config + ) + + prometheus.withLegendFormat('quota - requests'), + + prometheus.new( + '${datasource}', + 'scalar(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})' % $._config + ) + + prometheus.withLegendFormat('quota - limits'), + ]) + + tsPanel.standardOptions.withOverrides([ + { + matcher: { + id: 'byFrameRefID', + options: 'B', + }, + properties: [ + { + id: 'custom.lineStyle', + value: { + fill: 'dash', + }, + }, + { + id: 'custom.lineWidth', + value: 2, + }, + { + id: 'color', + value: { + mode: 'fixed', + fixedColor: 'red', + }, + }, + ], + }, + { + matcher: { + id: 'byFrameRefID', + options: 'C', + }, + properties: [ + { + id: 'custom.lineStyle', + value: { + fill: 'dash', + }, + }, + { + id: 'custom.lineWidth', + value: 2, + }, + { + id: 'color', + value: { + mode: 'fixed', + fixedColor: 'orange', + }, + }, + ], + }, + ]), + + table.new('Memory Quota') + + table.standardOptions.withUnit('bytes') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + 'Time 6': true, + 'Time 7': true, + 'Time 8': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + 'Time 6': 5, + 'Time 7': 6, + 'Time 8': 7, + pod: 8, + 'Value #A': 9, + 'Value #B': 10, + 'Value #C': 11, + 'Value #D': 12, + 'Value #E': 13, + 'Value #F': 14, + 'Value #G': 15, + 'Value #H': 16, + }, + renameByName: { + pod: 'Pod', + 'Value #A': 'Memory Usage', + 'Value #B': 'Memory Requests', + 'Value #C': 'Memory Requests %', + 'Value #D': 'Memory Limits', + 'Value #E': 'Memory Limits %', + 'Value #F': 'Memory Usage (RSS)', + 'Value #G': 'Memory Usage (Cache)', + 'Value #H': 'Memory Usage (Swap)', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ + { + id: 'links', + value: [links.pod], + }, + ], + }, + ]), + + table.new('Current Network Usage') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(rate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(rate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(rate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(rate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + 'Time 6': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + 'Time 6': 5, + pod: 6, + 'Value #A': 7, + 'Value #B': 8, + 'Value #C': 9, + 'Value #D': 10, + 'Value #E': 11, + 'Value #F': 12, + }, + renameByName: { + pod: 'Pod', + 'Value #A': 'Current Receive Bandwidth', + 'Value #B': 'Current Transmit Bandwidth', + 'Value #C': 'Rate of Received Packets', + 'Value #D': 'Rate of Transmitted Packets', + 'Value #E': 'Rate of Received Packets Dropped', + 'Value #F': 'Rate of Transmitted Packets Dropped', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/Bandwidth/', + }, + properties: [ + { + id: 'unit', + value: 'Bps', + }, + ], + }, + { + matcher: { + id: 'byRegexp', + options: '/Packets/', + }, + properties: [ + { + id: 'unit', + value: 'pps', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ + { + id: 'links', + value: [links.pod], + }, + ], + }, + ]), + + tsPanel.new('Receive Bandwidth') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('Transmit Bandwidth') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(rate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('Rate of Received Packets') + + tsPanel.standardOptions.withUnit('pps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('Rate of Transmitted Packets') + + tsPanel.standardOptions.withUnit('pps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('Rate of Received Packets Dropped') + + tsPanel.standardOptions.withUnit('pps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('Rate of Transmitted Packets Dropped') + + tsPanel.standardOptions.withUnit('pps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('IOPS(Reads+Writes)') + + tsPanel.standardOptions.withUnit('iops') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'ceil(sum by(pod) (rate(container_fs_reads_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s])))' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + tsPanel.new('ThroughPut(Read+Write)') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum by(pod) (rate(container_fs_reads_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + table.new('Current Storage IO') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + 'Time 6': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + 'Time 6': 5, + pod: 6, + 'Value #A': 7, + 'Value #B': 8, + 'Value #C': 9, + 'Value #D': 10, + 'Value #E': 11, + 'Value #F': 12, + }, + renameByName: { + pod: 'Pod', + 'Value #A': 'IOPS(Reads)', + 'Value #B': 'IOPS(Writes)', + 'Value #C': 'IOPS(Reads + Writes)', + 'Value #D': 'Throughput(Read)', + 'Value #E': 'Throughput(Write)', + 'Value #F': 'Throughput(Read + Write)', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/IOPS/', + }, + properties: [ + { + id: 'unit', + value: 'iops', + }, + ], + }, + { + matcher: { + id: 'byRegexp', + options: '/Throughput/', + }, + properties: [ + { + id: 'unit', + value: 'Bps', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ + { + id: 'links', + value: [links.pod], + }, + ], + }, + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-namespace.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster, variables.namespace]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=7)), }, } From aa3a0e41753773cb7dff5bc094055274c49d5715 Mon Sep 17 00:00:00 2001 From: Stephen Lang Date: Wed, 24 Apr 2024 16:14:09 +0100 Subject: [PATCH 2/3] chore: correct panel widths --- dashboards/resources/namespace.libsonnet | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index b1273aacc..1eaba9125 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -121,6 +121,7 @@ local var = g.dashboard.variable; + stat.gridPos.withH(3), tsPanel.new('CPU Usage') + + tsPanel.gridPos.withW(24) + tsPanel.standardOptions.withUnit('ops') + tsPanel.queryOptions.withTargets([ prometheus.new( @@ -195,6 +196,7 @@ local var = g.dashboard.variable; ]), table.new('CPU Quota') + + table.gridPos.withW(24) + table.queryOptions.withTargets([ prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) @@ -281,6 +283,7 @@ local var = g.dashboard.variable; ]), tsPanel.new('Memory Usage (w/o cache)') + + tsPanel.gridPos.withW(24) + tsPanel.standardOptions.withUnit('bytes') + tsPanel.queryOptions.withTargets([ prometheus.new( @@ -355,6 +358,7 @@ local var = g.dashboard.variable; ]), table.new('Memory Quota') + + table.gridPos.withW(24) + table.standardOptions.withUnit('bytes') + table.queryOptions.withTargets([ prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config) @@ -463,6 +467,7 @@ local var = g.dashboard.variable; ]), table.new('Current Network Usage') + + table.gridPos.withW(24) + table.queryOptions.withTargets([ prometheus.new('${datasource}', 'sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.withInstant(true) @@ -654,6 +659,7 @@ local var = g.dashboard.variable; ]), table.new('Current Storage IO') + + table.gridPos.withW(24) + table.queryOptions.withTargets([ prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.withInstant(true) @@ -773,6 +779,6 @@ local var = g.dashboard.variable; + g.dashboard.time.withTo('now') + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + g.dashboard.withVariables([variables.datasource, variables.cluster, variables.namespace]) - + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=7)), + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=12, panelHeight=7)), }, } From a52f3d07a6164ce206f69618b58202dbebf51969 Mon Sep 17 00:00:00 2001 From: Stephen Lang Date: Wed, 24 Apr 2024 18:35:45 +0100 Subject: [PATCH 3/3] fix: Remove units from CPU time series panel --- dashboards/resources/namespace.libsonnet | 1 - 1 file changed, 1 deletion(-) diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index 1eaba9125..7119b2b3a 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -122,7 +122,6 @@ local var = g.dashboard.variable; tsPanel.new('CPU Usage') + tsPanel.gridPos.withW(24) - + tsPanel.standardOptions.withUnit('ops') + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}',