From aedeba72218de8a56c93404861623eb5fcafe10f Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 08:49:38 +0100 Subject: [PATCH 01/11] WIP: Migrate dashboards to use new grafonnet library Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 862 ++++++++++++----------- docs/node-mixin/jsonnetfile.json | 15 +- 2 files changed, 435 insertions(+), 442 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index fce411ab19..7a71d2c7fa 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -1,469 +1,471 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; +local variable = dashboard.variable; +local row = grafana.panel.row; +local prometheus = grafana.query.prometheus; + +local timeSeriesPanel = grafana.panel.timeSeries; +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; +local tsLegend = tsOptions.legend; local c = import '../config.libsonnet'; -local datasourceTemplate = { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', -}; +local datasource = variable.datasource.new( + 'datasource', 'prometheus' +); + +local tsCommonPanelOptions = variable.query.withDatasourceFromVariable(datasource) + + tsCustom.stacking.withMode('normal') + + tsCustom.withFillOpacity(100) + + tsLegend.withShowLegend(false) + + tsOptions.tooltip.withMode("multi") + + tsOptions.tooltip.withSort("desc"); local CPUUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'CPU Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local CPUSaturation = // TODO: Is this a useful panel? At least there should be some explanation how load // average relates to the "CPU saturation" in the title. - graphPanel.new( + timeSeriesPanel.new( 'CPU Saturation (Load1 per CPU)', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local memoryUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Memory Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local memorySaturation = - graphPanel.new( + timeSeriesPanel.new( 'Memory Saturation (Major Page Faults)', - datasource='$datasource', - span=6, - format='rds', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('rds'); + +local networkOverrides = tsStandardOptions.withOverrides( + [ + tsStandardOptions.override.byRegexp.new('/Transmit/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions( + tsCustom.withTransform('negative-Y') + ), + ] +); local networkUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Network Utilisation (Bytes Receive/Transmit)', - datasource='$datasource', - span=6, - format='Bps', - stack=true, - fill=10, - legend_show=false, ) - .addSeriesOverride({ alias: '/Receive/', stack: 'A' }) - .addSeriesOverride({ alias: '/Transmit/', stack: 'B', transform: 'negative-Y' }) - { tooltip+: { sort: 2 } }; + + tsCommonPanelOptions + + tsStandardOptions.withUnit('Bps') + + networkOverrides; local networkSaturation = - graphPanel.new( + timeSeriesPanel.new( 'Network Saturation (Drops Receive/Transmit)', - datasource='$datasource', - span=6, - format='Bps', - stack=true, - fill=10, - legend_show=false, ) - .addSeriesOverride({ alias: '/ Receive/', stack: 'A' }) - .addSeriesOverride({ alias: '/ Transmit/', stack: 'B', transform: 'negative-Y' }) - { tooltip+: { sort: 2 } }; + + tsCommonPanelOptions + + tsStandardOptions.withUnit('Bps') + + networkOverrides; local diskIOUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Disk IO Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local diskIOSaturation = - graphPanel.new( + timeSeriesPanel.new( 'Disk IO Saturation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local diskSpaceUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Disk Space Utilisation', - datasource='$datasource', - span=12, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); { - _clusterTemplate:: template.new( - name='cluster', - datasource='$datasource', - query='label_values(node_time_seconds, %s)' % $._config.clusterLabel, - current='', - hide=if $._config.showMultiCluster then '' else '2', - refresh=2, - includeAll=false, - sort=1 - ), - + _clusterVariable:: + variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( + $._config.clusterLabel, + 'node_time_seconds', + ) + + if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing() + + variable.query.withRefresh(2) + + variable.query.selectionOptions.withIncludeAll(false) + + variable.query.withSort(1), + grafanaDashboards+:: { - 'node-rsrc-use.json': - - dashboard.new( - '%sUSE Method / Node' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-rsrc-use.json') - ) - .addTemplate(datasourceTemplate) - .addTemplate($._clusterTemplate) - .addTemplate( - template.new( - 'instance', - '$datasource', - 'label_values(node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}, instance)' % $._config, - refresh='time', - sort=1 - ) - ) - .addRow( - row.new('CPU') - .addPanel(CPUUtilisation.addTarget(prometheus.target('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) - .addPanel(CPUSaturation.addTarget(prometheus.target('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Saturation'))) - ) - .addRow( - row.new('Memory') - .addPanel(memoryUtilisation.addTarget(prometheus.target('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) - .addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Major page Faults'))) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel(diskIOUtilisation.addTarget(prometheus.target('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) - .addPanel(diskIOSaturation.addTarget(prometheus.target('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation.addTarget(prometheus.target( - ||| - sort_desc(1 - - ( - max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - / - max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - ) != 0 - ) - ||| % $._config, legendFormat='{{device}}' - )) - ) - ), - - 'node-cluster-rsrc-use.json': - dashboard.new( - '%sUSE Method / Cluster' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-cluster-rsrc-use.json') - ) - .addTemplate(datasourceTemplate) - .addTemplate($._clusterTemplate) - .addRow( - row.new('CPU') - .addPanel( - CPUUtilisation - .addTarget(prometheus.target( - ||| - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - ) != 0 ) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ||| % $._config, legendFormat='{{ instance }}' - )) - ) - .addPanel( - CPUSaturation - .addTarget(prometheus.target( - ||| - ( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}}' - )) - ) - ) - .addRow( - row.new('Memory') - .addPanel( - memoryUtilisation - .addTarget(prometheus.target( - ||| - ( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}}', - )) - ) - .addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{instance}}'))) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel( - diskIOUtilisation - .addTarget(prometheus.target( - ||| - ( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}} {{device}}' - )) - ) - .addPanel( - diskIOSaturation - .addTarget(prometheus.target( - ||| - ( - instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}} {{device}}' - )) - ) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation - .addTarget(prometheus.target( - ||| - sum without (device) ( - max without (fstype, mountpoint) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - - - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) - ||| % $._config, legendFormat='{{instance}}' - )) - ) - ), - } + - if $._config.showMultiCluster then { - 'node-multicluster-rsrc-use.json': - dashboard.new( - '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-multicluster-rsrc-use.json') - ) - .addTemplate(datasourceTemplate) - .addRow( - row.new('CPU') - .addPanel( - CPUUtilisation - .addTarget(prometheus.target( - ||| - sum( - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s} - ) != 0) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) - ) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - .addPanel( - CPUSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ) - .addRow( - row.new('Memory') - .addPanel( - memoryUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - .addPanel( - memorySaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config - )) - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config - )) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config - )) - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config - )) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel( - diskIOUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config - )) - ) - .addPanel( - diskIOSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config - )) - ) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation - .addTarget(prometheus.target( - ||| - sum ( - sum without (device) ( - max without (fstype, mountpoint, instance, pod) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) - ) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ), - } else {}, + 'node-rsrc-use.json': + dashboard.new( + '%sUSE Method / Node' % $._config.dashboardNamePrefix, + ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, + ) + + variable.query.withRefresh(2) + + variable.query.withSort(1), + ]) + // TODO - panel width + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + ]), + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sort_desc(1 - + ( + max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + / + max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + ) != 0 + ) + ||| % $._config + ) + prometheus.withLegendFormat('{{device}}') + ]), + ]), + ]), + ), + 'node-cluster-rsrc-use.json': + dashboard.new( + '%sUSE Method / Cluster' % $._config.dashboardNamePrefix, + ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-cluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withRefresh(2) + + variable.query.withSort(1), + ]) + // TODO - panel width + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + ) != 0 ) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}') + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}') + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}') + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config + ) + prometheus.withLegendFormat('{{ instance }}') + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), + ]), + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum without (device) ( + max without (fstype, mountpoint) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + - + node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}')]), + ]), + ]), + ), + } + + if $._config.showMultiCluster then { + // TODO - test thoroughly + 'node-multicluster-rsrc-use.json': + dashboard.new( + '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, + ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withRefresh(2) + + variable.query.withSort(1), + ]) + // TODO - panel width + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum( + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s} + ) != 0) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| + % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), + ]), + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum ( + sum without (device) ( + max without (fstype, mountpoint, instance, pod) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}')]), + ]), + ]), + ), + } else {}, } diff --git a/docs/node-mixin/jsonnetfile.json b/docs/node-mixin/jsonnetfile.json index 721d4833a0..2d56d91245 100644 --- a/docs/node-mixin/jsonnetfile.json +++ b/docs/node-mixin/jsonnetfile.json @@ -4,20 +4,11 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" } }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet-7.0" - } - }, - "version": "master" + "version": "main" } ], "legacyImports": false From 2c935da481ed6d489e64a7bb43ba4a247c753c4c Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 08:52:30 +0100 Subject: [PATCH 02/11] Run jsonnetfmt Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 732 ++++++++++++----------- 1 file changed, 368 insertions(+), 364 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 7a71d2c7fa..1e49bdda02 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -18,11 +18,11 @@ local datasource = variable.datasource.new( ); local tsCommonPanelOptions = variable.query.withDatasourceFromVariable(datasource) - + tsCustom.stacking.withMode('normal') - + tsCustom.withFillOpacity(100) - + tsLegend.withShowLegend(false) - + tsOptions.tooltip.withMode("multi") - + tsOptions.tooltip.withSort("desc"); + + tsCustom.stacking.withMode('normal') + + tsCustom.withFillOpacity(100) + + tsLegend.withShowLegend(false) + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc'); local CPUUtilisation = timeSeriesPanel.new( @@ -101,7 +101,7 @@ local diskSpaceUtilisation = + tsStandardOptions.withUnit('percentunit'); { - _clusterVariable:: + _clusterVariable:: variable.query.new('cluster') + variable.query.withDatasourceFromVariable(datasource) + variable.query.queryTypes.withLabelValues( @@ -109,363 +109,367 @@ local diskSpaceUtilisation = 'node_time_seconds', ) + if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing() - + variable.query.withRefresh(2) - + variable.query.selectionOptions.withIncludeAll(false) - + variable.query.withSort(1), - + + variable.query.withRefresh(2) + + variable.query.selectionOptions.withIncludeAll(false) + + variable.query.withSort(1), + grafanaDashboards+:: { - 'node-rsrc-use.json': - dashboard.new( - '%sUSE Method / Node' % $._config.dashboardNamePrefix, - ) - + dashboard.time.withFrom('now-1h') - + dashboard.withTags($._config.dashboardTags) - + dashboard.withTimezone('utc') - + dashboard.withRefresh('30s') - + dashboard.graphTooltip.withSharedCrosshair() - + dashboard.withUid(std.md5('node-rsrc-use.json')) - + dashboard.withVariables([ - datasource, - $._clusterVariable, - variable.query.new('instance') - + variable.query.withDatasourceFromVariable(datasource) - + variable.query.queryTypes.withLabelValues( - 'instance', - 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, - ) - + variable.query.withRefresh(2) - + variable.query.withSort(1), - ]) - // TODO - panel width - + dashboard.withPanels( - grafana.util.grid.makeGrid([ - row.new('CPU') - + row.withPanels([ - CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), - CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]), - ]), - row.new('Memory') - + row.withPanels([ - memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), - memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]), - ]), - row.new('Network') - + row.withPanels([ - networkUtilisation + tsQueryOptions.withTargets([ - prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), - prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), - ]), - networkSaturation + tsQueryOptions.withTargets([ - prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), - prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), - ]), - ]), - row.new('Disk IO') - + row.withPanels([ - diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), - diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), - ]), - row.new('Disk Space') - + row.withPanels([ - diskSpaceUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sort_desc(1 - - ( - max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - / - max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - ) != 0 - ) - ||| % $._config - ) + prometheus.withLegendFormat('{{device}}') - ]), - ]), - ]), - ), - 'node-cluster-rsrc-use.json': - dashboard.new( - '%sUSE Method / Cluster' % $._config.dashboardNamePrefix, - ) - + dashboard.time.withFrom('now-1h') - + dashboard.withTags($._config.dashboardTags) - + dashboard.withTimezone('utc') - + dashboard.withRefresh('30s') - + dashboard.graphTooltip.withSharedCrosshair() - + dashboard.withUid(std.md5('node-cluster-rsrc-use.json')) - + dashboard.withVariables([ - datasource, - $._clusterVariable, - variable.query.withDatasourceFromVariable(datasource) - + variable.query.withRefresh(2) - + variable.query.withSort(1), - ]) - // TODO - panel width - + dashboard.withPanels( - grafana.util.grid.makeGrid([ - row.new('CPU') - + row.withPanels([ - CPUUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - ) != 0 ) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ||| % $._config - ) + prometheus.withLegendFormat('{{ instance }}') - ]), - CPUSaturation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - ( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config - ) + prometheus.withLegendFormat('{{ instance }}') - ]), - ]), - row.new('Memory') - + row.withPanels([ - memoryUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - ( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config - ) + prometheus.withLegendFormat('{{ instance }}') - ]), - memorySaturation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config - ) + prometheus.withLegendFormat('{{ instance }}') - ]), - ]), - row.new('Network') - + row.withPanels([ - networkUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config - ) + prometheus.withLegendFormat('{{ instance }} Receive'), - prometheus.new( - '$datasource', - 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config - ) + prometheus.withLegendFormat('{{ instance }} Transmit'), - ]), - networkSaturation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config - ) + prometheus.withLegendFormat('{{ instance }} Receive'), - prometheus.new( - '$datasource', - 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config - ) + prometheus.withLegendFormat('{{ instance }} Transmit'), - ]), - ]), - row.new('Disk IO') - + row.withPanels([ - diskIOUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ||| % $._config - ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), - diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( - '$datasource', - ||| - instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ||| % $._config - ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), - ]), - row.new('Disk Space') - + row.withPanels([ - diskSpaceUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum without (device) ( - max without (fstype, mountpoint) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - - - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) - ||| % $._config - ) + prometheus.withLegendFormat('{{ instance }}')]), - ]), - ]), - ), - } + - if $._config.showMultiCluster then { - // TODO - test thoroughly - 'node-multicluster-rsrc-use.json': - dashboard.new( - '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, - ) - + dashboard.time.withFrom('now-1h') - + dashboard.withTags($._config.dashboardTags) - + dashboard.withTimezone('utc') - + dashboard.withRefresh('30s') - + dashboard.graphTooltip.withSharedCrosshair() - + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json')) - + dashboard.withVariables([ - datasource, - variable.query.withDatasourceFromVariable(datasource) - + variable.query.withRefresh(2) - + variable.query.withSort(1), - ]) - // TODO - panel width - + dashboard.withPanels( - grafana.util.grid.makeGrid([ - row.new('CPU') - + row.withPanels([ - CPUUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum( - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s} - ) != 0) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) - ) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') - ]), - CPUSaturation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') - ]), - ]), - row.new('Memory') - + row.withPanels([ - memoryUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') - ]), - memorySaturation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| - % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}') - ]), - ]), - row.new('Network') - + row.withPanels([ - networkUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), - ]), - networkSaturation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), - prometheus.new( - '$datasource', - ||| - sum(( - instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), - ]), - ]), - row.new('Disk IO') - + row.withPanels([ - diskIOUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum(( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), - diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( - '$datasource', - ||| - sum(( - instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), - ]), - row.new('Disk Space') - + row.withPanels([ - diskSpaceUtilisation + tsQueryOptions.withTargets([ - prometheus.new( - '$datasource', - ||| - sum ( - sum without (device) ( - max without (fstype, mountpoint, instance, pod) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) - ) by (%(clusterLabel)s) - ||| % $._config - ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}')]), - ]), - ]), - ), - } else {}, + 'node-rsrc-use.json': + dashboard.new( + '%sUSE Method / Node' % $._config.dashboardNamePrefix, + ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, + ) + + variable.query.withRefresh(2) + + variable.query.withSort(1), + ]) + // TODO - panel width + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + ]), + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sort_desc(1 - + ( + max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + / + max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + ) != 0 + ) + ||| % $._config + ) + prometheus.withLegendFormat('{{device}}'), + ]), + ]), + ]), + ), + 'node-cluster-rsrc-use.json': + dashboard.new( + '%sUSE Method / Cluster' % $._config.dashboardNamePrefix, + ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-cluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withRefresh(2) + + variable.query.withSort(1), + ]) + // TODO - panel width + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + ) != 0 ) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}'), + ]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), + ]), + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum without (device) ( + max without (fstype, mountpoint) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + - + node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + ]), + ), + } + + if $._config.showMultiCluster then { + // TODO - test thoroughly + 'node-multicluster-rsrc-use.json': + dashboard.new( + '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, + ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withRefresh(2) + + variable.query.withSort(1), + ]) + // TODO - panel width + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum( + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s} + ) != 0) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| + % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}'), + ]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), + ]), + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum ( + sum without (device) ( + max without (fstype, mountpoint, instance, pod) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + ]), + ), + } else {}, } From 8d8471c03f895b33f1c5f715549884c2fa594fca Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:11:44 +0100 Subject: [PATCH 03/11] 2nd set of dashboards Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 8 +-- docs/node-mixin/lib/prom-mixin.libsonnet | 62 +++++++++++------------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 1e49bdda02..4519bea5f2 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -108,10 +108,10 @@ local diskSpaceUtilisation = $._config.clusterLabel, 'node_time_seconds', ) - + if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing() - + variable.query.withRefresh(2) - + variable.query.selectionOptions.withIncludeAll(false) - + variable.query.withSort(1), + + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.withRefresh(2) // TODO Check this + + variable.query.selectionOptions.withIncludeAll(false) + + variable.query.withSort(1), // TODO Check this grafanaDashboards+:: { 'node-rsrc-use.json': diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index bd01cfd45f..b2351b0e2e 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -1,48 +1,44 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; +local row = grafana.panel.row; +local prometheus = grafana.query.prometheus; +local variable = dashboard.variable; local template = grafana.template; + local graphPanel = grafana.graphPanel; -local grafana70 = import 'github.com/grafana/grafonnet-lib/grafonnet-7.0/grafana.libsonnet'; local gaugePanel = grafana70.panel.gauge; local table = grafana70.panel.table; +local timeSeriesPanel = grafana.panel.timeSeries; + + { new(config=null, platform=null, uid=null):: { - local prometheusDatasourceTemplate = { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - - local clusterTemplatePrototype = - template.new( - 'cluster', - '$datasource', - '', - hide=if config.showMultiCluster then '' else '2', - refresh='time', - label='Cluster', - ), - local clusterTemplate = + local datasource = variable.datasource.new( + 'datasource', 'prometheus' + ), + + local clusterVariablePrototype = variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(datasource) + + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.resfresh.onTime() + + variable.generalOptions.withLabel("Cluster"), + + local clusterVariable = if platform == 'Darwin' then - clusterTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config } + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + config, + 'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s', + ) else - clusterTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config }, + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + config, + 'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s', + ), local instanceTemplatePrototype = template.new( From fe13939fd64b1da0eb7adcc64f7e215fcc80b7c6 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:18:00 +0100 Subject: [PATCH 04/11] Improve refresh and sort syntax Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 16 ++++++++-------- docs/node-mixin/lib/prom-mixin.libsonnet | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 4519bea5f2..4102d29d53 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -109,9 +109,9 @@ local diskSpaceUtilisation = 'node_time_seconds', ) + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) - + variable.query.withRefresh(2) // TODO Check this + + variable.query.refresh.onTime() + variable.query.selectionOptions.withIncludeAll(false) - + variable.query.withSort(1), // TODO Check this + + variable.query.withSort(asc=true), grafanaDashboards+:: { 'node-rsrc-use.json': @@ -133,8 +133,8 @@ local diskSpaceUtilisation = 'instance', 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, ) - + variable.query.withRefresh(2) - + variable.query.withSort(1), + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), ]) // TODO - panel width + dashboard.withPanels( @@ -198,8 +198,8 @@ local diskSpaceUtilisation = datasource, $._clusterVariable, variable.query.withDatasourceFromVariable(datasource) - + variable.query.withRefresh(2) - + variable.query.withSort(1), + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), ]) // TODO - panel width + dashboard.withPanels( @@ -329,8 +329,8 @@ local diskSpaceUtilisation = + dashboard.withVariables([ datasource, variable.query.withDatasourceFromVariable(datasource) - + variable.query.withRefresh(2) - + variable.query.withSort(1), + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), ]) // TODO - panel width + dashboard.withPanels( diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index b2351b0e2e..e9ac09c85b 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -23,7 +23,7 @@ local timeSeriesPanel = grafana.panel.timeSeries; local clusterVariablePrototype = variable.query.new('cluster') + variable.query.withDatasourceFromVariable(datasource) + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) - + variable.query.resfresh.onTime() + + variable.query.refresh.onTime() + variable.generalOptions.withLabel("Cluster"), local clusterVariable = From 67fb2f3b4c29d56582399a7d6800e066db13aa99 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 09:26:46 +0100 Subject: [PATCH 05/11] More progress Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 13 +++-- docs/node-mixin/lib/prom-mixin.libsonnet | 73 +++++++++++++++--------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 4102d29d53..117fce1130 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -17,12 +17,13 @@ local datasource = variable.datasource.new( 'datasource', 'prometheus' ); -local tsCommonPanelOptions = variable.query.withDatasourceFromVariable(datasource) - + tsCustom.stacking.withMode('normal') - + tsCustom.withFillOpacity(100) - + tsLegend.withShowLegend(false) - + tsOptions.tooltip.withMode('multi') - + tsOptions.tooltip.withSort('desc'); +local tsCommonPanelOptions = + variable.query.withDatasourceFromVariable(datasource) + + tsCustom.stacking.withMode('normal') + + tsCustom.withFillOpacity(100) + + tsLegend.withShowLegend(false) + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc'); local CPUUtilisation = timeSeriesPanel.new( diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index e9ac09c85b..6d035da540 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -10,7 +10,11 @@ local gaugePanel = grafana70.panel.gauge; local table = grafana70.panel.table; local timeSeriesPanel = grafana.panel.timeSeries; - +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; +local tsLegend = tsOptions.legend; { @@ -20,43 +24,56 @@ local timeSeriesPanel = grafana.panel.timeSeries; 'datasource', 'prometheus' ), - local clusterVariablePrototype = variable.query.new('cluster') - + variable.query.withDatasourceFromVariable(datasource) - + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) - + variable.query.refresh.onTime() - + variable.generalOptions.withLabel("Cluster"), + local clusterVariablePrototype = + variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(datasource) + + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.refresh.onTime() + + variable.generalOptions.withLabel('Cluster'), - local clusterVariable = + local clusterVariable = if platform == 'Darwin' then - clusterVariablePrototype + - variable.query.queryTypes.withLabelValues( - config, - 'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s', + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + ' %(clusterLabel)s' % config, + 'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}' % config, ) else - clusterVariablePrototype + - variable.query.queryTypes.withLabelValues( - config, - 'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s', + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + '%(clusterLabel)s' % config, + 'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}' % config, ), - local instanceTemplatePrototype = - template.new( - 'instance', - '$datasource', - '', - refresh='time', - label='Instance', - ), - local instanceTemplate = + local instanceVariablePrototype = + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.refresh.onTime() + + variable.generalOptions.withLabel('Instance'), + + local instanceVariable = if platform == 'Darwin' then - instanceTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config } + instanceVariablePrototype + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}' % config, + ) else - instanceTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config }, + instanceVariablePrototype + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}' % config, + ), local idleCPU = + timeSeriesPanel.new('CPU Usage') + + variable.query.withDatasourceFromVariable(datasource) + + tsStandardOptions.withUnit('percentunit') + + tsCustom.stacking.withMode('normal') + // TODO - max, min, width + // TODO - target + + graphPanel.new( 'CPU Usage', datasource='$datasource', From 4babb33d3c984fd1f7858555561f7d9c29e6e030 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:28:17 +0100 Subject: [PATCH 06/11] Fix panel width Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 117fce1130..e2354c124e 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -137,7 +137,6 @@ local diskSpaceUtilisation = + variable.query.refresh.onTime() + variable.query.withSort(asc=true), ]) - // TODO - panel width + dashboard.withPanels( grafana.util.grid.makeGrid([ row.new('CPU') @@ -166,6 +165,8 @@ local diskSpaceUtilisation = diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), ]), + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ row.new('Disk Space') + row.withPanels([ diskSpaceUtilisation + tsQueryOptions.withTargets([ @@ -183,7 +184,7 @@ local diskSpaceUtilisation = ) + prometheus.withLegendFormat('{{device}}'), ]), ]), - ]), + ], panelWidth=24, panelHeight=7, startY=34), ), 'node-cluster-rsrc-use.json': dashboard.new( @@ -202,7 +203,6 @@ local diskSpaceUtilisation = + variable.query.refresh.onTime() + variable.query.withSort(asc=true), ]) - // TODO - panel width + dashboard.withPanels( grafana.util.grid.makeGrid([ row.new('CPU') @@ -294,6 +294,8 @@ local diskSpaceUtilisation = ||| % $._config ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), ]), + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ row.new('Disk Space') + row.withPanels([ diskSpaceUtilisation + tsQueryOptions.withTargets([ @@ -312,7 +314,7 @@ local diskSpaceUtilisation = ) + prometheus.withLegendFormat('{{ instance }}'), ]), ]), - ]), + ], panelWidth=24, panelHeight=7, startY=34), ), } + if $._config.showMultiCluster then { @@ -333,7 +335,6 @@ local diskSpaceUtilisation = + variable.query.refresh.onTime() + variable.query.withSort(asc=true), ]) - // TODO - panel width + dashboard.withPanels( grafana.util.grid.makeGrid([ row.new('CPU') @@ -452,6 +453,9 @@ local diskSpaceUtilisation = ||| % $._config ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), ]), + + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ row.new('Disk Space') + row.withPanels([ diskSpaceUtilisation + tsQueryOptions.withTargets([ @@ -470,7 +474,7 @@ local diskSpaceUtilisation = ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), ]), ]), - ]), + ], panelWidth=24, panelHeight=7, startY=34), ), } else {}, } From 3075a9d1674e6a5c40e90b82da10c0af4466a143 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:52:20 +0100 Subject: [PATCH 07/11] Some progress Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/lib/prom-mixin.libsonnet | 539 ++++++++++++----------- 1 file changed, 277 insertions(+), 262 deletions(-) diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index 6d035da540..40878241b4 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -5,9 +5,6 @@ local prometheus = grafana.query.prometheus; local variable = dashboard.variable; local template = grafana.template; -local graphPanel = grafana.graphPanel; -local gaugePanel = grafana70.panel.gauge; -local table = grafana70.panel.table; local timeSeriesPanel = grafana.panel.timeSeries; local tsOptions = timeSeriesPanel.options; @@ -16,17 +13,23 @@ local tsQueryOptions = timeSeriesPanel.queryOptions; local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; local tsLegend = tsOptions.legend; +local gaugePanel = grafana.panel.gauge; +local gaugeStep = gaugePanel.standardOptions.threshold.step; + +local table = grafana.panel.table; +local tableStep = table.standardOptions.threshold.step; + { new(config=null, platform=null, uid=null):: { - local datasource = variable.datasource.new( + local prometheusDatasourceVariable = variable.datasource.new( 'datasource', 'prometheus' ), local clusterVariablePrototype = variable.query.new('cluster') - + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + variable.query.refresh.onTime() + variable.generalOptions.withLabel('Cluster'), @@ -47,7 +50,7 @@ local tsLegend = tsOptions.legend; local instanceVariablePrototype = variable.query.new('instance') - + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + variable.query.refresh.onTime() + variable.generalOptions.withLabel('Instance'), @@ -67,223 +70,233 @@ local tsLegend = tsOptions.legend; local idleCPU = timeSeriesPanel.new('CPU Usage') - + variable.query.withDatasourceFromVariable(datasource) + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + tsStandardOptions.withUnit('percentunit') + tsCustom.stacking.withMode('normal') - // TODO - max, min, width - // TODO - target - - - graphPanel.new( - 'CPU Usage', - datasource='$datasource', - span=6, - format='percentunit', - max=1, - min=0, - stack=true, - ) - .addTarget(prometheus.target( - ||| - ( - (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) - / ignoring(cpu) group_left - count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) - ) - ||| % config, - legendFormat='{{cpu}}', - intervalFactor=5, - )), + + tsStandardOptions.withMax(1) + + tsStandardOptions.withMin(0) + + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) + / ignoring(cpu) group_left + count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) + ) + ||| % config, + ) + + prometheus.withLegendFormat('{{cpu}}') + + prometheus.withIntervalFactor('5'), + ]), local systemLoad = - graphPanel.new( - 'Load Average', - datasource='$datasource', - span=6, - format='short', - min=0, - fill=0, - ) - .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average')) - .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average')) - .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average')) - .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')), - + timeSeriesPanel.new('Load Average') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('short') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'), + prometheus.new('$datasource', 'node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('5m load average'), + prometheus.new('$datasource', 'node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('15m load average'), + prometheus.new('$datasource', 'count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config) + prometheus.withLegendFormat('logical cores'), + ]), + + // TODO - span 9 local memoryGraphPanelPrototype = - graphPanel.new( - 'Memory Usage', - datasource='$datasource', - span=9, - format='bytes', - min=0, - ), + timeSeriesPanel.new('Memory Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bytes') + + tsStandardOptions.withMin(0), + + local memoryGraph = if platform == 'Linux' then - memoryGraphPanelPrototype { stack: true } - .addTarget(prometheus.target( - ||| - ( - node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, - legendFormat='memory used' - )) - .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers')) - .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached')) - .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free')) + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('normal') + + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config, + ) + prometheus.withLegendFormat('memory used'), + prometheus.new('$datasource', 'node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory buffers'), + prometheus.new('$datasource', 'node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory cached'), + prometheus.new('$datasource', 'node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory free'), + ]) else if platform == 'Darwin' then // not useful to stack - memoryGraphPanelPrototype { stack: false } - .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) - .addTarget(prometheus.target( - ||| - ( - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='Memory Used' - )) - .addTarget(prometheus.target( - ||| - ( - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='App Memory' - )) - .addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory')) - .addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')) + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('none') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'), + prometheus.new( + '$datasource', + ||| + ( + node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + + node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + + node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat( + 'Memory Used' + ), + prometheus.new( + '$datasource', + ||| + ( + node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat( + 'App Memory' + ), + prometheus.new('$datasource', 'node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Wired Memory'), + prometheus.new('$datasource', 'node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Compressed'), + ]) + else if platform == 'AIX' then - memoryGraphPanelPrototype { stack: false } - .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) - .addTarget(prometheus.target( - ||| - ( - node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='Memory Used' - )), + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('none') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'), + prometheus.new( + '$datasource', + ||| + ( + node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat('Memory Used'), + ]), // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. + // TODO - span 3 local memoryGaugePanelPrototype = - gaugePanel.new( - title='Memory Usage', - datasource='$datasource', - ) - .addThresholdStep('rgba(50, 172, 45, 0.97)') - .addThresholdStep('rgba(237, 129, 40, 0.89)', 80) - .addThresholdStep('rgba(245, 54, 54, 0.9)', 90) - .setFieldConfig(max=100, min=0, unit='percent') - + { - span: 3, - }, + gaugePanel.new('Memory Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + gaugePanel.standardOptions.thresholds.withSteps([ + gaugeStep.withColor('rgba(50, 172, 45, 0.97)'), + gaugeStep.withColor('rgba(237, 129, 40, 0.89)') + gaugeStep.withValue(80), + gaugeStep.withColor('rgba(245, 54, 54, 0.9)') + gaugeStep.withValue(90), + ]) + + gaugePanel.standardOptions.withMax(100) + + gaugePanel.standardOptions.withMin(0) + + gaugePanel.standardOptions.withUnit('percent'), local memoryGauge = if platform == 'Linux' then memoryGaugePanelPrototype - - .addTarget(prometheus.target( - ||| - 100 - - ( - avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / - avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - * 100 - ) - ||| % config, - )) + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + 100 - + ( + avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / + avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + * 100 + ) + ||| % config, + ), + ]) else if platform == 'Darwin' then memoryGaugePanelPrototype - .addTarget(prometheus.target( - ||| - ( - ( - avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - - avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + - avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + - avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - ) / - avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - ) - * - 100 - ||| % config - )) + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + ( + avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - + avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + + avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + + avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + ) / + avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + ) + * + 100 + ||| % config + ), + ]) + else if platform == 'AIX' then memoryGaugePanelPrototype - .addTarget(prometheus.target( - ||| - 100 - - ( - avg(node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / - avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - * 100 - ) - ||| % config - )), + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + 100 - + ( + avg(node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / + avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + * 100 + ) + ||| % config + ), + ]), local diskIO = - graphPanel.new( - 'Disk I/O', - datasource='$datasource', - span=6, - min=0, - fill=0, - ) - // TODO: Does it make sense to have those three in the same panel? - .addTarget(prometheus.target( - 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} read', - intervalFactor=1, - )) - .addTarget(prometheus.target( - 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} written', - intervalFactor=1, - )) - .addTarget(prometheus.target( - 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} io time', - intervalFactor=1, - )) + - { - seriesOverrides: [ - { - alias: '/ read| written/', - yaxis: 1, - }, - { - alias: '/ io time/', - yaxis: 2, - }, - ], - yaxes: [ - self.yaxe(format='Bps'), - self.yaxe(format='percentunit'), - ], - }, + timeSeriesPanel.new('Disk I/O') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsQueryOptions.withTargets([ + // TODO: Does it make sense to have those three in the same panel? + prometheus.new('$datasource', 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} read') + + prometheus.withIntervalFactor(1), + prometheus.new('$datasource', 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} written') + + prometheus.withIntervalFactor(1), + prometheus.new('$datasource', 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} io time') + + prometheus.withIntervalFactor(1), + ]) + + tsStandardOptions.withOverrides( + [ + tsStandardOptions.override.byRegexp.new('/ read| written/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions( + tsStandardOptions.withUnit('Bps') + ), + tsStandardOptions.override.byRegexp.new('/ io time/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(tsStandardOptions.withUnit('percentunit')), + ] + ), local diskSpaceUsage = - table.new( - title='Disk Space Usage', - datasource='$datasource', + table.new('Disk Space Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + table.standardOptions.withUnit('decbytes') + + table.standardOptions.thresholds.withSteps( + [ + tableStep.withColor('green'), + tableStep.withColor('yellow') + gaugeStep.withValue(0.8), + tableStep.withColor('red') + gaugeStep.withValue(0.9), + ] ) - .setFieldConfig(unit='decbytes') - .addThresholdStep(color='green', value=null) - .addThresholdStep(color='yellow', value=0.8) - .addThresholdStep(color='red', value=0.9) + + table.queryOptions.withTargets([ + + ]), + // TODO - here .addTarget(prometheus.target( ||| max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) @@ -465,109 +478,111 @@ local tsLegend = tsOptions.legend; local networkReceived = - graphPanel.new( - 'Network Received', - description='Network received (bits/s)', - datasource='$datasource', - span=6, - format='bps', - min=0, - fill=0, - ) - .addTarget(prometheus.target( - 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, - legendFormat='{{device}}', - intervalFactor=1, - )), + timeSeriesPanel.new('Network Received') + + timeSeriesPanel.panelOptions.withDescription('Network received (bits/s)') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bps') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + + prometheus.withLegendFormat('1m load average') + + prometheus.withIntervalFactor(1) + ]), local networkTransmitted = - graphPanel.new( - 'Network Transmitted', - description='Network transmitted (bits/s)', - datasource='$datasource', - span=6, - format='bps', - min=0, - fill=0, - ) - .addTarget(prometheus.target( - 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, - legendFormat='{{device}}', - intervalFactor=1, - )), + timeSeriesPanel.new('Network Transmitted') + + timeSeriesPanel.panelOptions.withDescription('Network transmitted (bits/s)') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bps') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + + prometheus.withLegendFormat('1m load average') + + prometheus.withIntervalFactor(1) + ]), local cpuRow = row.new('CPU') - .addPanel(idleCPU) - .addPanel(systemLoad), + + row.withPanels([ + idleCPU, + systemLoad, + ]), local memoryRow = row.new('Memory') - .addPanel(memoryGraph) - .addPanel(memoryGauge), + + row.withPanels([ + + memoryGraph, + memoryGauge, + ]), local diskRow = row.new('Disk') - .addPanel(diskIO) - .addPanel(diskSpaceUsage), + + row.withPanels([ + diskIO, + diskSpaceUsage, + ]), local networkRow = row.new('Network') - .addPanel(networkReceived) - .addPanel(networkTransmitted), + + row.withPanels([ + networkReceived, + networkTransmitted, + ]), - local rows = - [ + local panels = + grafana.util.grid.makeGrid([ cpuRow, memoryRow, diskRow, networkRow, - ], + ]), - local templates = + local variables = [ - prometheusDatasourceTemplate, - clusterTemplate, - instanceTemplate, + prometheusDatasourceVariable, + clusterVariable, + instanceVariable, ], - dashboard: if platform == 'Linux' then dashboard.new( '%sNodes' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels) else if platform == 'Darwin' then dashboard.new( '%sMacOS' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels) else if platform == 'AIX' then dashboard.new( '%sAIX' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows), + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels), }, } From e48a3514720288208e4c5e99f9761077f834a37b Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Fri, 4 Oct 2024 17:08:59 +0100 Subject: [PATCH 08/11] Progress Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/lib/prom-mixin.libsonnet | 386 ++++++++++++----------- 1 file changed, 195 insertions(+), 191 deletions(-) diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index 40878241b4..be0d84948c 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -3,15 +3,12 @@ local dashboard = grafana.dashboard; local row = grafana.panel.row; local prometheus = grafana.query.prometheus; local variable = dashboard.variable; -local template = grafana.template; - local timeSeriesPanel = grafana.panel.timeSeries; local tsOptions = timeSeriesPanel.options; local tsStandardOptions = timeSeriesPanel.standardOptions; local tsQueryOptions = timeSeriesPanel.queryOptions; local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; -local tsLegend = tsOptions.legend; local gaugePanel = grafana.panel.gauge; local gaugeStep = gaugePanel.standardOptions.threshold.step; @@ -32,7 +29,7 @@ local tableStep = table.standardOptions.threshold.step; + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + variable.query.refresh.onTime() - + variable.generalOptions.withLabel('Cluster'), + + variable.query.generalOptions.withLabel('Cluster'), local clusterVariable = if platform == 'Darwin' then @@ -52,7 +49,7 @@ local tableStep = table.standardOptions.threshold.step; variable.query.new('instance') + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + variable.query.refresh.onTime() - + variable.generalOptions.withLabel('Instance'), + + variable.query.generalOptions.withLabel('Instance'), local instanceVariable = if platform == 'Darwin' then @@ -75,6 +72,8 @@ local tableStep = table.standardOptions.threshold.step; + tsCustom.stacking.withMode('normal') + tsStandardOptions.withMax(1) + tsStandardOptions.withMin(0) + + tsOptions.tooltip.withMode('multi') + + tsCustom.withFillOpacity(10) + tsQueryOptions.withTargets([ prometheus.new( '$datasource', @@ -87,7 +86,7 @@ local tableStep = table.standardOptions.threshold.step; ||| % config, ) + prometheus.withLegendFormat('{{cpu}}') - + prometheus.withIntervalFactor('5'), + + prometheus.withIntervalFactor(5), ]), local systemLoad = @@ -96,6 +95,7 @@ local tableStep = table.standardOptions.threshold.step; + tsStandardOptions.withUnit('short') + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'), prometheus.new('$datasource', 'node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('5m load average'), @@ -108,8 +108,9 @@ local tableStep = table.standardOptions.threshold.step; timeSeriesPanel.new('Memory Usage') + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + tsStandardOptions.withUnit('bytes') - + tsStandardOptions.withMin(0), - + + tsStandardOptions.withMin(0) + + tsOptions.tooltip.withMode('multi') + + tsCustom.withFillOpacity(10), local memoryGraph = if platform == 'Linux' then @@ -259,6 +260,7 @@ local tableStep = table.standardOptions.threshold.step; + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ // TODO: Does it make sense to have those three in the same panel? prometheus.new('$datasource', 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) @@ -288,193 +290,193 @@ local tableStep = table.standardOptions.threshold.step; + table.standardOptions.withUnit('decbytes') + table.standardOptions.thresholds.withSteps( [ - tableStep.withColor('green'), - tableStep.withColor('yellow') + gaugeStep.withValue(0.8), - tableStep.withColor('red') + gaugeStep.withValue(0.9), + tableStep.withColor('green'), + tableStep.withColor('yellow') + gaugeStep.withValue(0.8), + tableStep.withColor('red') + gaugeStep.withValue(0.9), ] ) + table.queryOptions.withTargets([ ]), - // TODO - here - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addOverride( - matcher={ - id: 'byName', - options: 'Mounted on', - }, - properties=[ - { - id: 'custom.width', - value: 260, - }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Size', - }, - properties=[ - - { - id: 'custom.width', - value: 93, - }, - - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Used', - }, - properties=[ - { - id: 'custom.width', - value: 72, - }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Available', - }, - properties=[ - { - id: 'custom.width', - value: 88, - }, - ], - ) + /* // TODO - here + .addTarget(prometheus.target( + ||| + max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config, + legendFormat='', + instant=true, + format='table' + )) + .addTarget(prometheus.target( + ||| + max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config, + legendFormat='', + instant=true, + format='table' + )) + .addOverride( + matcher={ + id: 'byName', + options: 'Mounted on', + }, + properties=[ + { + id: 'custom.width', + value: 260, + }, + ], + ) + .addOverride( + matcher={ + id: 'byName', + options: 'Size', + }, + properties=[ - .addOverride( - matcher={ - id: 'byName', - options: 'Used, %', - }, - properties=[ - { - id: 'unit', - value: 'percentunit', - }, - { - id: 'custom.displayMode', - value: 'gradient-gauge', - }, - { - id: 'max', - value: 1, - }, - { - id: 'min', - value: 0, - }, - ] - ) - + { span: 6 } - + { - transformations: [ - { - id: 'groupBy', - options: { - fields: { - 'Value #A': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - 'Value #B': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - mountpoint: { - aggregations: [], - operation: 'groupby', - }, + { + id: 'custom.width', + value: 93, }, + + ], + ) + .addOverride( + matcher={ + id: 'byName', + options: 'Used', }, - }, - { - id: 'merge', - options: {}, - }, - { - id: 'calculateField', - options: { - alias: 'Used', - binary: { - left: 'Value #A (lastNotNull)', - operator: '-', - reducer: 'sum', - right: 'Value #B (lastNotNull)', + properties=[ + { + id: 'custom.width', + value: 72, }, - mode: 'binary', - reduce: { - reducer: 'sum', + ], + ) + .addOverride( + matcher={ + id: 'byName', + options: 'Available', + }, + properties=[ + { + id: 'custom.width', + value: 88, }, + ], + ) + + .addOverride( + matcher={ + id: 'byName', + options: 'Used, %', }, - }, - { - id: 'calculateField', - options: { - alias: 'Used, %', - binary: { - left: 'Used', - operator: '/', - reducer: 'sum', - right: 'Value #A (lastNotNull)', + properties=[ + { + id: 'unit', + value: 'percentunit', }, - mode: 'binary', - reduce: { - reducer: 'sum', + { + id: 'custom.displayMode', + value: 'gradient-gauge', }, - }, - }, - { - id: 'organize', - options: { - excludeByName: {}, - indexByName: {}, - renameByName: { - 'Value #A (lastNotNull)': 'Size', - 'Value #B (lastNotNull)': 'Available', - mountpoint: 'Mounted on', + { + id: 'max', + value: 1, }, - }, - }, - { - id: 'sortBy', - options: { - fields: {}, - sort: [ - { - field: 'Mounted on', + { + id: 'min', + value: 0, + }, + ] + ) + + { span: 6 } + + { + transformations: [ + { + id: 'groupBy', + options: { + fields: { + 'Value #A': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', + }, + 'Value #B': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', + }, + mountpoint: { + aggregations: [], + operation: 'groupby', + }, + }, }, - ], - }, - }, - ], - }, + }, + { + id: 'merge', + options: {}, + }, + { + id: 'calculateField', + options: { + alias: 'Used', + binary: { + left: 'Value #A (lastNotNull)', + operator: '-', + reducer: 'sum', + right: 'Value #B (lastNotNull)', + }, + mode: 'binary', + reduce: { + reducer: 'sum', + }, + }, + }, + { + id: 'calculateField', + options: { + alias: 'Used, %', + binary: { + left: 'Used', + operator: '/', + reducer: 'sum', + right: 'Value #A (lastNotNull)', + }, + mode: 'binary', + reduce: { + reducer: 'sum', + }, + }, + }, + { + id: 'organize', + options: { + excludeByName: {}, + indexByName: {}, + renameByName: { + 'Value #A (lastNotNull)': 'Size', + 'Value #B (lastNotNull)': 'Available', + mountpoint: 'Mounted on', + }, + }, + }, + { + id: 'sortBy', + options: { + fields: {}, + sort: [ + { + field: 'Mounted on', + }, + ], + }, + }, + ], + }, */ local networkReceived = @@ -484,10 +486,11 @@ local tableStep = table.standardOptions.threshold.step; + tsStandardOptions.withUnit('bps') + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + prometheus.withLegendFormat('1m load average') - + prometheus.withIntervalFactor(1) + + prometheus.withIntervalFactor(1), ]), local networkTransmitted = @@ -497,10 +500,11 @@ local tableStep = table.standardOptions.threshold.step; + tsStandardOptions.withUnit('bps') + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + prometheus.withLegendFormat('1m load average') - + prometheus.withIntervalFactor(1) + + prometheus.withIntervalFactor(1), ]), local cpuRow = @@ -510,13 +514,11 @@ local tableStep = table.standardOptions.threshold.step; systemLoad, ]), - local memoryRow = - row.new('Memory') - + row.withPanels([ - - memoryGraph, - memoryGauge, - ]), + local memoryRow = [ + row.new('Memory') + row.gridPos.withY(8), + memoryGraph + row.gridPos.withX(0) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(18), + memoryGauge + row.gridPos.withX(18) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(6), + ], local diskRow = row.new('Disk') @@ -535,10 +537,12 @@ local tableStep = table.standardOptions.threshold.step; local panels = grafana.util.grid.makeGrid([ cpuRow, - memoryRow, + ], panelWidth=12, panelHeight=7) + + memoryRow + + grafana.util.grid.makeGrid([ diskRow, networkRow, - ]), + ], panelWidth=12, panelHeight=7, startY=18), local variables = [ From 7f563879b06a1b60b8db1b4af48b92f2be3bc9e3 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Mon, 7 Oct 2024 09:27:24 +0100 Subject: [PATCH 09/11] Finish table panel Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 1 - docs/node-mixin/lib/prom-mixin.libsonnet | 286 +++++++++-------------- 2 files changed, 113 insertions(+), 174 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index e2354c124e..1e23637f8b 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -318,7 +318,6 @@ local diskSpaceUtilisation = ), } + if $._config.showMultiCluster then { - // TODO - test thoroughly 'node-multicluster-rsrc-use.json': dashboard.new( '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index be0d84948c..cabb93d67e 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -15,6 +15,8 @@ local gaugeStep = gaugePanel.standardOptions.threshold.step; local table = grafana.panel.table; local tableStep = table.standardOptions.threshold.step; +local tableOverride = table.standardOptions.override; +local tableTransformation = table.queryOptions.transformation; { @@ -296,188 +298,126 @@ local tableStep = table.standardOptions.threshold.step; ] ) + table.queryOptions.withTargets([ - - ]), - /* // TODO - here - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addOverride( - matcher={ - id: 'byName', - options: 'Mounted on', - }, - properties=[ - { - id: 'custom.width', - value: 260, - }, - ], + prometheus.new( + '$datasource', + ||| + max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config + ) + + prometheus.withLegendFormat('') + + prometheus.withInstant() + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config + ) + + prometheus.withLegendFormat('') + + prometheus.withInstant() + + prometheus.withFormat('table'), + ]) + + table.standardOptions.withOverrides([ + tableOverride.byName.new('Mounted on') + + tableOverride.byName.withProperty('custom.width', 260), + tableOverride.byName.new('Size') + + tableOverride.byName.withProperty('custom.width', 93), + tableOverride.byName.new('Used') + + tableOverride.byName.withProperty('custom.width', 72), + tableOverride.byName.new('Available') + + tableOverride.byName.withProperty('custom.width', 88), + tableOverride.byName.new('Used, %') + + tableOverride.byName.withProperty('unit', 'percentunit') + + tableOverride.byName.withPropertiesFromOptions( + table.fieldConfig.defaults.custom.withCellOptions( + { type: 'gauge' }, ) - .addOverride( - matcher={ - id: 'byName', - options: 'Size', - }, - properties=[ - - { - id: 'custom.width', - value: 93, + ) + + tableOverride.byName.withProperty('max', 1) + + tableOverride.byName.withProperty('min', 0), + ]) + + table.queryOptions.withTransformations([ + tableTransformation.withId('groupBy') + + tableTransformation.withOptions( + { + fields: { + 'Value #A': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', }, - - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Used', - }, - properties=[ - { - id: 'custom.width', - value: 72, + 'Value #B': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Available', - }, - properties=[ - { - id: 'custom.width', - value: 88, + mountpoint: { + aggregations: [], + operation: 'groupby', }, - ], - ) - - .addOverride( - matcher={ - id: 'byName', - options: 'Used, %', }, - properties=[ - { - id: 'unit', - value: 'percentunit', - }, - { - id: 'custom.displayMode', - value: 'gradient-gauge', - }, - { - id: 'max', - value: 1, - }, - { - id: 'min', - value: 0, - }, - ] - ) - + { span: 6 } - + { - transformations: [ - { - id: 'groupBy', - options: { - fields: { - 'Value #A': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - 'Value #B': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - mountpoint: { - aggregations: [], - operation: 'groupby', - }, - }, - }, - }, - { - id: 'merge', - options: {}, - }, - { - id: 'calculateField', - options: { - alias: 'Used', - binary: { - left: 'Value #A (lastNotNull)', - operator: '-', - reducer: 'sum', - right: 'Value #B (lastNotNull)', - }, - mode: 'binary', - reduce: { - reducer: 'sum', - }, - }, - }, - { - id: 'calculateField', - options: { - alias: 'Used, %', - binary: { - left: 'Used', - operator: '/', - reducer: 'sum', - right: 'Value #A (lastNotNull)', - }, - mode: 'binary', - reduce: { - reducer: 'sum', - }, - }, - }, - { - id: 'organize', - options: { - excludeByName: {}, - indexByName: {}, - renameByName: { - 'Value #A (lastNotNull)': 'Size', - 'Value #B (lastNotNull)': 'Available', - mountpoint: 'Mounted on', - }, - }, - }, + } + ), + tableTransformation.withId('merge'), + tableTransformation.withId('calculateField') + + tableTransformation.withOptions( + { + alias: 'Used', + binary: { + left: 'Value #A (lastNotNull)', + operator: '-', + reducer: 'sum', + right: 'Value #B (lastNotNull)', + }, + mode: 'binary', + reduce: { + reducer: 'sum', + }, + } + ), + tableTransformation.withId('calculateField') + + tableTransformation.withOptions( + { + alias: 'Used, %', + binary: { + left: 'Used', + operator: '/', + reducer: 'sum', + right: 'Value #A (lastNotNull)', + }, + mode: 'binary', + reduce: { + reducer: 'sum', + }, + } + ), + tableTransformation.withId('organize') + + tableTransformation.withOptions( + { + excludeByName: {}, + indexByName: {}, + renameByName: { + 'Value #A (lastNotNull)': 'Size', + 'Value #B (lastNotNull)': 'Available', + mountpoint: 'Mounted on', + }, + } + ), + tableTransformation.withId('sortBy') + + tableTransformation.withOptions( + { + fields: {}, + sort: [ { - id: 'sortBy', - options: { - fields: {}, - sort: [ - { - field: 'Mounted on', - }, - ], - }, + field: 'Mounted on', }, ], - }, */ + } + ), + ]), local networkReceived = timeSeriesPanel.new('Network Received') From 1b4782c22ba209ccc8089a16170b0634056eec42 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:09:29 +0000 Subject: [PATCH 10/11] Fix network legend Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/lib/prom-mixin.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index cabb93d67e..9b6a494b84 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -429,7 +429,7 @@ local tableTransformation = table.queryOptions.transformation; + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) - + prometheus.withLegendFormat('1m load average') + + prometheus.withLegendFormat('{{device}}') + prometheus.withIntervalFactor(1), ]), @@ -443,7 +443,7 @@ local tableTransformation = table.queryOptions.transformation; + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) - + prometheus.withLegendFormat('1m load average') + + prometheus.withLegendFormat('{{device}}') + prometheus.withIntervalFactor(1), ]), From 42d468c8d656a649452ad9045185ef63b9a5504e Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:08:02 +0000 Subject: [PATCH 11/11] Never show points Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 1 + docs/node-mixin/lib/prom-mixin.libsonnet | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 1e23637f8b..f9d9e07cf6 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -21,6 +21,7 @@ local tsCommonPanelOptions = variable.query.withDatasourceFromVariable(datasource) + tsCustom.stacking.withMode('normal') + tsCustom.withFillOpacity(100) + + tsCustom.withShowPoints('never') + tsLegend.withShowLegend(false) + tsOptions.tooltip.withMode('multi') + tsOptions.tooltip.withSort('desc'); diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index 9b6a494b84..f18c273c29 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -76,6 +76,7 @@ local tableTransformation = table.queryOptions.transformation; + tsStandardOptions.withMin(0) + tsOptions.tooltip.withMode('multi') + tsCustom.withFillOpacity(10) + + tsCustom.withShowPoints('never') + tsQueryOptions.withTargets([ prometheus.new( '$datasource', @@ -97,6 +98,7 @@ local tableTransformation = table.queryOptions.transformation; + tsStandardOptions.withUnit('short') + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'), @@ -105,14 +107,14 @@ local tableTransformation = table.queryOptions.transformation; prometheus.new('$datasource', 'count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config) + prometheus.withLegendFormat('logical cores'), ]), - // TODO - span 9 local memoryGraphPanelPrototype = timeSeriesPanel.new('Memory Usage') + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + tsStandardOptions.withUnit('bytes') + tsStandardOptions.withMin(0) + tsOptions.tooltip.withMode('multi') - + tsCustom.withFillOpacity(10), + + tsCustom.withFillOpacity(10) + + tsCustom.withShowPoints('never'), local memoryGraph = if platform == 'Linux' then @@ -189,7 +191,6 @@ local tableTransformation = table.queryOptions.transformation; // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. - // TODO - span 3 local memoryGaugePanelPrototype = gaugePanel.new('Memory Usage') + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) @@ -262,6 +263,7 @@ local tableTransformation = table.queryOptions.transformation; + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ // TODO: Does it make sense to have those three in the same panel? @@ -426,6 +428,7 @@ local tableTransformation = table.queryOptions.transformation; + tsStandardOptions.withUnit('bps') + tsStandardOptions.withMin(0) + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + tsOptions.tooltip.withMode('multi') + tsQueryOptions.withTargets([ prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config)