From 0cbee467631b373e448cae29e6a7b55f85d5fde3 Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 2 Oct 2023 14:49:39 +0300 Subject: [PATCH 1/5] Add Misc members to container info + tests Modeled after Hugetlb support (except that this skips DeprecatedContainerStats) and added to sources after it. --- info/v1/container.go | 16 ++++++++++++++++ info/v2/container.go | 4 ++++ info/v2/conversion.go | 4 ++++ info/v2/conversion_test.go | 4 ++++ 4 files changed, 28 insertions(+) diff --git a/info/v1/container.go b/info/v1/container.go index ae1d9caecc..159812b936 100644 --- a/info/v1/container.go +++ b/info/v1/container.go @@ -62,6 +62,8 @@ type ContainerSpec struct { HasHugetlb bool `json:"has_hugetlb"` + HasMisc bool `json:"has_misc"` + HasNetwork bool `json:"has_network"` HasProcesses bool `json:"has_processes"` @@ -205,6 +207,9 @@ func (s *ContainerSpec) Eq(b *ContainerSpec) bool { if s.HasHugetlb != b.HasHugetlb { return false } + if s.HasMisc != b.HasMisc { + return false + } if s.HasNetwork != b.HasNetwork { return false } @@ -364,6 +369,13 @@ type HugetlbStats struct { Failcnt uint64 `json:"failcnt"` } +type MiscStats struct { + // current resource usage for a key in misc + Usage uint64 `json:"usage,omitempty"` + // number of times the resource was about to go over the max boundary + Events uint64 `json:"events,omitempty"` +} + type MemoryStats struct { // Current memory usage, this includes all memory regardless of when it was // accessed. 
@@ -947,6 +959,7 @@ type ContainerStats struct { DiskIo DiskIoStats `json:"diskio,omitempty"` Memory MemoryStats `json:"memory,omitempty"` Hugetlb map[string]HugetlbStats `json:"hugetlb,omitempty"` + Misc map[string]MiscStats `json:"misc,omitempty"` Network NetworkStats `json:"network,omitempty"` // Filesystem statistics Filesystem []FsStats `json:"filesystem,omitempty"` @@ -1016,6 +1029,9 @@ func (a *ContainerStats) StatsEq(b *ContainerStats) bool { if !reflect.DeepEqual(a.Hugetlb, b.Hugetlb) { return false } + if !reflect.DeepEqual(a.Misc, b.Misc) { + return false + } if !reflect.DeepEqual(a.DiskIo, b.DiskIo) { return false } diff --git a/info/v2/container.go b/info/v2/container.go index f0824027a3..0a35c403c1 100644 --- a/info/v2/container.go +++ b/info/v2/container.go @@ -90,6 +90,8 @@ type ContainerSpec struct { HasHugetlb bool `json:"has_hugetlb"` + HasMisc bool `json:"has_misc"` + HasCustomMetrics bool `json:"has_custom_metrics"` CustomMetrics []v1.MetricSpec `json:"custom_metrics,omitempty"` @@ -163,6 +165,8 @@ type ContainerStats struct { Memory *v1.MemoryStats `json:"memory,omitempty"` // Hugepage statistics Hugetlb *map[string]v1.HugetlbStats `json:"hugetlb,omitempty"` + // Misc statistics + Misc *map[string]v1.MiscStats `json:"misc,omitempty"` // Network statistics Network *NetworkStats `json:"network,omitempty"` // Processes statistics diff --git a/info/v2/conversion.go b/info/v2/conversion.go index 41da7ea4a9..8f115044ec 100644 --- a/info/v2/conversion.go +++ b/info/v2/conversion.go @@ -121,6 +121,9 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats [] if spec.HasHugetlb { stat.Hugetlb = &val.Hugetlb } + if spec.HasMisc { + stat.Misc = &val.Misc + } if spec.HasNetwork { // TODO: Handle TcpStats stat.Network = &NetworkStats{ @@ -288,6 +291,7 @@ func ContainerSpecFromV1(specV1 *v1.ContainerSpec, aliases []string, namespace s HasCpu: specV1.HasCpu, HasMemory: specV1.HasMemory, HasHugetlb: specV1.HasHugetlb, + HasMisc: 
specV1.HasMisc, HasFilesystem: specV1.HasFilesystem, HasNetwork: specV1.HasNetwork, HasProcesses: specV1.HasProcesses, diff --git a/info/v2/conversion_test.go b/info/v2/conversion_test.go index ca470e9480..2df27f39f8 100644 --- a/info/v2/conversion_test.go +++ b/info/v2/conversion_test.go @@ -48,6 +48,7 @@ func TestContainerSpecFromV1(t *testing.T) { SwapLimit: 8192, }, HasHugetlb: true, + HasMisc: true, HasNetwork: true, HasProcesses: true, HasFilesystem: true, @@ -82,6 +83,7 @@ func TestContainerSpecFromV1(t *testing.T) { SwapLimit: 8192, }, HasHugetlb: true, + HasMisc: true, HasNetwork: true, HasProcesses: true, HasFilesystem: true, @@ -121,6 +123,7 @@ func TestContainerStatsFromV1(t *testing.T) { SwapLimit: 8192, }, HasHugetlb: true, + HasMisc: true, HasNetwork: true, HasProcesses: true, HasFilesystem: true, @@ -263,6 +266,7 @@ func TestContainerStatsFromV1(t *testing.T) { DiskIo: &v1Stats.DiskIo, Memory: &v1Stats.Memory, Hugetlb: &v1Stats.Hugetlb, + Misc: &v1Stats.Misc, Processes: &v1Stats.Processes, Network: &NetworkStats{ Interfaces: v1Stats.Network.Interfaces, From eff6ae69c701832372df37c5eadb8e89ad307147 Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 2 Oct 2023 15:38:34 +0300 Subject: [PATCH 2/5] Add Misc metrics helpers + tests Modeled after Hugetlb support and added to sources after it. 
--- container/common/helpers.go | 8 ++++++++ container/common/helpers_test.go | 2 ++ container/factory.go | 2 ++ container/libcontainer/helpers.go | 1 + 4 files changed, 13 insertions(+) diff --git a/container/common/helpers.go b/container/common/helpers.go index dbffc922e9..e14464fd83 100644 --- a/container/common/helpers.go +++ b/container/common/helpers.go @@ -197,6 +197,14 @@ func getSpecInternal(cgroupPaths map[string]string, machineInfoFactory info.Mach } } + // Misc controller + miscRoot, ok := cgroupPaths["misc"] + if ok { + if utils.FileExists(miscRoot) { + spec.HasMisc = true + } + } + // Processes, read it's value from pids path directly pidsRoot, ok := GetControllerPath(cgroupPaths, "pids", cgroup2UnifiedMode) if ok { diff --git a/container/common/helpers_test.go b/container/common/helpers_test.go index 5688b89f17..fde75a8932 100644 --- a/container/common/helpers_test.go +++ b/container/common/helpers_test.go @@ -136,6 +136,7 @@ func TestGetSpecCgroupV1(t *testing.T) { assert.EqualValues(t, spec.Processes.Limit, 1027) assert.False(t, spec.HasHugetlb) + assert.False(t, spec.HasMisc) assert.False(t, spec.HasDiskIo) } @@ -168,6 +169,7 @@ func TestGetSpecCgroupV2(t *testing.T) { assert.EqualValues(t, spec.Processes.Limit, 1027) assert.False(t, spec.HasHugetlb) + assert.False(t, spec.HasMisc) assert.True(t, spec.HasDiskIo) } diff --git a/container/factory.go b/container/factory.go index c48a64e163..728cec6ae9 100644 --- a/container/factory.go +++ b/container/factory.go @@ -60,6 +60,7 @@ const ( AppMetrics MetricKind = "app" ProcessMetrics MetricKind = "process" HugetlbUsageMetrics MetricKind = "hugetlb" + MiscMetrics MetricKind = "misc" PerfMetrics MetricKind = "perf_event" ReferencedMemoryMetrics MetricKind = "referenced_memory" CPUTopologyMetrics MetricKind = "cpu_topology" @@ -85,6 +86,7 @@ var AllMetrics = MetricSet{ ProcessMetrics: struct{}{}, AppMetrics: struct{}{}, HugetlbUsageMetrics: struct{}{}, + MiscMetrics: struct{}{}, PerfMetrics: struct{}{}, 
ReferencedMemoryMetrics: struct{}{}, CPUTopologyMetrics: struct{}{}, diff --git a/container/libcontainer/helpers.go b/container/libcontainer/helpers.go index e535ad64c4..0514b630e4 100644 --- a/container/libcontainer/helpers.go +++ b/container/libcontainer/helpers.go @@ -83,6 +83,7 @@ var supportedSubsystems = map[string]container.MetricKind{ "cpuacct": container.CpuUsageMetrics, "memory": container.MemoryUsageMetrics, "hugetlb": container.HugetlbUsageMetrics, + "misc": container.MiscMetrics, "pids": container.ProcessMetrics, "cpuset": container.CPUSetMetrics, "blkio": container.DiskIOMetrics, From f60021d58681a037128000b2e63c4130131d713c Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 2 Oct 2023 17:33:59 +0300 Subject: [PATCH 3/5] Add Misc metrics support + tests Modeled after Hugetlb support and added to sources after it. --- cmd/cadvisor_test.go | 1 + container/libcontainer/handler.go | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/cmd/cadvisor_test.go b/cmd/cadvisor_test.go index 58461ae182..a3635ed256 100644 --- a/cmd/cadvisor_test.go +++ b/cmd/cadvisor_test.go @@ -106,6 +106,7 @@ func TestToIncludedMetrics(t *testing.T) { container.ProcessMetrics: struct{}{}, container.AppMetrics: struct{}{}, container.HugetlbUsageMetrics: struct{}{}, + container.MiscMetrics: struct{}{}, container.PerfMetrics: struct{}{}, container.ReferencedMemoryMetrics: struct{}{}, container.CPUTopologyMetrics: struct{}{}, diff --git a/container/libcontainer/handler.go b/container/libcontainer/handler.go index 5bf1a4f997..973f8ff4ba 100644 --- a/container/libcontainer/handler.go +++ b/container/libcontainer/handler.go @@ -884,6 +884,16 @@ func setHugepageStats(s *cgroups.Stats, ret *info.ContainerStats) { } } +func setMiscStats(s *cgroups.Stats, ret *info.ContainerStats) { + ret.Misc = make(map[string]info.MiscStats) + for k, v := range s.MiscStats { + ret.Misc[k] = info.MiscStats{ + Usage: v.Usage, + Events: v.Events, + } + } +} + // read from pids path not cpu 
func setThreadsStats(s *cgroups.Stats, ret *info.ContainerStats) { if s != nil { @@ -909,6 +919,9 @@ func newContainerStats(cgroupStats *cgroups.Stats, includedMetrics container.Met if includedMetrics.Has(container.HugetlbUsageMetrics) { setHugepageStats(s, ret) } + if includedMetrics.Has(container.MiscMetrics) { + setMiscStats(s, ret) + } if includedMetrics.Has(container.CPUSetMetrics) { setCPUSetStats(s, ret) } From a7ad3124eeae97fbbb902205f5e60b945cf9c49a Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 2 Oct 2023 17:47:06 +0300 Subject: [PATCH 4/5] Add Misc cgroup info to docs --- docs/runtime_options.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/runtime_options.md b/docs/runtime_options.md index f77551b7cc..0839416d60 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -134,8 +134,8 @@ cAdvisor stores the latest historical data in memory. How long of a history it s --application_metrics_count_limit=100: Max number of application metrics to store (per container) (default 100) --collector_cert="": Collector's certificate, exposed to endpoints for certificate based authentication. --collector_key="": Key for the collector's certificate ---disable_metrics=: comma-separated list of metrics to be disabled. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. (default advtcp,cpu_topology,cpuset,hugetlb,memory_numa,process,referenced_memory,resctrl,sched,tcp,udp) ---enable_metrics=: comma-separated list of metrics to be enabled. If set, overrides 'disable_metrics'. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. +--disable_metrics=: comma-separated list of metrics to be disabled. 
Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,misc,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. (default advtcp,cpu_topology,cpuset,hugetlb,memory_numa,misc,process,referenced_memory,resctrl,sched,tcp,udp) +--enable_metrics=: comma-separated list of metrics to be enabled. If set, overrides 'disable_metrics'. Options are advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,misc,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. --prometheus_endpoint="/metrics": Endpoint to expose Prometheus metrics on (default "/metrics") --disable_root_cgroup_stats=false: Disable collecting root Cgroup stats ``` From 03e04bc17f4f98e1dca3dbd8c4253825a0b0f19e Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Tue, 3 Oct 2023 08:00:05 +0300 Subject: [PATCH 5/5] Add Misc cgroup support to Prometheus metrics --- docs/storage/prometheus.md | 2 + metrics/prometheus.go | 37 +++++++++++++++++++ metrics/prometheus_fake.go | 10 +++++ metrics/testdata/prometheus_metrics | 8 ++++ .../prometheus_metrics_whitelist_filtered | 8 ++++ 5 files changed, 65 insertions(+) diff --git a/docs/storage/prometheus.md b/docs/storage/prometheus.md index 7bb3c465ea..fa096078a2 100644 --- a/docs/storage/prometheus.md +++ b/docs/storage/prometheus.md @@ -50,6 +50,8 @@ Metric name | Type | Description | Unit (where applicable) | option parameter | `container_hugetlb_failcnt` | Counter | Number of hugepage usage hits limits | | hugetlb | `container_hugetlb_max_usage_bytes` | Gauge | Maximum hugepage usages recorded | bytes | hugetlb | `container_hugetlb_usage_bytes` | Gauge | Current hugepage usage | bytes | hugetlb | +`container_misc_usage` | Gauge | Current usage of the misc scalar resource specified by the label | | misc | +`container_misc_events` | Counter | Number of times the usage for the misc scalar resource specified by the label was about to go over the max 
boundary | | misc | `container_last_seen` | Gauge | Last time a container was seen by the exporter | timestamp | - | `container_llc_occupancy_bytes` | Gauge | Last level cache usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl | `container_memory_bandwidth_bytes` | Gauge | Total memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl | diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 86064819d3..21bc78745d 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -368,6 +368,43 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri }, }...) } + if includedMetrics.Has(container.MiscMetrics) { + c.containerMetrics = append(c.containerMetrics, []containerMetric{ + { + name: "container_misc_usage", + help: "Current usage of the misc scalar resource specified by the label", + valueType: prometheus.GaugeValue, + extraLabels: []string{"resource"}, + getValues: func(s *info.ContainerStats) metricValues { + values := make(metricValues, 0, len(s.Misc)) + for k, v := range s.Misc { + values = append(values, metricValue{ + value: float64(v.Usage), + labels: []string{k}, + timestamp: s.Timestamp, + }) + } + return values + }, + }, { + name: "container_misc_events", + help: "Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary", + valueType: prometheus.CounterValue, + extraLabels: []string{"resource"}, + getValues: func(s *info.ContainerStats) metricValues { + values := make(metricValues, 0, len(s.Misc)) + for k, v := range s.Misc { + values = append(values, metricValue{ + value: float64(v.Events), + labels: []string{k}, + timestamp: s.Timestamp, + }) + } + return values + }, + }, + }...) 
+ } if includedMetrics.Has(container.MemoryUsageMetrics) { c.containerMetrics = append(c.containerMetrics, []containerMetric{ { diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index fd43b78148..0aa4a66e39 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -371,6 +371,16 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req Failcnt: 0, }, }, + Misc: map[string]info.MiscStats{ + "res_a": { + Usage: 1, + Events: 42, + }, + "res_b": { + Usage: 2, + Events: 42, + }, + }, Network: info.NetworkStats{ InterfaceStats: info.InterfaceStats{ Name: "eth0", diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index a385e50689..63831f3220 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -136,6 +136,14 @@ container_hugetlb_max_usage_bytes{container_env_foo_env="prod",container_label_f # TYPE container_hugetlb_usage_bytes gauge container_hugetlb_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",pagesize="1Gi",zone_name="hello"} 0 1395066363000 container_hugetlb_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",pagesize="2Mi",zone_name="hello"} 4 1395066363000 +# HELP container_misc_usage Current usage of the misc scalar resource specified by the label +# TYPE container_misc_usage gauge +container_misc_usage{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 1 1395066363000 +container_misc_usage{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 2 1395066363000 +# HELP container_misc_events Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary +# TYPE container_misc_events
counter +container_misc_events{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 42 1395066363000 +container_misc_events{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 42 1395066363000 # HELP container_last_seen Last time a container was seen by the exporter # TYPE container_last_seen gauge container_last_seen{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000 diff --git a/metrics/testdata/prometheus_metrics_whitelist_filtered b/metrics/testdata/prometheus_metrics_whitelist_filtered index 921b2e1106..72d5700ac1 100644 --- a/metrics/testdata/prometheus_metrics_whitelist_filtered +++ b/metrics/testdata/prometheus_metrics_whitelist_filtered @@ -136,6 +136,14 @@ container_hugetlb_max_usage_bytes{container_env_foo_env="prod",id="testcontainer # TYPE container_hugetlb_usage_bytes gauge container_hugetlb_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",pagesize="1Gi",zone_name="hello"} 0 1395066363000 container_hugetlb_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",pagesize="2Mi",zone_name="hello"} 4 1395066363000 +# HELP container_misc_usage Current usage of the misc scalar resource specified by the label +# TYPE container_misc_usage gauge +container_misc_usage{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 1 1395066363000 +container_misc_usage{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 2 1395066363000 +# HELP container_misc_events Number of times the usage for the misc scalar resource specified by the label was about to go over the max boundary +# TYPE
container_misc_events counter +container_misc_events{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_a",zone_name="hello"} 42 1395066363000 +container_misc_events{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",resource="res_b",zone_name="hello"} 42 1395066363000 # HELP container_last_seen Last time a container was seen by the exporter # TYPE container_last_seen gauge container_last_seen{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000