diff --git a/libbeat/cmd/instance/metrics/metrics.go b/libbeat/cmd/instance/metrics/metrics.go index f47bf59a335..9960db99e9f 100644 --- a/libbeat/cmd/instance/metrics/metrics.go +++ b/libbeat/cmd/instance/metrics/metrics.go @@ -59,7 +59,7 @@ func SetupMetrics(name string) error { beatProcessStats = &process.Stats{ Procs: []string{name}, EnvWhitelist: nil, - CpuTicks: true, + CPUTicks: true, CacheCmdLine: true, IncludeTop: process.IncludeTopConfig{}, } diff --git a/metricbeat/module/system/process/cgroup.go b/libbeat/metric/system/process/cgroup.go similarity index 79% rename from metricbeat/module/system/process/cgroup.go rename to libbeat/metric/system/process/cgroup.go index c71dc7fe34d..db4f768f38b 100644 --- a/metricbeat/module/system/process/cgroup.go +++ b/libbeat/metric/system/process/cgroup.go @@ -26,31 +26,31 @@ import ( // cgroupStatsToMap returns a MapStr containing the data from the stats object. // If stats is nil then nil is returned. -func cgroupStatsToMap(stats *cgroup.Stats, perCPU bool) common.MapStr { - if stats == nil { +func cgroupStatsToMap(stats *Process) common.MapStr { + if stats == nil || stats.RawStats == nil { return nil } cgroup := common.MapStr{} // id and path are only available when all subsystems share a common path. 
- if stats.ID != "" { - cgroup["id"] = stats.ID + if stats.RawStats.ID != "" { + cgroup["id"] = stats.RawStats.ID } - if stats.Path != "" { - cgroup["path"] = stats.Path + if stats.RawStats.Path != "" { + cgroup["path"] = stats.RawStats.Path } - if cpu := cgroupCPUToMapStr(stats.CPU); cpu != nil { + if cpu := cgroupCPUToMapStr(stats.RawStats.CPU); cpu != nil { cgroup["cpu"] = cpu } - if cpuacct := cgroupCPUAccountingToMapStr(stats.CPUAccounting, perCPU); cpuacct != nil { + if cpuacct := cgroupCPUAccountingToMapStr(stats); cpuacct != nil { cgroup["cpuacct"] = cpuacct } - if memory := cgroupMemoryToMapStr(stats.Memory); memory != nil { + if memory := cgroupMemoryToMapStr(stats.RawStats.Memory); memory != nil { cgroup["memory"] = memory } - if blkio := cgroupBlockIOToMapStr(stats.BlockIO); blkio != nil { + if blkio := cgroupBlockIOToMapStr(stats.RawStats.BlockIO); blkio != nil { cgroup["blkio"] = blkio } @@ -97,7 +97,8 @@ func cgroupCPUToMapStr(cpu *cgroup.CPUSubsystem) common.MapStr { // cgroupCPUAccountingToMapStr returns a MapStr containing // CPUAccountingSubsystem data. If the cpuacct parameter is nil then nil is // returned. 
-func cgroupCPUAccountingToMapStr(cpuacct *cgroup.CPUAccountingSubsystem, perCPU bool) common.MapStr { +func cgroupCPUAccountingToMapStr(process *Process) common.MapStr { + cpuacct := process.RawStats.CPUAccounting if cpuacct == nil { return nil } @@ -106,25 +107,35 @@ func cgroupCPUAccountingToMapStr(cpuacct *cgroup.CPUAccountingSubsystem, perCPU "id": cpuacct.ID, "path": cpuacct.Path, "total": common.MapStr{ - "ns": cpuacct.TotalNanos, + "ns": cpuacct.TotalNanos, + "pct": process.PctStats.CPUTotalPct, + "norm": common.MapStr{ + "pct": process.PctStats.CPUTotalPctNorm, + }, }, "stats": common.MapStr{ "system": common.MapStr{ - "ns": cpuacct.Stats.SystemNanos, + "ns": cpuacct.Stats.SystemNanos, + "pct": process.PctStats.CPUSystemPct, + "norm": common.MapStr{ + "pct": process.PctStats.CPUSystemPctNorm, + }, }, "user": common.MapStr{ - "ns": cpuacct.Stats.UserNanos, + "ns": cpuacct.Stats.UserNanos, + "pct": process.PctStats.CPUUserPct, + "norm": common.MapStr{ + "pct": process.PctStats.CPUUserPctNorm, + }, }, }, } - if perCPU { - perCPUUsage := common.MapStr{} - for i, usage := range cpuacct.UsagePerCPU { - perCPUUsage[strconv.Itoa(i+1)] = usage - } - event["percpu"] = perCPUUsage + perCPUUsage := common.MapStr{} + for i, usage := range cpuacct.UsagePerCPU { + perCPUUsage[strconv.Itoa(i+1)] = usage } + event["percpu"] = perCPUUsage return event } diff --git a/libbeat/metric/system/process/process.go b/libbeat/metric/system/process/process.go index 76098a43f19..9e7eb5ac932 100644 --- a/libbeat/metric/system/process/process.go +++ b/libbeat/metric/system/process/process.go @@ -34,6 +34,7 @@ import ( "github.com/elastic/beats/v7/libbeat/logp" "github.com/elastic/beats/v7/libbeat/metric/system/memory" sigar "github.com/elastic/gosigar" + "github.com/elastic/gosigar/cgroup" ) // ProcsMap is a map where the keys are the names of processes and the value is the Process with that name @@ -42,39 +43,57 @@ type ProcsMap map[int]*Process // Process is the structure which holds 
the information of a process running on the host. // It includes pid, gid and it interacts with gosigar to fetch process data from the host. type Process struct { - Pid int `json:"pid"` - Ppid int `json:"ppid"` - Pgid int `json:"pgid"` - Name string `json:"name"` - Username string `json:"username"` - State string `json:"state"` - Args []string `json:"args"` - CmdLine string `json:"cmdline"` - Cwd string `json:"cwd"` - Executable string `json:"executable"` - Mem sigar.ProcMem - Cpu sigar.ProcTime - SampleTime time.Time - FD sigar.ProcFDUsage - Env common.MapStr + Pid int `json:"pid"` + Ppid int `json:"ppid"` + Pgid int `json:"pgid"` + Name string `json:"name"` + Username string `json:"username"` + State string `json:"state"` + Args []string `json:"args"` + CmdLine string `json:"cmdline"` + Cwd string `json:"cwd"` + Executable string `json:"executable"` + Mem sigar.ProcMem + CPU sigar.ProcTime + SampleTime time.Time + FD sigar.ProcFDUsage + Env common.MapStr + + //cpu stats cpuSinceStart float64 cpuTotalPct float64 cpuTotalPctNorm float64 + + // cgroup stats + RawStats *cgroup.Stats + PctStats CgroupPctStats +} + +// CgroupPctStats stores rendered percent values from cgroup CPU data +type CgroupPctStats struct { + CPUTotalPct float64 + CPUTotalPctNorm float64 + CPUUserPct float64 + CPUUserPctNorm float64 + CPUSystemPct float64 + CPUSystemPctNorm float64 } // Stats stores the stats of processes on the host. type Stats struct { - Procs []string - ProcsMap ProcsMap - CpuTicks bool - EnvWhitelist []string - CacheCmdLine bool - IncludeTop IncludeTopConfig + Procs []string + ProcsMap ProcsMap + CPUTicks bool + EnvWhitelist []string + CacheCmdLine bool + IncludeTop IncludeTopConfig + CgroupOpts cgroup.ReaderOptions + EnableCgroups bool procRegexps []match.Matcher // List of regular expressions used to whitelist processes. envRegexps []match.Matcher // List of regular expressions used to whitelist env vars. 
- - logger *logp.Logger + cgroups *cgroup.Reader + logger *logp.Logger } // Ticks of CPU for a process @@ -127,8 +146,8 @@ func (proc *Process) getDetails(envPredicate func(string) bool) error { return fmt.Errorf("error getting process mem for pid=%d: %v", proc.Pid, err) } - proc.Cpu = sigar.ProcTime{} - if err := proc.Cpu.Get(proc.Pid); err != nil { + proc.CPU = sigar.ProcTime{} + if err := proc.CPU.Get(proc.Pid); err != nil { return fmt.Errorf("error getting process cpu time for pid=%d: %v", proc.Pid, err) } @@ -322,13 +341,13 @@ func (procStats *Stats) getProcessEvent(process *Process) common.MapStr { "pct": process.cpuTotalPctNorm, }, }, - "start_time": unixTimeMsToTime(process.Cpu.StartTime), + "start_time": unixTimeMsToTime(process.CPU.StartTime), } - if procStats.CpuTicks { - proc.Put("cpu.user.ticks", process.Cpu.User) - proc.Put("cpu.system.ticks", process.Cpu.Sys) - proc.Put("cpu.total.ticks", process.Cpu.Total) + if procStats.CPUTicks { + proc.Put("cpu.user.ticks", process.CPU.User) + proc.Put("cpu.system.ticks", process.CPU.Sys) + proc.Put("cpu.total.ticks", process.CPU.Total) } if process.FD != (sigar.ProcFDUsage{}) { @@ -341,6 +360,12 @@ func (procStats *Stats) getProcessEvent(process *Process) common.MapStr { } } + if procStats.EnableCgroups { + if statsMap := cgroupStatsToMap(process); statsMap != nil { + proc["cgroup"] = statsMap + } + } + return proc } @@ -359,18 +384,63 @@ func GetProcCPUPercentage(s0, s1 *Process) (normalizedPct, pct, totalPct float64 if s0 != nil && s1 != nil { timeDelta := s1.SampleTime.Sub(s0.SampleTime) timeDeltaMillis := timeDelta / time.Millisecond - totalCPUDeltaMillis := int64(s1.Cpu.Total - s0.Cpu.Total) + totalCPUDeltaMillis := int64(s1.CPU.Total - s0.CPU.Total) pct := float64(totalCPUDeltaMillis) / float64(timeDeltaMillis) normalizedPct := pct / float64(runtime.NumCPU()) - return common.Round(normalizedPct, common.DefaultDecimalPlacesCount), common.Round(pct, common.DefaultDecimalPlacesCount), - 
common.Round(float64(s1.Cpu.Total), common.DefaultDecimalPlacesCount) + common.Round(float64(s1.CPU.Total), common.DefaultDecimalPlacesCount) } return 0, 0, 0 } +// GetCgroupPercentage returns CPU usage percentages for a given cgroup +// see GetProcCPUPercentage for implementation details, as the two are conceptually similar. +// Note that the cgroup controller reports system and user times in USER_HZ, while +// totals are reported in nanoseconds. Because of this, any math that mixes the two might be slightly off, +// as USER_HZ is a less precise value that will get rounded up to nanoseconds. +// Because of that, `user` and `system` metrics reflect a percentage of overall CPU time, but can't be compared to the total pct values. +func GetCgroupPercentage(s0, s1 *Process) CgroupPctStats { + if s0 == nil || s1 == nil || s0.RawStats == nil || s1.RawStats == nil || s0.RawStats.CPUAccounting == nil || s1.RawStats.CPUAccounting == nil { + return CgroupPctStats{} + } + timeDelta := s1.SampleTime.Sub(s0.SampleTime) + timeDeltaNanos := timeDelta / time.Nanosecond + totalCPUDeltaNanos := int64(s1.RawStats.CPUAccounting.TotalNanos - s0.RawStats.CPUAccounting.TotalNanos) + + pct := float64(totalCPUDeltaNanos) / float64(timeDeltaNanos) + // Avoid using NumCPU unless we need to; the values in UsagePerCPU are more likely to reflect the running conditions of the cgroup + // NumCPU can vary based on the conditions of the running metricbeat process, as it uses Affinity Masks, not hardware data. + var cpuCount int + if len(s1.RawStats.CPUAccounting.UsagePerCPU) > 0 { + cpuCount = len(s1.RawStats.CPUAccounting.UsagePerCPU) + } else { + cpuCount = runtime.NumCPU() + } + + // if you look at the raw cgroup stats, the following normalized value is literally an average of per-cpu numbers. 
+ normalizedPct := pct / float64(cpuCount) + userCPUDeltaMillis := int64(s1.RawStats.CPUAccounting.Stats.UserNanos - s0.RawStats.CPUAccounting.Stats.UserNanos) + systemCPUDeltaMillis := int64(s1.RawStats.CPUAccounting.Stats.SystemNanos - s0.RawStats.CPUAccounting.Stats.SystemNanos) + + userPct := float64(userCPUDeltaMillis) / float64(timeDeltaNanos) + systemPct := float64(systemCPUDeltaMillis) / float64(timeDeltaNanos) + + normalizedUser := userPct / float64(cpuCount) + normalizedSystem := systemPct / float64(cpuCount) + + pctValues := CgroupPctStats{ + CPUTotalPct: common.Round(pct, common.DefaultDecimalPlacesCount), + CPUTotalPctNorm: common.Round(normalizedPct, common.DefaultDecimalPlacesCount), + CPUUserPct: common.Round(userPct, common.DefaultDecimalPlacesCount), + CPUUserPctNorm: common.Round(normalizedUser, common.DefaultDecimalPlacesCount), + CPUSystemPct: common.Round(systemPct, common.DefaultDecimalPlacesCount), + CPUSystemPctNorm: common.Round(normalizedSystem, common.DefaultDecimalPlacesCount), + } + return pctValues +} + // matchProcess checks if the provided process name matches any of the process regexes func (procStats *Stats) matchProcess(name string) bool { for _, reg := range procStats.procRegexps { @@ -409,6 +479,16 @@ func (procStats *Stats) Init() error { procStats.envRegexps = append(procStats.envRegexps, reg) } + if procStats.EnableCgroups { + cgReader, err := cgroup.NewReaderOptions(procStats.CgroupOpts) + if err == cgroup.ErrCgroupsMissing { + logp.Warn("cgroup data collection will be disabled: %v", err) + } else if err != nil { + return errors.Wrap(err, "error initializing cgroup reader") + } + procStats.cgroups = cgReader + } + return nil } @@ -492,6 +572,17 @@ func (procStats *Stats) getSingleProcess(pid int, newProcs ProcsMap) *Process { return nil } + if procStats.EnableCgroups { + cgStats, err := procStats.cgroups.GetStatsForProcess(pid) + if err != nil { + procStats.logger.Debug("Error fetching cgroup data for process %s with 
pid=%d: %v", process.Name, process.Pid, err) + return nil + } + process.RawStats = cgStats + last := procStats.ProcsMap[process.Pid] + process.PctStats = GetCgroupPercentage(last, process) + } + newProcs[process.Pid] = process last := procStats.ProcsMap[process.Pid] process.cpuTotalPctNorm, process.cpuTotalPct, process.cpuSinceStart = GetProcCPUPercentage(last, process) diff --git a/libbeat/metric/system/process/process_test.go b/libbeat/metric/system/process/process_test.go index 6bc6be447a5..712b7808547 100644 --- a/libbeat/metric/system/process/process_test.go +++ b/libbeat/metric/system/process/process_test.go @@ -68,10 +68,10 @@ func TestGetProcess(t *testing.T) { assert.True(t, (process.Mem.Share >= 0)) // CPU Checks - assert.True(t, (process.Cpu.StartTime > 0)) - assert.True(t, (process.Cpu.Total >= 0)) - assert.True(t, (process.Cpu.User >= 0)) - assert.True(t, (process.Cpu.Sys >= 0)) + assert.True(t, (process.CPU.StartTime > 0)) + assert.True(t, (process.CPU.Total >= 0)) + assert.True(t, (process.CPU.User >= 0)) + assert.True(t, (process.CPU.Sys >= 0)) assert.True(t, (process.SampleTime.Unix() <= time.Now().Unix())) @@ -143,7 +143,7 @@ func TestProcMemPercentage(t *testing.T) { func TestProcCpuPercentage(t *testing.T) { p1 := &Process{ - Cpu: gosigar.ProcTime{ + CPU: gosigar.ProcTime{ User: 11345, Sys: 37, Total: 11382, @@ -152,7 +152,7 @@ func TestProcCpuPercentage(t *testing.T) { } p2 := &Process{ - Cpu: gosigar.ProcTime{ + CPU: gosigar.ProcTime{ User: 14794, Sys: 47, Total: 14841, diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index a1a8913819b..4c2278437ab 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -44966,6 +44966,26 @@ type: long -- +*`system.process.cgroup.cpuacct.total.pct`*:: ++ +-- +CPU time of the cgroup as a percentage of overall CPU time. 
+ + +type: scaled_float + +-- + +*`system.process.cgroup.cpuacct.total.norm.pct`*:: ++ +-- +CPU time of the cgroup as a percentage of overall CPU time, normalized by CPU count. This is functionally an average of time spent across individual CPUs. + + +type: scaled_float + +-- + *`system.process.cgroup.cpuacct.stats.user.ns`*:: + -- @@ -44975,6 +44995,24 @@ type: long -- +*`system.process.cgroup.cpuacct.stats.user.pct`*:: ++ +-- +time the cgroup spent in user space, as a percentage of total CPU time + +type: scaled_float + +-- + +*`system.process.cgroup.cpuacct.stats.user.norm.pct`*:: ++ +-- +time the cgroup spent in user space, as a percentage of total CPU time, normalized by CPU count. + +type: scaled_float + +-- + *`system.process.cgroup.cpuacct.stats.system.ns`*:: + -- @@ -44984,6 +45022,24 @@ type: long -- +*`system.process.cgroup.cpuacct.stats.system.pct`*:: ++ +-- +Time the cgroup spent in kernel space, as a percentage of total CPU time + +type: scaled_float + +-- + +*`system.process.cgroup.cpuacct.stats.system.norm.pct`*:: ++ +-- +Time the cgroup spent in kernel space, as a percentage of total CPU time, normalized by CPU count. + +type: scaled_float + +-- + *`system.process.cgroup.cpuacct.percpu`*:: + -- diff --git a/metricbeat/module/system/fields.go b/metricbeat/module/system/fields.go index 9c99bdfe5c8..d52d17d7276 100644 --- a/metricbeat/module/system/fields.go +++ b/metricbeat/module/system/fields.go @@ -32,5 +32,5 @@ func init() { // AssetSystem returns asset data. // This is the base64 encoded gzipped contents of module/system. 
func AssetSystem() string { - return "" + return "" } diff --git a/metricbeat/module/system/process/_meta/data.json b/metricbeat/module/system/process/_meta/data.json index 6577bb55b59..9d748e2f39c 100644 --- a/metricbeat/module/system/process/_meta/data.json +++ b/metricbeat/module/system/process/_meta/data.json @@ -11,16 +11,19 @@ }, "process": { "args": [ - "/sbin/init", - "splash" + "/usr/lib/systemd/systemd", + "--switched-root", + "--system", + "--deserialize", + "29" ], - "command_line": "/sbin/init splash", + "command_line": "/usr/lib/systemd/systemd --switched-root --system --deserialize 29", "cpu": { - "pct": 0, - "start_time": "2020-12-04T22:17:35.000Z" + "pct": 0.0029, + "start_time": "2021-03-29T04:24:52.000Z" }, "memory": { - "pct": 0.0004 + "pct": 0.0011 }, "name": "systemd", "pgid": 1, @@ -33,24 +36,191 @@ }, "system": { "process": { - "cmdline": "/sbin/init splash", + "cgroup": { + "blkio": { + "id": "init.scope", + "path": "/init.scope", + "total": { + "bytes": 3188736, + "ios": 238 + } + }, + "cpu": { + "cfs": { + "period": { + "us": 100000 + }, + "quota": { + "us": 0 + }, + "shares": 1024 + }, + "id": "init.scope", + "path": "/init.scope", + "rt": { + "period": { + "us": 0 + }, + "runtime": { + "us": 0 + } + }, + "stats": { + "periods": 0, + "throttled": { + "ns": 0, + "periods": 0 + } + } + }, + "cpuacct": { + "id": "init.scope", + "path": "/init.scope", + "percpu": { + "1": 7906237728724, + "2": 8055519955790, + "3": 7985757217648, + "4": 7270310725150 + }, + "stats": { + "system": { + "norm": { + "pct": 0.0015 + }, + "ns": 10276870000000, + "pct": 0.0058 + }, + "user": { + "norm": { + "pct": 0.0044 + }, + "ns": 20915760000000, + "pct": 0.0174 + } + }, + "total": { + "norm": { + "pct": 0.0047 + }, + "ns": 31217825627312, + "pct": 0.0188 + } + }, + "id": "init.scope", + "memory": { + "id": "init.scope", + "kmem": { + "failures": 0, + "limit": { + "bytes": 9223372036854771712 + }, + "usage": { + "bytes": 4755456, + "max": { + "bytes": 8380416 + 
} + } + }, + "kmem_tcp": { + "failures": 0, + "limit": { + "bytes": 9223372036854771712 + }, + "usage": { + "bytes": 0, + "max": { + "bytes": 0 + } + } + }, + "mem": { + "failures": 0, + "limit": { + "bytes": 9223372036854771712 + }, + "usage": { + "bytes": 31584256, + "max": { + "bytes": 53239808 + } + } + }, + "memsw": { + "failures": 0, + "limit": { + "bytes": 9223372036854771712 + }, + "usage": { + "bytes": 31821824, + "max": { + "bytes": 53239808 + } + } + }, + "path": "/init.scope", + "stats": { + "active_anon": { + "bytes": 0 + }, + "active_file": { + "bytes": 16642048 + }, + "cache": { + "bytes": 20140032 + }, + "hierarchical_memory_limit": { + "bytes": 9223372036854771712 + }, + "hierarchical_memsw_limit": { + "bytes": 9223372036854771712 + }, + "inactive_anon": { + "bytes": 7016448 + }, + "inactive_file": { + "bytes": 2977792 + }, + "major_page_faults": 165, + "mapped_file": { + "bytes": 15814656 + }, + "page_faults": 1288584, + "pages_in": 364353, + "pages_out": 357844, + "rss": { + "bytes": 6893568 + }, + "rss_huge": { + "bytes": 0 + }, + "swap": { + "bytes": 0 + }, + "unevictable": { + "bytes": 0 + } + } + }, + "path": "/init.scope" + }, + "cmdline": "/usr/lib/systemd/systemd --switched-root --system --deserialize 29", "cpu": { - "start_time": "2020-12-04T22:17:35.000Z", + "start_time": "2021-03-29T04:24:52.000Z", "total": { "norm": { - "pct": 0 + "pct": 0.0029 }, - "pct": 0, - "value": 1290 + "pct": 0.0116, + "value": 31214570 } }, "memory": { "rss": { - "bytes": 12173312, - "pct": 0.0004 + "bytes": 17108992, + "pct": 0.0011 }, - "share": 8679424, - "size": 172113920 + "share": 11341824, + "size": 178851840 }, "state": "sleeping" } diff --git a/metricbeat/module/system/process/_meta/fields.yml b/metricbeat/module/system/process/_meta/fields.yml index 51ab8b81e09..7c89b50f080 100644 --- a/metricbeat/module/system/process/_meta/fields.yml +++ b/metricbeat/module/system/process/_meta/fields.yml @@ -240,14 +240,40 @@ Total CPU time in nanoseconds consumed 
by all tasks in the cgroup. + - name: total.pct + type: scaled_float + description: > + CPU time of the cgroup as a percentage of overall CPU time. + + - name: total.norm.pct + type: scaled_float + description: > + CPU time of the cgroup as a percentage of overall CPU time, normalized by CPU count. This is functionally an average of time spent across individual CPUs. + - name: stats.user.ns type: long description: CPU time consumed by tasks in user mode. + - name: stats.user.pct + type: scaled_float + description: time the cgroup spent in user space, as a percentage of total CPU time + + - name: stats.user.norm.pct + type: scaled_float + description: time the cgroup spent in user space, as a percentage of total CPU time, normalized by CPU count. + - name: stats.system.ns type: long description: CPU time consumed by tasks in user (kernel) mode. + - name: stats.system.pct + type: scaled_float + description: Time the cgroup spent in kernel space, as a percentage of total CPU time + + - name: stats.system.norm.pct + type: scaled_float + description: Time the cgroup spent in kernel space, as a percentage of total CPU time, normalized by CPU count. 
+ - name: percpu type: object object_type: long diff --git a/metricbeat/module/system/process/process.go b/metricbeat/module/system/process/process.go index 5b1bcee0b2e..1cb8723e919 100644 --- a/metricbeat/module/system/process/process.go +++ b/metricbeat/module/system/process/process.go @@ -65,14 +65,27 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return nil, fmt.Errorf("unexpected module type") } + enableCgroups := false + if runtime.GOOS == "linux" { + if config.Cgroups == nil || *config.Cgroups { + enableCgroups = true + debugf("process cgroup data collection is enabled, using hostfs='%v'", paths.Paths.Hostfs) + } + } + m := &MetricSet{ BaseMetricSet: base, stats: &process.Stats{ - Procs: config.Procs, - EnvWhitelist: config.EnvWhitelist, - CpuTicks: config.IncludeCPUTicks || (config.CPUTicks != nil && *config.CPUTicks), - CacheCmdLine: config.CacheCmdLine, - IncludeTop: config.IncludeTop, + Procs: config.Procs, + EnvWhitelist: config.EnvWhitelist, + CPUTicks: config.IncludeCPUTicks || (config.CPUTicks != nil && *config.CPUTicks), + CacheCmdLine: config.CacheCmdLine, + IncludeTop: config.IncludeTop, + EnableCgroups: enableCgroups, + CgroupOpts: cgroup.ReaderOptions{ + RootfsMountpoint: paths.Paths.Hostfs, + IgnoreRootCgroups: true, + }, }, perCPU: config.IncludePerCPU, IsAgent: systemModule.IsAgent, @@ -82,20 +95,6 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return nil, err } - if runtime.GOOS == "linux" { - if config.Cgroups == nil || *config.Cgroups { - debugf("process cgroup data collection is enabled, using hostfs='%v'", paths.Paths.Hostfs) - m.cgroup, err = cgroup.NewReader(paths.Paths.Hostfs, true) - if err != nil { - if err == cgroup.ErrCgroupsMissing { - logp.Warn("cgroup data collection will be disabled: %v", err) - } else { - return nil, errors.Wrap(err, "error initializing cgroup reader") - } - } - } - } - return m, nil } @@ -107,25 +106,6 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error { return errors.Wrap(err, 
"process stats") } - if m.cgroup != nil { - for _, proc := range procs { - pid, ok := proc["pid"].(int) - if !ok { - debugf("error converting pid to int for proc %+v", proc) - continue - } - stats, err := m.cgroup.GetStatsForProcess(pid) - if err != nil { - debugf("error getting cgroups stats for pid=%d, %v", pid, err) - continue - } - - if statsMap := cgroupStatsToMap(stats, m.perCPU); statsMap != nil { - proc["cgroup"] = statsMap - } - } - } - for _, proc := range procs { rootFields := common.MapStr{ "process": common.MapStr{ @@ -139,6 +119,10 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error { }, } + if m.stats.EnableCgroups && !m.perCPU { + proc.Delete("cgroup.cpuacct.percpu") + } + // Duplicate system.process.cmdline with ECS name process.command_line rootFields = getAndCopy(proc, "cmdline", rootFields, "process.command_line")