Skip to content

Commit

Permalink
libbeat/processors/add_process_metadata: Add default cgroup.regex for…
Browse files Browse the repository at this point in the history
… add_process_metadata (#36484)

Replace the existing cgroup_prefixes default with a default cgroup_regex. The new default will match the same cgroup paths as the old cgroup_prefixes value plus more. Out of the box it will match cgroup paths from modern Kubernetes and Podman versions. Existing users of cgroup_prefixes and cgroup_regex should see no breaking change.

This removes the undocumented behavior where, when cgroup_prefixes was used, it would return a value that matched [\w]{64} for cgroup v2 paths. This was inconsistent because it arbitrarily applied only to cgroup v2 paths, and it could be prone to false matches.
  • Loading branch information
bhapas authored Sep 8, 2023
1 parent 0f47b30 commit 80ed33b
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 99 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Eliminate cloning of event in deepUpdate {pull}35945[35945]
- Fix ndjson parser to store JSON fields correctly under `target` {issue}29395[29395]
- Support build of projects outside of beats directory {pull}36126[36126]
- Add default cgroup regex for add_process_metadata processor {pull}36484[36484] {issue}32961[32961]
- Fix environment capture by `add_process_metadata` processor. {issue}36469[36469] {pull}36471[36471]


Expand Down
10 changes: 10 additions & 0 deletions libbeat/processors/add_process_metadata/add_process_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,16 @@ func newProcessMetadataProcessorWithProvider(cfg *conf.C, provider processMetada
return nil, fmt.Errorf("fail to unpack the %v configuration: %w", processorName, err)
}

// If neither option is configured, then add a default. A default cgroup_regex
// cannot be added to the struct returned by defaultConfig() because if
// cgroup_regex is set, it would take precedence over any user-configured
// cgroup_prefixes.
hasCgroupPrefixes, _ := cfg.Has("cgroup_prefixes", -1)
hasCgroupRegex, _ := cfg.Has("cgroup_regex", -1)
if !hasCgroupPrefixes && !hasCgroupRegex {
config.CgroupRegex = defaultCgroupRegex
}

mappings, err := config.getMappings()
if err != nil {
return nil, fmt.Errorf("error unpacking %v.target_fields: %w", processorName, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"unsafe"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/elastic/beats/v7/libbeat/beat"
conf "github.com/elastic/elastic-agent-libs/config"
Expand All @@ -38,7 +39,8 @@ import (
)

func TestAddProcessMetadata(t *testing.T) {
logp.TestingSetup(logp.WithSelectors(processorName))
require.NoError(t, logp.TestingSetup(logp.WithSelectors(processorName)))

startTime := time.Now()
testProcs := testProvider{
1: {
Expand Down Expand Up @@ -82,7 +84,6 @@ func TestAddProcessMetadata(t *testing.T) {
testMap := map[int]cgroup.PathList{
1: {
V1: map[string]cgroup.ControllerPath{

"cpu": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
"net_prio": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
"blkio": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
Expand All @@ -100,7 +101,6 @@ func TestAddProcessMetadata(t *testing.T) {
},
2: {
V1: map[string]cgroup.ControllerPath{

"cpu": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
"net_prio": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
"blkio": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
Expand All @@ -116,6 +116,11 @@ func TestAddProcessMetadata(t *testing.T) {
"name=systemd": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
},
},
6: {
V2: map[string]cgroup.ControllerPath{
"Docker": {IsV2: true, ControllerPath: "/custom_path/123456abc"},
},
},
}

return testMap[pid], nil
Expand Down Expand Up @@ -714,6 +719,30 @@ func TestAddProcessMetadata(t *testing.T) {
},
},
},
{
description: "invalid cgroup_regex configured",
config: mapstr.M{
"cgroup_regex": "",
},
initErr: errors.New("fail to unpack the add_process_metadata configuration: cgroup_regexp must contain exactly one capturing group for the container ID accessing config"),
},
{
description: "cgroup_prefixes configured",
config: mapstr.M{
"match_pids": []string{"pid"},
"include_fields": []string{"container.id"},
"cgroup_prefixes": []string{"/custom_path"},
},
event: mapstr.M{
"pid": "6",
},
expected: mapstr.M{
"pid": "6",
"container": mapstr.M{
"id": "123456abc",
},
},
},
} {
t.Run(test.description, func(t *testing.T) {
config, err := conf.NewConfigFrom(test.config)
Expand Down Expand Up @@ -788,15 +817,14 @@ func TestAddProcessMetadata(t *testing.T) {
}

func TestUsingCache(t *testing.T) {
logp.TestingSetup(logp.WithSelectors(processorName))
require.NoError(t, logp.TestingSetup(logp.WithSelectors(processorName)))

selfPID := os.Getpid()

// mock of the cgroup processCgroupPaths
processCgroupPaths = func(_ resolve.Resolver, pid int) (cgroup.PathList, error) {
testStruct := cgroup.PathList{
V1: map[string]cgroup.ControllerPath{

"cpu": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
"net_prio": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
"blkio": {ControllerPath: "/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"},
Expand Down Expand Up @@ -898,7 +926,8 @@ func TestUsingCache(t *testing.T) {
}

func TestSelf(t *testing.T) {
logp.TestingSetup(logp.WithSelectors(processorName))
require.NoError(t, logp.TestingSetup(logp.WithSelectors(processorName)))

config, err := conf.NewConfigFrom(mapstr.M{
"match_pids": []string{"self_pid"},
"target": "self",
Expand Down Expand Up @@ -931,7 +960,8 @@ func TestSelf(t *testing.T) {
}

func TestBadProcess(t *testing.T) {
logp.TestingSetup(logp.WithSelectors(processorName))
require.NoError(t, logp.TestingSetup(logp.WithSelectors(processorName)))

config, err := conf.NewConfigFrom(mapstr.M{
"match_pids": []string{"self_pid"},
"target": "self",
Expand Down Expand Up @@ -1118,8 +1148,54 @@ func TestV2CID(t *testing.T) {
}
return testMap, nil
}
provider := newCidProvider(resolve.NewTestResolver(""), []string{}, "", processCgroupPaths, nil)
provider := newCidProvider(resolve.NewTestResolver(""), nil, defaultCgroupRegex, processCgroupPaths, nil)
result, err := provider.GetCid(1)
assert.NoError(t, err)
assert.Equal(t, "2dcbab615aebfa9313feffc5cfdacd381543cfa04c6be3f39ac656e55ef34805", result)
}

// TestDefaultCgroupRegex verifies that defaultCgroupRegex matches the most common
// container runtime and container orchestrator cgroup paths.
//
// Each case supplies a full cgroup path and the container ID that is expected
// to be captured by the first capturing group of defaultCgroupRegex.
func TestDefaultCgroupRegex(t *testing.T) {
	testCases := []struct {
		TestName    string // subtest name
		CgroupPath  string // cgroup path fed to the regex
		ContainerID string // expected value of the first capturing group
	}{
		{
			TestName:    "kubernetes-docker",
			CgroupPath:  "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod69349abe_d645_11ea_9c4c_08002709c05c.slice/docker-80d85a3a585f1575028ebe468d83093c301eda20d37d1671ff2a0be50fc0e460.scope",
			ContainerID: "80d85a3a585f1575028ebe468d83093c301eda20d37d1671ff2a0be50fc0e460",
		},
		{
			TestName:    "kubernetes-cri-containerd",
			CgroupPath:  "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2d5133c0_65f3_40b2_b375_c04866d418e1.slice/cri-containerd-e01a26336924e2fb8089bcf4cf943954fd9ea616cc5678f38f65928307979459.scope",
			ContainerID: "e01a26336924e2fb8089bcf4cf943954fd9ea616cc5678f38f65928307979459",
		},
		{
			TestName:    "kubernetes-crio",
			CgroupPath:  "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod69349abe_d645_11ea_9c4c_08002709c05c.slice/crio-80d85a3a585f1575028ebe468d83093c301eda20d37d1671ff2a0be50fc0e460.scope",
			ContainerID: "80d85a3a585f1575028ebe468d83093c301eda20d37d1671ff2a0be50fc0e460",
		},
		{
			TestName: "podman",
			// Rootless podman runs under the per-user systemd instance
			// (user@<uid>.service).
			CgroupPath:  "/user.slice/user-1000.slice/user@1000.service/user.slice/libpod-conmon-ee059a097566fdc5ac9141bfcdfbed0c972163da891de076e0849d7b53597aac.scope",
			ContainerID: "ee059a097566fdc5ac9141bfcdfbed0c972163da891de076e0849d7b53597aac",
		},
		{
			TestName:    "docker",
			CgroupPath:  "/docker/485776c9f6f2c22e2b44a2239b65471d6a02701b54d1cb5e1c55a09108a1b5b9",
			ContainerID: "485776c9f6f2c22e2b44a2239b65471d6a02701b54d1cb5e1c55a09108a1b5b9",
		},
	}

	for _, tc := range testCases {
		tc := tc // per-iteration copy for toolchains older than Go 1.22
		t.Run(tc.TestName, func(t *testing.T) {
			matches := defaultCgroupRegex.FindStringSubmatch(tc.CgroupPath)
			// matches[0] is the whole match; matches[1] is the container ID group.
			if len(matches) < 2 {
				t.Fatalf("container.id not matched in cgroup path %s", tc.CgroupPath)
			}
			if matches[1] != tc.ContainerID {
				t.Errorf("container.id mismatch in cgroup path %s: got %q, want %q",
					tc.CgroupPath, matches[1], tc.ContainerID)
			}
		})
	}
}
24 changes: 17 additions & 7 deletions libbeat/processors/add_process_metadata/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ package add_process_metadata

import (
"fmt"
"regexp"
"strings"
"time"

"github.com/elastic/elastic-agent-libs/mapstr"
)

// defaultCgroupRegex captures 64-character lowercase hexadecimal container IDs
// found in cgroup paths. The ID must be preceded by '-' or '/' and must end the
// path, optionally followed by a ".scope" suffix.
//
// The ".scope" suffix is a non-capturing group so that the expression has
// exactly one capturing group (the container ID), which is the same rule that
// config.Validate enforces for a configured cgroup_regex.
var defaultCgroupRegex = regexp.MustCompile(`[-/]([0-9a-f]{64})(?:\.scope)?$`)

type config struct {
// IgnoreMissing: Ignore errors if event has no PID field.
IgnoreMissing bool `config:"ignore_missing"`
Expand All @@ -50,14 +54,21 @@ type config struct {
// CgroupPrefix is the prefix where the container id is inside cgroup
CgroupPrefixes []string `config:"cgroup_prefixes"`

// CgroupRegex is the regular expression that captures the container id from cgroup path
CgroupRegex string `config:"cgroup_regex"`
// CgroupRegex is the regular expression that captures the container ID from a cgroup path.
CgroupRegex *regexp.Regexp `config:"cgroup_regex"`

// CgroupCacheExpireTime is the length of time before cgroup cache elements expire in seconds,
// set to 0 to disable the cgroup cache
CgroupCacheExpireTime time.Duration `config:"cgroup_cache_expire_time"`
}

// Validate checks the unpacked configuration for consistency.
//
// When CgroupRegex is set, it must contain exactly one capturing group, which
// is used to extract the container ID; otherwise an error is returned. A nil
// CgroupRegex is accepted as-is.
//
// NOTE(review): presumably invoked by the config unpacking machinery after the
// struct is populated — confirm against the config library.
func (c *config) Validate() error {
	if c.CgroupRegex == nil {
		return nil
	}
	if groups := c.CgroupRegex.NumSubexp(); groups != 1 {
		return fmt.Errorf("cgroup_regexp must contain exactly one capturing group for the container ID")
	}
	return nil
}

// available fields by default
var defaultFields = mapstr.M{
"process": mapstr.M{
Expand Down Expand Up @@ -99,22 +110,21 @@ func defaultConfig() config {
RestrictedFields: false,
MatchPIDs: []string{"process.pid", "process.parent.pid"},
HostPath: "/",
CgroupPrefixes: []string{"/kubepods", "/docker"},
CgroupCacheExpireTime: cacheExpiration,
}
}

func (pf *config) getMappings() (mappings mapstr.M, err error) {
func (c *config) getMappings() (mappings mapstr.M, err error) {
mappings = mapstr.M{}
validFields := defaultFields
if pf.RestrictedFields {
if c.RestrictedFields {
validFields = restrictedFields
}
fieldPrefix := pf.Target
fieldPrefix := c.Target
if len(fieldPrefix) > 0 {
fieldPrefix += "."
}
wantedFields := pf.Fields
wantedFields := c.Fields
if len(wantedFields) == 0 {
wantedFields = []string{"process", "container"}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,34 +12,44 @@ processes, identified by their process ID (PID).
-------------------------------------------------------------------------------
processors:
- add_process_metadata:
match_pids: [system.process.ppid]
target: system.process.parent
match_pids:
- process.pid
-------------------------------------------------------------------------------

The fields added to the event look as follows:

[source,json]
-------------------------------------------------------------------------------
"process": {
"name": "systemd",
"title": "/usr/lib/systemd/systemd --switched-root --system --deserialize 22",
"exe": "/usr/lib/systemd/systemd",
"args": ["/usr/lib/systemd/systemd", "--switched-root", "--system", "--deserialize", "22"],
"pid": 1,
"parent": {
"pid": 0
{
"container": {
"id": "b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"
},
"start_time": "2018-08-22T08:44:50.684Z",
"owner": {
"name": "root",
"id": "0"
"process": {
"args": [
"/usr/lib/systemd/systemd",
"--switched-root",
"--system",
"--deserialize",
"22"
],
"executable": "/usr/lib/systemd/systemd",
"name": "systemd",
"owner": {
"id": "0",
"name": "root"
},
"parent": {
"pid": 0
},
"pid": 1,
"start_time": "2018-08-22T08:44:50.684Z",
"title": "/usr/lib/systemd/systemd --switched-root --system --deserialize 22"
}
},
"container": {
"id": "b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1"
},
}
-------------------------------------------------------------------------------

Optionally, the process environment can be included, too:

[source,json]
-------------------------------------------------------------------------------
...
Expand Down Expand Up @@ -69,7 +79,7 @@ of the fields in match_pids will be discarded and an error will be generated. By
default, this condition is ignored.

`overwrite_keys`:: (Optional) By default, if a target field already exists, it
will not be overwritten and an error will be logged. If `overwrite_keys` is
will not be overwritten, and an error will be logged. If `overwrite_keys` is
set to `true`, this condition will be ignored.

`restricted_fields`:: (Optional) By default, the `process.env` field is not
Expand All @@ -81,21 +91,21 @@ directory of the host `/`. This is the path where `/proc` is mounted. For
different runtime configurations of Kubernetes or Docker, the `host_path` can
be set to overwrite the default.

`cgroup_prefixes`:: (Optional) By default, the `cgroup_prefixes` field is set
to `/kubepods` and `/docker`. This is the prefix where the container ID is
inside cgroup. For different runtime configurations of Kubernetes or Docker,
the `cgroup_prefixes` can be set to overwrite the defaults.

`cgroup_regex`:: (Optional) By default, the container id is extracted from
cgroup file based on `cgroup_prefixes`. This can be overwritten by specifying
regular expression with capture group for capturing container id from cgroup
path. Examples:
. `^\/.+\/.+\/.+\/([0-9a-f]{64}).*` will match the container id of a cgroup
like `/kubepods/besteffort/pod665fb997-575b-11ea-bfce-080027421ddf/b5285682fba7449c86452b89a800609440ecc88a7ba5f2d38bedfb85409b30b1`
. `^\/.+\/.+\/.+\/docker-([0-9a-f]{64}).scope` will match the container id of a cgroup
like `/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod69349abe_d645_11ea_9c4c_08002709c05c.slice/docker-80d85a3a585f1575028ebe468d83093c301eda20d37d1671ff2a0be50fc0e460.scope`
. `^\/.+\/.+\/.+\/crio-([0-9a-f]{64}).scope` will match the container id of a cgroup
like `/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod69349abe_d645_11ea_9c4c_08002709c05c.slice/crio-80d85a3a585f1575028ebe468d83093c301eda20d37d1671ff2a0be50fc0e460.scope`
`cgroup_prefixes`:: (Optional) List of prefixes that will be matched against
cgroup paths. When a cgroup path begins with a prefix in the list, then the
last element of the path is returned as the container ID. Only one of
`cgroup_prefixes` and `cgroup_regex` should be configured. If neither is
configured then a default `cgroup_regex` value is used that matches cgroup
paths containing 64-character container IDs (like those from Docker,
Kubernetes, and Podman).

`cgroup_regex`:: (Optional) A regular expression that will be matched against
cgroup paths. It must contain one capturing group. When a cgroup path matches
the regular expression then the value of the capturing group is returned as
the container ID. Only one of `cgroup_prefixes` and `cgroup_regex` should be
configured. If neither is configured then a default `cgroup_regex` value is
used that matches cgroup paths containing 64-character container IDs (like those
from Docker, Kubernetes, and Podman).

`cgroup_cache_expire_time`:: (Optional) By default, the
`cgroup_cache_expire_time` is set to 30 seconds. This is the length of time
Expand Down
Loading

0 comments on commit 80ed33b

Please sign in to comment.