diff --git a/.chloggen/ottl-func-extract-pattern.yaml b/.chloggen/ottl-func-extract-pattern.yaml new file mode 100644 index 000000000000..139ced2dd51d --- /dev/null +++ b/.chloggen/ottl-func-extract-pattern.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/ottl + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add new `ExtractPatterns` converter that extracts regex patterns from a string. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [25834, 25856] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [user] diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 6cdf1e2486a1..fe82c55b76d6 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -277,6 +277,7 @@ Unlike functions, they do not modify any input telemetry and always return a val Available Converters: - [Concat](#concat) - [ConvertCase](#convertcase) +- [ExtractPatterns](#extractpatterns) - [FNV](#fnv) - [Duration](#duration) - [Int](#int) @@ -353,6 +354,22 @@ Examples: - `Duration("333ms")` - `Duration("1000000h")` +### ExtractPatterns + +`ExtractPatterns(target, pattern)` + +The `ExtractPatterns` Converter returns a `pcommon.Map` struct that is a result of extracting named capture groups from the target string. If no matches are found then an empty `pcommon.Map` is returned. + +`target` is a Getter that returns a string. `pattern` is a regex string. + +If `target` is not a string or is nil, `ExtractPatterns` will return an error. If `pattern` does not contain at least 1 named capture group then `ExtractPatterns` will error on startup. 
+ +Examples: + +- `ExtractPatterns(attributes["k8s.change_cause"], "GIT_SHA=(?P<value>\w+)")` + +- `ExtractPatterns(body, "^(?P<timestamp>\\w+ \\w+ [0-9]+:[0-9]+:[0-9]+) (?P<hostname>([A-Za-z0-9-_]+)) (?P<process>\\w+)(\\[(?P<pid>\\d+)\\])?: (?P<message>.*)$")` + ### FNV `FNV(value)` diff --git a/pkg/ottl/ottlfuncs/func_extract_patterns.go b/pkg/ottl/ottlfuncs/func_extract_patterns.go new file mode 100644 index 000000000000..4101e8d9182f --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_extract_patterns.go @@ -0,0 +1,75 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + +import ( + "context" + "fmt" + "regexp" + + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +type ExtractPatternsArguments[K any] struct { + Target ottl.StringGetter[K] `ottlarg:"0"` + Pattern string `ottlarg:"1"` +} + +func NewExtractPatternsFactory[K any]() ottl.Factory[K] { + return ottl.NewFactory("ExtractPatterns", &ExtractPatternsArguments[K]{}, createExtractPatternsFunction[K]) +} + +func createExtractPatternsFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { + args, ok := oArgs.(*ExtractPatternsArguments[K]) + + if !ok { + return nil, fmt.Errorf("ExtractPatternsFactory args must be of type *ExtractPatternsArguments[K]") + } + + return extractPatterns(args.Target, args.Pattern) +} + +func extractPatterns[K any](target ottl.StringGetter[K], pattern string) (ottl.ExprFunc[K], error) { + r, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("the pattern supplied to ExtractPatterns is not a valid pattern: %w", err) + } + + namedCaptureGroups := 0 + for _, groupName := range r.SubexpNames() { + if groupName != "" { + namedCaptureGroups++ + } + } + + if namedCaptureGroups == 0 { + return nil, fmt.Errorf("at least 1 named capture group must be supplied in the given regex") + } 
+ + return func(ctx context.Context, tCtx K) (interface{}, error) { + val, err := target.Get(ctx, tCtx) + if err != nil { + return nil, err + } + + matches := r.FindStringSubmatch(val) + if matches == nil { + return pcommon.NewMap(), nil + } + + result := pcommon.NewMap() + for i, subexp := range r.SubexpNames() { + if i == 0 { + // Skip whole match + continue + } + if subexp != "" { + result.PutStr(subexp, matches[i]) + } + } + return result, err + }, nil +} diff --git a/pkg/ottl/ottlfuncs/func_extract_patterns_test.go b/pkg/ottl/ottlfuncs/func_extract_patterns_test.go new file mode 100644 index 000000000000..d051eb1a8476 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_extract_patterns_test.go @@ -0,0 +1,140 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +func Test_extractPatterns(t *testing.T) { + target := &ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (interface{}, error) { + return `a=b c=d`, nil + }, + } + tests := []struct { + name string + target ottl.StringGetter[any] + pattern string + want func(pcommon.Map) + }{ + { + name: "extract patterns", + target: target, + pattern: `^a=(?P\w+)\s+c=(?P\w+)$`, + want: func(expectedMap pcommon.Map) { + expectedMap.PutStr("a", "b") + expectedMap.PutStr("c", "d") + }, + }, + { + name: "no pattern found", + target: target, + pattern: `^a=(?P\w+)$`, + want: func(expectedMap pcommon.Map) {}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exprFunc, err := extractPatterns(tt.target, tt.pattern) + assert.NoError(t, err) + + result, err := exprFunc(context.Background(), nil) + assert.NoError(t, err) + + resultMap, ok := result.(pcommon.Map) + require.True(t, ok) + + 
expected := pcommon.NewMap() + tt.want(expected) + + assert.Equal(t, expected.Len(), resultMap.Len()) + expected.Range(func(k string, v pcommon.Value) bool { + ev, _ := expected.Get(k) + av, _ := resultMap.Get(k) + assert.Equal(t, ev, av) + return true + }) + }) + } +} + +func Test_extractPatterns_validation(t *testing.T) { + tests := []struct { + name string + target ottl.StringGetter[any] + pattern string + }{ + { + name: "bad regex", + target: &ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (interface{}, error) { + return "foobar", nil + }, + }, + pattern: "(", + }, + { + name: "no named capture group", + target: &ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (interface{}, error) { + return "foobar", nil + }, + }, + pattern: "(.*)", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exprFunc, err := extractPatterns[any](tt.target, tt.pattern) + assert.Error(t, err) + assert.Nil(t, exprFunc) + }) + } +} + +func Test_extractPatterns_bad_input(t *testing.T) { + tests := []struct { + name string + target ottl.StringGetter[any] + pattern string + }{ + { + name: "target is non-string", + target: &ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (interface{}, error) { + return 123, nil + }, + }, + pattern: "(?P.*)", + }, + { + name: "target is nil", + target: &ottl.StandardStringGetter[any]{ + Getter: func(ctx context.Context, tCtx any) (interface{}, error) { + return nil, nil + }, + }, + pattern: "(?P.*)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exprFunc, err := extractPatterns[any](tt.target, tt.pattern) + assert.NoError(t, err) + + result, err := exprFunc(nil, nil) + assert.Error(t, err) + assert.Nil(t, result) + }) + } +} diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index e8d0b577e29d..ab195a651bb8 100644 --- a/pkg/ottl/ottlfuncs/functions.go +++ 
b/pkg/ottl/ottlfuncs/functions.go @@ -37,6 +37,7 @@ func converters[K any]() []ottl.Factory[K] { NewConcatFactory[K](), NewConvertCaseFactory[K](), NewDurationFactory[K](), + NewExtractPatternsFactory[K](), NewFnvFactory[K](), NewIntFactory[K](), NewIsMapFactory[K](),