From 80e6ea282a0cd8d0b56d3d6b8e629ddd60758942 Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Wed, 6 Apr 2022 06:46:02 -0700 Subject: [PATCH] [processor/attributes] Add support to filter on log body (#8996) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add support to filter on log body * code review * add changelog * Update processor/attributesprocessor/README.md Co-authored-by: Przemek Maciolek <58699843+pmm-sumo@users.noreply.github.com> * Update CHANGELOG.md Co-authored-by: Dmitrii Anoshin Co-authored-by: Przemek Maciolek <58699843+pmm-sumo@users.noreply.github.com> Co-authored-by: Dmitrii Anoshin Co-authored-by: Juraci Paixão Kröhling --- CHANGELOG.md | 1 + .../processor/filterconfig/config.go | 12 ++++++++++-- .../processor/filterlog/filterlog.go | 16 ++++++++++++++++ .../processor/filterlog/filterlog_test.go | 15 ++++++++++++--- processor/attributesprocessor/README.md | 11 ++++++++--- .../attributesprocessor/testdata/config.yaml | 18 ++++++++++++++++++ 6 files changed, 65 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34b0431fdcab..fc6bcdf6a338 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Add `make crosslink` target to ensure replace statements are included in `go.mod` for all transitive dependencies within repository (#8822) - `filestorageextension`: Change bbolt DB settings for better performance (#9004) - `jaegerremotesamplingextension`: Add local and remote sampling stores (#8818) +- `attributesprocessor`: Add support to filter on log body (#8996) ### 🛑 Breaking changes 🛑 diff --git a/internal/coreinternal/processor/filterconfig/config.go b/internal/coreinternal/processor/filterconfig/config.go index d767b101f152..5410e3b0e230 100644 --- a/internal/coreinternal/processor/filterconfig/config.go +++ b/internal/coreinternal/processor/filterconfig/config.go @@ -95,6 +95,10 @@ type MatchProperties struct { // Deprecated: the Name field is removed from the log 
data model. LogNames []string `mapstructure:"log_names"` + // LogBodies is a list of strings that the LogRecord's body field must match + // against. + LogBodies []string `mapstructure:"log_bodies"` + // MetricNames is a list of strings to match metric name against. // A match occurs if metric name matches at least one item in the list. // This field is optional. @@ -123,6 +127,10 @@ func (mp *MatchProperties) ValidateForSpans() error { return errors.New("log_names should not be specified for trace spans") } + if len(mp.LogBodies) > 0 { + return errors.New("log_bodies should not be specified for trace spans") + } + if len(mp.Services) == 0 && len(mp.SpanNames) == 0 && len(mp.Attributes) == 0 && len(mp.Libraries) == 0 && len(mp.Resources) == 0 { return errors.New(`at least one of "services", "span_names", "attributes", "libraries" or "resources" field must be specified`) @@ -137,8 +145,8 @@ func (mp *MatchProperties) ValidateForLogs() error { return errors.New("neither services nor span_names should be specified for log records") } - if len(mp.Attributes) == 0 && len(mp.Libraries) == 0 && len(mp.Resources) == 0 { - return errors.New(`at least one of "attributes", "libraries" or "resources" field must be specified`) + if len(mp.Attributes) == 0 && len(mp.Libraries) == 0 && len(mp.Resources) == 0 && len(mp.LogBodies) == 0 { + return errors.New(`at least one of "attributes", "libraries", "resources" or "log_bodies" field must be specified`) } return nil diff --git a/internal/coreinternal/processor/filterlog/filterlog.go b/internal/coreinternal/processor/filterlog/filterlog.go index c981fc18c655..c22fee6188bb 100644 --- a/internal/coreinternal/processor/filterlog/filterlog.go +++ b/internal/coreinternal/processor/filterlog/filterlog.go @@ -38,6 +38,9 @@ type propertiesMatcher struct { // log names to compare to. nameFilters filterset.FilterSet + + // log bodies to compare to. 
+ bodyFilters filterset.FilterSet } // NewMatcher creates a LogRecord Matcher that matches based on the given MatchProperties. @@ -62,20 +65,33 @@ func NewMatcher(mp *filterconfig.MatchProperties) (Matcher, error) { return nil, fmt.Errorf("error creating log record name filters: %v", err) } } + var bodyFS filterset.FilterSet + if len(mp.LogBodies) > 0 { + bodyFS, err = filterset.CreateFilterSet(mp.LogBodies, &mp.Config) + if err != nil { + return nil, fmt.Errorf("error creating log record body filters: %v", err) + } + } return &propertiesMatcher{ PropertiesMatcher: rm, nameFilters: nameFS, + bodyFilters: bodyFS, }, nil } // MatchLogRecord matches a log record to a set of properties. // There are 3 sets of properties to match against. // The log record names are matched, if specified. +// The log record bodies are matched, if specified. // The attributes are then checked, if specified. // At least one of log record names or attributes must be specified. It is // supported to have more than one of these specified, and all specified must // evaluate to true for a match to occur. 
func (mp *propertiesMatcher) MatchLogRecord(lr pdata.LogRecord, resource pdata.Resource, library pdata.InstrumentationScope) bool { + if lr.Body().Type() == pdata.ValueTypeString && mp.bodyFilters != nil && !mp.bodyFilters.Matches(lr.Body().StringVal()) { + return false + } + return mp.PropertiesMatcher.Match(lr.Attributes(), resource, library) } diff --git a/internal/coreinternal/processor/filterlog/filterlog_test.go b/internal/coreinternal/processor/filterlog/filterlog_test.go index 7fc38a285d33..8aa0440048ce 100644 --- a/internal/coreinternal/processor/filterlog/filterlog_test.go +++ b/internal/coreinternal/processor/filterlog/filterlog_test.go @@ -40,14 +40,15 @@ func TestLogRecord_validateMatchesConfiguration_InvalidConfig(t *testing.T) { { name: "empty_property", property: filterconfig.MatchProperties{}, - errorString: "at least one of \"attributes\", \"libraries\" or \"resources\" field must be specified", + errorString: `at least one of "attributes", "libraries", "resources" or "log_bodies" field must be specified`, }, { name: "empty_log_names_and_attributes", property: filterconfig.MatchProperties{ - LogNames: []string{}, + LogNames: []string{}, + LogBodies: []string{}, }, - errorString: "at least one of \"attributes\", \"libraries\" or \"resources\" field must be specified", + errorString: `at least one of "attributes", "libraries", "resources" or "log_bodies" field must be specified`, }, { name: "span_properties", @@ -149,10 +150,18 @@ func TestLogRecord_Matching_True(t *testing.T) { }, }, }, + { + name: "log_body_regexp_match", + properties: &filterconfig.MatchProperties{ + Config: *createConfig(filterset.Regexp), + LogBodies: []string{"AUTH.*"}, + }, + }, } lr := pdata.NewLogRecord() lr.Attributes().InsertString("abc", "def") + lr.Body().SetStringVal("AUTHENTICATION FAILED") for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { diff --git a/processor/attributesprocessor/README.md b/processor/attributesprocessor/README.md index 
301a1333edcc..c29b41ae33ff 100644 --- a/processor/attributesprocessor/README.md +++ b/processor/attributesprocessor/README.md @@ -166,13 +166,13 @@ if the input data should be included or excluded from the processor. To configur this option, under `include` and/or `exclude` at least `match_type` and one of the following is required: - For spans, one of `services`, `span_names`, `attributes`, `resources`, or `libraries` must be specified -with a non-empty value for a valid configuration. The `log_names`, `expressions`, `resource_attributes` and +with a non-empty value for a valid configuration. The `log_names`, `log_bodies`, `expressions`, `resource_attributes` and `metric_names` fields are invalid. -- For logs, one of `log_names`, `attributes`, `resources`, or `libraries` must be specified with a +- For logs, one of `log_names`, `log_bodies`, `attributes`, `resources`, or `libraries` must be specified with a non-empty value for a valid configuration. The `span_names`, `metric_names`, `expressions`, `resource_attributes`, and `services` fields are invalid. - For metrics, one of `metric_names`, `resources` must be specified -with a valid non-empty value for a valid configuration. The `span_names`, `log_names`, and +with a valid non-empty value for a valid configuration. The `span_names`, `log_names`, `log_bodies` and `services` fields are invalid. @@ -218,6 +218,11 @@ attributes: # This is an optional field. log_names: [<item1>, ..., <itemN>] + # The log body must match at least one of the items. + # Currently only string body types are supported. + # This is an optional field. + log_bodies: [<item1>, ..., <itemN>] + # The metric name must match at least one of the items. # This is an optional field. 
metric_names: [<item1>, ..., <itemN>] diff --git a/processor/attributesprocessor/testdata/config.yaml b/processor/attributesprocessor/testdata/config.yaml index e8a7428c34d7..2c68eb918de6 100644 --- a/processor/attributesprocessor/testdata/config.yaml +++ b/processor/attributesprocessor/testdata/config.yaml @@ -307,6 +307,24 @@ processors: action: update value: "SELECT * FROM USERS [obfuscated]" + + # The following demonstrates how to process logs whose body matches regexp + # patterns. This processor will remove the "token" attribute and will obfuscate the "password" + # attribute in log records whose body matches "AUTH.*". + attributes/log_body_regexp: + # Specifies the log record properties that must exist for the processor to be applied. + include: + # match_type defines that "log_bodies" is an array of regexp-es. + match_type: regexp + # The log body must match the "AUTH.*" pattern. + log_bodies: ["AUTH.*"] + actions: + - key: password + action: update + value: "obfuscated" + - key: token + action: delete + receivers: nop: