Skip to content

Commit

Permalink
Add support for complex capture group names in parse_regex.
Browse files Browse the repository at this point in the history
  • Loading branch information
igorpeshansky committed Apr 2, 2022
1 parent ce41293 commit 17f52e6
Show file tree
Hide file tree
Showing 5 changed files with 628 additions and 4 deletions.
60 changes: 56 additions & 4 deletions confgenerator/logging_processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package confgenerator
import (
"fmt"
"log"
"regexp"
"strings"

"github.com/GoogleCloudPlatform/ops-agent/confgenerator/filter"
Expand Down Expand Up @@ -62,6 +63,52 @@ func init() {
LoggingProcessorTypes.RegisterType(func() Component { return &LoggingProcessorParseJson{} })
}

// Patterns used by rewriteComplexCaptures, compiled once at package scope.
var (
	// disallowedCaptureName matches a named capture group opener whose name
	// contains at least one character outside [A-Za-z0-9_].
	disallowedCaptureName = regexp.MustCompile(`\(\?P<(?:[A-Za-z0-9_]*[^A-Za-z0-9_>])+(?:[A-Za-z0-9_]*)>`)
	// namedCaptureGroup matches any named capture group opener and captures
	// its name; a backslash followed by any character counts as part of the name.
	namedCaptureGroup = regexp.MustCompile(`\(\?P<((?:[^>\\]|\\.)*)>`)
)

// rewriteComplexCaptures translates disallowed capture group names into
// placeholders and a transformation to rename them back in the log record.
//
// If regex contains no capture group name with a character outside
// [A-Za-z0-9_], it is returned unchanged together with a nil component list.
// Otherwise every capture group whose name is disallowed, or whose name starts
// with "__" (which could collide with a placeholder), is renamed to "__N",
// where N counts rewritten groups starting at 1. The returned component is a
// "modify" FILTER matching tag that carries one "Rename __N <quoted original>"
// entry per rewritten group, in order.
func rewriteComplexCaptures(regex, tag string) (string, []fluentbit.Component) {
	// Short-circuit regexes that don't have disallowed capture group names.
	if !disallowedCaptureName.MatchString(regex) {
		return regex, nil
	}

	// Original names of the rewritten groups; index i corresponds to placeholder __(i+1).
	var rewrites []string

	// ReplaceAllStringFunc can't replace only the captured name, so the regex
	// is rebuilt piecewise from the submatch indexes instead.
	var rebuilt strings.Builder
	last := 0
	for _, idx := range namedCaptureGroup.FindAllStringSubmatchIndex(regex, -1) {
		group := regex[idx[0]:idx[1]] // Full match, e.g. "(?P<name>"
		name := regex[idx[2]:idx[3]]  // Capture group name only
		// Copy everything up to the start of the name verbatim.
		rebuilt.WriteString(regex[last:idx[2]])
		if disallowedCaptureName.MatchString(group) || strings.HasPrefix(name, "__") {
			rewrites = append(rewrites, name)
			fmt.Fprintf(&rebuilt, "__%d", len(rewrites))
		} else {
			rebuilt.WriteString(name)
		}
		last = idx[3]
	}
	rebuilt.WriteString(regex[last:])

	// Rename all captured fields back to their original names. The slice is
	// created with zero length (capacity only): appending to a slice created
	// with a non-zero length would leave empty leading entries in the config.
	renames := make([][2]string, 0, len(rewrites))
	for i, name := range rewrites {
		renames = append(renames, [2]string{"Rename", fmt.Sprintf("__%d %q", i+1, name)})
	}

	rename := fluentbit.Component{
		Kind: "FILTER",
		Config: map[string]string{
			"Match": tag,
			"Name":  "modify",
		},
		OrderedConfig: renames,
	}
	return rebuilt.String(), []fluentbit.Component{rename}
}

// A LoggingProcessorParseRegex applies a regex to the specified field, storing the named capture groups as keys in the log record.
// This was maintained in addition to the parse_regex_complex to ensure backward compatibility with any existing configurations
type LoggingProcessorParseRegex struct {
Expand All @@ -77,14 +124,16 @@ func (r LoggingProcessorParseRegex) Type() string {
}

// Components returns the fluent-bit components for this processor: the parser
// obtained from p.ParserShared.Component (configured with Format "regex" and a
// Regex), the result of fluentbit.ParserFilterComponent for p.Field and that
// parser's name, followed by any components returned by rewriteComplexCaptures.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so the old and new lines appear side by side (two Regex
// assignments, two return openers) — confirm against the actual file.
func (p LoggingProcessorParseRegex) Components(tag, uid string) []fluentbit.Component {
// Rewrite capture group names first; transforms holds whatever extra components that produced.
regex, transforms := rewriteComplexCaptures(p.Regex, tag)

parser, parserName := p.ParserShared.Component(tag, uid)
parser.Config["Format"] = "regex"
parser.Config["Regex"] = p.Regex
// The rewritten pattern is assigned last, so it is the value that remains in the config.
parser.Config["Regex"] = regex

return []fluentbit.Component{
return append([]fluentbit.Component{
parser,
fluentbit.ParserFilterComponent(tag, p.Field, []string{parserName}),
}
}, transforms...)
}

type RegexParser struct {
Expand All @@ -103,10 +152,13 @@ func (p LoggingProcessorParseRegexComplex) Components(tag, uid string) []fluentb
parserNames := []string{}

for idx, parserConfig := range p.Parsers {
regex, transforms := rewriteComplexCaptures(parserConfig.Regex, tag)

parser, parserName := parserConfig.Parser.Component(tag, fmt.Sprintf("%s.%d", uid, idx))
parser.Config["Format"] = "regex"
parser.Config["Regex"] = parserConfig.Regex
parser.Config["Regex"] = regex
components = append(components, parser)
components = append(components, transforms...)
parserNames = append(parserNames, parserName)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
@SET buffers_dir=/var/lib/google-cloud-ops-agent/fluent-bit/buffers
@SET logs_dir=/var/log/google-cloud-ops-agent/subagents

[SERVICE]
Daemon off
Flush 1
Log_Level info
dns.resolver legacy
storage.backlog.mem_limit 50M
storage.checksum on
storage.max_chunks_up 128
storage.metrics on
storage.sync normal

[INPUT]
Name fluentbit_metrics
Scrape_Interval 60
Scrape_On_Start True

[INPUT]
Buffer_Chunk_Size 512k
Buffer_Max_Size 5M
DB ${buffers_dir}/default_pipeline_syslog
Key message
Mem_Buf_Limit 10M
Name tail
Path /var/log/messages,/var/log/syslog
Read_from_Head True
Rotate_Wait 30
Skip_Long_Lines On
Tag default_pipeline.syslog
storage.type filesystem

[INPUT]
Buffer_Chunk_Size 512k
Buffer_Max_Size 5M
DB ${buffers_dir}/ops-agent-fluent-bit
Key message
Mem_Buf_Limit 10M
Name tail
Path ${logs_dir}/logging-module.log
Read_from_Head True
Rotate_Wait 30
Skip_Long_Lines On
Tag ops-agent-fluent-bit
storage.type filesystem

[FILTER]
Key_Name key_1
Match default_pipeline.syslog
Name parser
Parser default_pipeline.syslog.0

[FILTER]
Match default_pipeline.syslog
Name modify



Rename __1 "logging.googleapis.com/severity"
Rename __2 "z*%\\>\\\\!"
Rename __3 "__6"

[FILTER]
Add logging.googleapis.com/logName syslog
Match default_pipeline.syslog
Name modify

[OUTPUT]
Match_Regex ^(default_pipeline\.syslog)$
Name stackdriver
Retry_Limit 3
net.connect_timeout_log_error False
resource gce_instance
stackdriver_agent Google-Cloud-Ops-Agent-Logging/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
tls On
tls.verify Off
workers 8

[OUTPUT]
Match_Regex ^(ops-agent-fluent-bit)$
Name stackdriver
Retry_Limit 3
net.connect_timeout_log_error False
resource gce_instance
stackdriver_agent Google-Cloud-Ops-Agent-Logging/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
tls On
tls.verify Off
workers 8

[OUTPUT]
Match *
Name prometheus_exporter
host 0.0.0.0
port 20202
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[PARSER]
Format regex
Name default_pipeline.syslog.0
Regex ^(?P<__1>[EWID]) (?P<__2>.*) (?P<__3>.)$
Time_Format time_format_1
Time_Key time_key_1
Loading

0 comments on commit 17f52e6

Please sign in to comment.