Skip to content

Commit

Permalink
Add regex processor plugin (#3839)
Browse files Browse the repository at this point in the history
  • Loading branch information
44px authored and danielnelson committed May 21, 2018
1 parent 3be9cad commit ccc4a85
Show file tree
Hide file tree
Showing 4 changed files with 430 additions and 0 deletions.
1 change: 1 addition & 0 deletions plugins/processors/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ package all
import (
_ "github.com/influxdata/telegraf/plugins/processors/override"
_ "github.com/influxdata/telegraf/plugins/processors/printer"
_ "github.com/influxdata/telegraf/plugins/processors/regex"
_ "github.com/influxdata/telegraf/plugins/processors/topk"
)
46 changes: 46 additions & 0 deletions plugins/processors/regex/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Regex Processor Plugin

The `regex` plugin transforms tag and field values using a regular expression pattern. If the `result_key` parameter is present, it can produce new tags and fields from existing ones.

### Configuration:

```toml
[[processors.regex]]
namepass = ["nginx_requests"]

# Tag and field conversions are defined in separate sub-tables
[[processors.regex.tags]]
## Tag to change
key = "resp_code"
## Regular expression to match on a tag value
pattern = "^(\\d)\\d\\d$"
## Pattern for constructing a new value (${1} represents first subgroup)
replacement = "${1}xx"

[[processors.regex.fields]]
key = "request"
## All the power of the Go regular expressions available here
## For example, named subgroups
pattern = "^/api(?P<method>/[\\w/]+)\\S*"
replacement = "${method}"
## If result_key is present, a new field will be created
## instead of changing existing field
result_key = "method"

# Multiple conversions may be applied for one field sequentially
# Let's extract one more value
[[processors.regex.fields]]
key = "request"
pattern = ".*category=(\\w+).*"
replacement = "${1}"
result_key = "search_category"
```

### Tags:

No tags are applied by this processor.

### Example Output:
```
nginx_requests,verb=GET,resp_code=2xx request="/api/search/?category=plugins&q=regex&sort=asc",method="/search/",search_category="plugins",referrer="-",ident="-",http_version=1.1,agent="UserAgent",client_ip="127.0.0.1",auth="-",resp_bytes=270i 1519652321000000000
```
110 changes: 110 additions & 0 deletions plugins/processors/regex/regex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package regex

import (
"regexp"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/processors"
)

// Regex is a processor that rewrites metric tag and field values using
// regular expressions.
type Regex struct {
// Tags lists the conversions Apply runs against metric tags, in order.
Tags []converter
// Fields lists the conversions Apply runs against string-valued metric
// fields, in order.
Fields []converter
// regexCache holds compiled expressions keyed by their pattern text so each
// pattern is compiled only once.
regexCache map[string]*regexp.Regexp
}

// converter describes a single regex conversion of one tag or field.
type converter struct {
// Key is the name of the tag or field whose value is read.
Key string
// Pattern is the regular expression matched against the value.
Pattern string
// Replacement is the replacement template handed to ReplaceAllString.
Replacement string
// ResultKey, when non-empty, is the key the converted value is stored under
// instead of Key.
ResultKey string
}

// sampleConfig is the example TOML configuration returned by SampleConfig.
const sampleConfig = `
## Tag and field conversions defined in a separate sub-tables
# [[processors.regex.tags]]
# ## Tag to change
# key = "resp_code"
# ## Regular expression to match on a tag value
# pattern = "^(\\d)\\d\\d$"
# ## Pattern for constructing a new value (${1} represents first subgroup)
# replacement = "${1}xx"
# [[processors.regex.fields]]
# key = "request"
# ## All the power of the Go regular expressions available here
# ## For example, named subgroups
# pattern = "^/api(?P<method>/[\\w/]+)\\S*"
# replacement = "${method}"
# ## If result_key is present, a new field will be created
# ## instead of changing existing field
# result_key = "method"
## Multiple conversions may be applied for one field sequentially
## Let's extract one more value
# [[processors.regex.fields]]
# key = "request"
# pattern = ".*category=(\\w+).*"
# replacement = "${1}"
# result_key = "search_category"
`

// NewRegex returns a Regex processor with an empty, ready-to-use cache of
// compiled patterns and no tag or field conversions configured.
func NewRegex() *Regex {
	cache := make(map[string]*regexp.Regexp)
	return &Regex{regexCache: cache}
}

// SampleConfig returns the example TOML configuration for this processor.
func (r *Regex) SampleConfig() string {
return sampleConfig
}

// Description returns a one-line summary of what this processor does.
func (r *Regex) Description() string {
return "Transforms tag and field values with regex pattern"
}

// Apply runs every configured conversion against each metric in in and
// returns the same slice; metrics are modified in place.
//
// Tag conversions run first, then field conversions. A conversion is skipped
// when the metric has no tag/field named by its Key, and field conversions
// are additionally skipped when the field value is not a string. Conversions
// are applied in configuration order, so a later conversion sees the result
// of an earlier one that wrote to the same key.
func (r *Regex) Apply(in ...telegraf.Metric) []telegraf.Metric {
	for _, m := range in {
		for _, c := range r.Tags {
			tagValue, found := m.GetTag(c.Key)
			if !found {
				continue
			}
			m.AddTag(r.convert(c, tagValue))
		}

		for _, c := range r.Fields {
			raw, found := m.GetField(c.Key)
			if !found {
				continue
			}

			// Only string fields can be matched; other types are left as is.
			text, isString := raw.(string)
			if !isString {
				continue
			}
			m.AddField(r.convert(c, text))
		}
	}

	return in
}

// convert applies the conversion c to src and returns the key the result
// should be stored under together with the converted value.
//
// Compiled expressions are memoized in r.regexCache, keyed by pattern text.
// The cache is created on first use, so a Regex that was not built with
// NewRegex (for example a zero-value struct) does not panic on a nil map
// write.
//
// When c.ResultKey is empty, the replacement is applied to src
// unconditionally and returned under c.Key. When c.ResultKey is set, the
// value is the replacement result only if the pattern matches src, and the
// empty string otherwise; it is returned under c.ResultKey.
//
// An invalid c.Pattern makes regexp.MustCompile panic.
func (r *Regex) convert(c converter, src string) (string, string) {
	// Writing to a nil map panics, so make sure the cache exists first.
	if r.regexCache == nil {
		r.regexCache = make(map[string]*regexp.Regexp)
	}

	regex, compiled := r.regexCache[c.Pattern]
	if !compiled {
		regex = regexp.MustCompile(c.Pattern)
		r.regexCache[c.Pattern] = regex
	}

	value := ""
	if c.ResultKey == "" || regex.MatchString(src) {
		value = regex.ReplaceAllString(src, c.Replacement)
	}

	if c.ResultKey != "" {
		return c.ResultKey, value
	}

	return c.Key, value
}

// init registers this processor under the name "regex", using a factory that
// returns a fresh instance from NewRegex.
func init() {
processors.Add("regex", func() telegraf.Processor {
return NewRegex()
})
}
Loading

0 comments on commit ccc4a85

Please sign in to comment.