Skip to content

Commit

Permalink
Replace *regexp.Regexp with match.Matcher (#3469)
Browse files Browse the repository at this point in the history
* Use matchers in processor conditionals

* Filebeat include/exclude lines/files using match.Matcher

* Update filebeat multiline to use match.Matcher

* Metricbeat system module whitelist using new string matcher

* Remove `match` conditional

* Update changelog
  • Loading branch information
Steffen Siering authored and andrewkroh committed Jan 27, 2017
1 parent d2168ac commit 3fce440
Show file tree
Hide file tree
Showing 17 changed files with 208 additions and 212 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Files created by Beats (logs, registry, file output) will have 0600 permissions. {pull}3387[3387].
- RPM/deb packages will now install the config file with 0600 permissions. {pull}3382[3382]
- Add the option to pass custom HTTP headers to the Elasticsearch output. {pull}3400[3400]
- Unify `regexp` and `contains` conditionals, for both to support array of strings and convert numbers to strings if required. {pull}3469[3469]

*Metricbeat*

Expand All @@ -87,6 +88,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Kafka consumer groups metricset. {pull}3240[3240]
- Add dynamic configuration reloading for modules. {pull}3281[3281]
- Add docker health metricset {pull}3357[3357]
- System module uses new matchers for white-listing processes. {pull}3469[3469]

*Packetbeat*

Expand All @@ -96,6 +98,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Add enabled config option to prospectors. {pull}3157[3157]
- Add target option for decoded_json_field. {pull}3169[3169]
- Add the `pipeline` config option at the prospector level, for configuring the Ingest Node pipeline ID. {pull}3433[3433]
- Update regular expressions used for matching file names or lines (multiline, include/exclude functionality) to new matchers improving performance of simple string matches. {pull}3469[3469]

*Winlogbeat*

Expand Down
6 changes: 3 additions & 3 deletions filebeat/harvester/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ package harvester

import (
"fmt"
"regexp"
"time"

cfg "github.com/elastic/beats/filebeat/config"
"github.com/elastic/beats/filebeat/harvester/reader"
"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/common/match"

"github.com/dustin/go-humanize"
"github.com/elastic/beats/libbeat/logp"
Expand Down Expand Up @@ -47,8 +47,8 @@ type harvesterConfig struct {
CloseEOF bool `config:"close_eof"`
CloseTimeout time.Duration `config:"close_timeout" validate:"min=0"`
ForceCloseFiles bool `config:"force_close_files"`
ExcludeLines []*regexp.Regexp `config:"exclude_lines"`
IncludeLines []*regexp.Regexp `config:"include_lines"`
ExcludeLines []match.Matcher `config:"exclude_lines"`
IncludeLines []match.Matcher `config:"include_lines"`
MaxBytes int `config:"max_bytes" validate:"min=0,nonzero"`
Multiline *reader.MultilineConfig `config:"multiline"`
JSON *reader.JSONConfig `config:"json"`
Expand Down
4 changes: 2 additions & 2 deletions filebeat/harvester/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ func (h *Harvester) sendEvent(event *input.Event) bool {
// the include_lines and exclude_lines options.
func (h *Harvester) shouldExportLine(line string) bool {
if len(h.config.IncludeLines) > 0 {
if !MatchAnyRegexps(h.config.IncludeLines, line) {
if !MatchAny(h.config.IncludeLines, line) {
// drop line
logp.Debug("harvester", "Drop line as it does not match any of the include patterns %s", line)
return false
}
}
if len(h.config.ExcludeLines) > 0 {
if MatchAnyRegexps(h.config.ExcludeLines, line) {
if MatchAny(h.config.ExcludeLines, line) {
// drop line
logp.Debug("harvester", "Drop line as it does match one of the exclude patterns%s", line)
return false
Expand Down
22 changes: 8 additions & 14 deletions filebeat/harvester/log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,29 +104,23 @@ func TestReadLine(t *testing.T) {
}

func TestExcludeLine(t *testing.T) {

regexp, err := InitRegexps([]string{"^DBG"})

regexp, err := InitMatchers("^DBG")
assert.Nil(t, err)

assert.True(t, MatchAnyRegexps(regexp, "DBG: a debug message"))
assert.False(t, MatchAnyRegexps(regexp, "ERR: an error message"))
assert.True(t, MatchAny(regexp, "DBG: a debug message"))
assert.False(t, MatchAny(regexp, "ERR: an error message"))
}

func TestIncludeLine(t *testing.T) {

regexp, err := InitRegexps([]string{"^ERR", "^WARN"})
regexp, err := InitMatchers("^ERR", "^WARN")

assert.Nil(t, err)

assert.False(t, MatchAnyRegexps(regexp, "DBG: a debug message"))
assert.True(t, MatchAnyRegexps(regexp, "ERR: an error message"))
assert.True(t, MatchAnyRegexps(regexp, "WARNING: a simple warning message"))
assert.False(t, MatchAny(regexp, "DBG: a debug message"))
assert.True(t, MatchAny(regexp, "ERR: an error message"))
assert.True(t, MatchAny(regexp, "WARNING: a simple warning message"))
}

func TestInitRegexp(t *testing.T) {

_, err := InitRegexps([]string{"((((("})
_, err := InitMatchers("(((((")
assert.NotNil(t, err)
}

Expand Down
17 changes: 9 additions & 8 deletions filebeat/harvester/reader/multiline.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package reader
import (
"errors"
"fmt"
"regexp"
"time"

"github.com/elastic/beats/libbeat/common/match"
)

// MultiLine reader combining multiple line events into one multi-line event.
Expand Down Expand Up @@ -55,7 +56,7 @@ func NewMultiline(
maxBytes int,
config *MultilineConfig,
) (*Multiline, error) {
types := map[string]func(*regexp.Regexp) (matcher, error){
types := map[string]func(match.Matcher) (matcher, error){
"before": beforeMatcher,
"after": afterMatcher,
}
Expand Down Expand Up @@ -280,14 +281,14 @@ func (mlr *Multiline) setState(next func(mlr *Multiline) (Message, error)) {

// matchers

func afterMatcher(regex *regexp.Regexp) (matcher, error) {
return genPatternMatcher(regex, func(last, current []byte) []byte {
func afterMatcher(pat match.Matcher) (matcher, error) {
return genPatternMatcher(pat, func(last, current []byte) []byte {
return current
})
}

func beforeMatcher(regex *regexp.Regexp) (matcher, error) {
return genPatternMatcher(regex, func(last, current []byte) []byte {
func beforeMatcher(pat match.Matcher) (matcher, error) {
return genPatternMatcher(pat, func(last, current []byte) []byte {
return last
})
}
Expand All @@ -299,12 +300,12 @@ func negatedMatcher(m matcher) matcher {
}

func genPatternMatcher(
regex *regexp.Regexp,
pat match.Matcher,
sel func(last, current []byte) []byte,
) (matcher, error) {
matcher := func(last, current []byte) bool {
line := sel(last, current)
return regex.Match(line)
return pat.Match(line)
}
return matcher, nil
}
5 changes: 3 additions & 2 deletions filebeat/harvester/reader/multiline_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ package reader

import (
"fmt"
"regexp"
"time"

"github.com/elastic/beats/libbeat/common/match"
)

type MultilineConfig struct {
Negate bool `config:"negate"`
Match string `config:"match" validate:"required"`
MaxLines *int `config:"max_lines"`
Pattern *regexp.Regexp `config:"pattern"`
Pattern match.Matcher `config:"pattern"`
Timeout *time.Duration `config:"timeout" validate:"positive"`
}

Expand Down
12 changes: 6 additions & 6 deletions filebeat/harvester/reader/multiline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import (
"bytes"
"errors"
"os"
"regexp"
"strings"
"testing"
"time"

"github.com/elastic/beats/filebeat/harvester/encoding"
"github.com/elastic/beats/libbeat/common/match"
"github.com/stretchr/testify/assert"
)

Expand All @@ -26,7 +26,7 @@ func (p bufferSource) Continuable() bool { return false }
func TestMultilineAfterOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`^[ \t] +`), // next line is indented by spaces
Pattern: match.MustCompile(`^[ \t] +`), // next line is indented by spaces
Match: "after",
},
2,
Expand All @@ -38,7 +38,7 @@ func TestMultilineAfterOK(t *testing.T) {
func TestMultilineBeforeOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`\\$`), // previous line ends with \
Pattern: match.MustCompile(`\\$`), // previous line ends with \
Match: "before",
},
2,
Expand All @@ -50,7 +50,7 @@ func TestMultilineBeforeOK(t *testing.T) {
func TestMultilineAfterNegateOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`^-`), // first line starts with '-' at beginning of line
Pattern: match.MustCompile(`^-`), // first line starts with '-' at beginning of line
Negate: true,
Match: "after",
},
Expand All @@ -63,7 +63,7 @@ func TestMultilineAfterNegateOK(t *testing.T) {
func TestMultilineBeforeNegateOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`;$`), // last line ends with ';'
Pattern: match.MustCompile(`;$`), // last line ends with ';'
Negate: true,
Match: "before",
},
Expand All @@ -76,7 +76,7 @@ func TestMultilineBeforeNegateOK(t *testing.T) {
func TestMultilineBeforeNegateOKWithEmptyLine(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`;$`), // last line ends with ';'
Pattern: match.MustCompile(`;$`), // last line ends with ';'
Negate: true,
Match: "before",
},
Expand Down
12 changes: 5 additions & 7 deletions filebeat/harvester/util.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package harvester

import "regexp"
import "github.com/elastic/beats/libbeat/common/match"

// MatchAnyRegexps checks if the text matches any of the regular expressions
func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool {

for _, rexp := range regexps {
if rexp.MatchString(text) {
// MatchAny checks if the text matches any of the regular expressions
func MatchAny(matchers []match.Matcher, text string) bool {
for _, m := range matchers {
if m.MatchString(text) {
return true
}
}

return false
}
23 changes: 9 additions & 14 deletions filebeat/harvester/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
package harvester

import (
"regexp"
"testing"

"github.com/elastic/beats/libbeat/logp"
"github.com/stretchr/testify/assert"
)

// InitRegexps initializes a list of compiled regular expressions.
func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
"github.com/elastic/beats/libbeat/common/match"
"github.com/elastic/beats/libbeat/logp"
)

result := []*regexp.Regexp{}
// InitMatchers initializes a list of compiled regular expressions.
func InitMatchers(exprs ...string) ([]match.Matcher, error) {
result := []match.Matcher{}

for _, exp := range exprs {
rexp, err := regexp.Compile(exp)
rexp, err := match.Compile(exp)
if err != nil {
logp.Err("Fail to compile the regexp %s: %s", exp, err)
return nil, err
Expand All @@ -27,13 +27,8 @@ func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
}

func TestMatchAnyRegexps(t *testing.T) {

patterns := []string{"\\.gz$"}

regexps, err := InitRegexps(patterns)

matchers, err := InitMatchers("\\.gz$")
assert.Nil(t, err)

assert.Equal(t, MatchAnyRegexps(regexps, "/var/log/log.gz"), true)
assert.Equal(t, MatchAny(matchers, "/var/log/log.gz"), true)

}
24 changes: 12 additions & 12 deletions filebeat/prospector/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package prospector

import (
"fmt"
"regexp"
"time"

cfg "github.com/elastic/beats/filebeat/config"
"github.com/elastic/beats/libbeat/common/match"
)

var (
Expand All @@ -23,17 +23,17 @@ var (
)

type prospectorConfig struct {
Enabled bool `config:"enabled"`
ExcludeFiles []*regexp.Regexp `config:"exclude_files"`
IgnoreOlder time.Duration `config:"ignore_older"`
Paths []string `config:"paths"`
ScanFrequency time.Duration `config:"scan_frequency" validate:"min=0,nonzero"`
InputType string `config:"input_type"`
CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"`
CleanRemoved bool `config:"clean_removed"`
HarvesterLimit uint64 `config:"harvester_limit" validate:"min=0"`
Symlinks bool `config:"symlinks"`
TailFiles bool `config:"tail_files"`
Enabled bool `config:"enabled"`
ExcludeFiles []match.Matcher `config:"exclude_files"`
IgnoreOlder time.Duration `config:"ignore_older"`
Paths []string `config:"paths"`
ScanFrequency time.Duration `config:"scan_frequency" validate:"min=0,nonzero"`
InputType string `config:"input_type"`
CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"`
CleanRemoved bool `config:"clean_removed"`
HarvesterLimit uint64 `config:"harvester_limit" validate:"min=0"`
Symlinks bool `config:"symlinks"`
TailFiles bool `config:"tail_files"`
}

func (config *prospectorConfig) Validate() error {
Expand Down
2 changes: 1 addition & 1 deletion filebeat/prospector/prospector_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error
// isFileExcluded checks if the given path should be excluded
func (p *ProspectorLog) isFileExcluded(file string) bool {
patterns := p.config.ExcludeFiles
return len(patterns) > 0 && harvester.MatchAnyRegexps(patterns, file)
return len(patterns) > 0 && harvester.MatchAny(patterns, file)
}

// isIgnoreOlder checks if the given state reached ignore_older
Expand Down
8 changes: 4 additions & 4 deletions filebeat/prospector/prospector_log_other_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
package prospector

import (
"regexp"
"testing"

"github.com/elastic/beats/filebeat/input"
"github.com/elastic/beats/filebeat/input/file"
"github.com/elastic/beats/libbeat/common/match"

"github.com/stretchr/testify/assert"
)

var matchTests = []struct {
file string
paths []string
excludeFiles []*regexp.Regexp
excludeFiles []match.Matcher
result bool
}{
{
Expand Down Expand Up @@ -45,13 +45,13 @@ var matchTests = []struct {
{
"test/test.log",
[]string{"test/*"},
[]*regexp.Regexp{regexp.MustCompile("test.log")},
[]match.Matcher{match.MustCompile("test.log")},
false,
},
{
"test/test.log",
[]string{"test/*"},
[]*regexp.Regexp{regexp.MustCompile("test2.log")},
[]match.Matcher{match.MustCompile("test2.log")},
true,
},
}
Expand Down
Loading

0 comments on commit 3fce440

Please sign in to comment.