Skip to content

Commit

Permalink
[4189] Fix colliding filters by updating match method for MultiCharSe…
Browse files Browse the repository at this point in the history
…quenceFilter to iterate through all patterns for the most complete match and not return on the first pattern found (#4188)

* Fix colliding filters by updating match method for MultiCharSequenceFilter to iterate through all patterns for the most complete match and not return on the first pattern found

* Remove empty newline

* Update new unit test to test length of returned byte array

* Update location of new test added to fold into an existing test

* Simplify code and add test for multiCharSequenceFilter with multiple patterns and backwards enabled

* Add benchmarks tests for new logic in multicharsequence matches method

* Add benchmark tests and optimizations to short circuit iterating through patterns if complete match is found

* Fix linting error
  • Loading branch information
saad-zaman authored Feb 22, 2023
1 parent e5dd9a4 commit 3afc5be
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
37 changes: 33 additions & 4 deletions src/metrics/filters/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -585,14 +585,43 @@ func (f *multiCharSequenceFilter) matches(val []byte) ([]byte, bool) {
return nil, false
}

var matchIndex int
var bestPattern []byte
for _, pattern := range f.patterns {
if f.backwards && bytes.HasSuffix(val, pattern) {
return val[:len(val)-len(pattern)], true
if len(pattern) > len(val) {
continue
}

if !f.backwards && bytes.HasPrefix(val, pattern) {
return val[len(pattern):], true
if f.backwards {
if bytes.HasSuffix(val, pattern) {
if len(pattern) > len(bestPattern) {
bestPattern = pattern
matchIndex = len(val) - len(pattern)
// No need to continue searching through remaining patterns if a complete match is found
if len(bestPattern) == len(val) {
break
}
}
}
} else {
if bytes.HasPrefix(val, pattern) {
if len(pattern) > len(bestPattern) {
bestPattern = pattern
matchIndex = len(pattern)
// No need to continue searching through remaining patterns if a complete match is found
if len(bestPattern) == len(val) {
break
}
}
}
}
}

if bestPattern != nil {
if f.backwards {
return val[:matchIndex], true
}
return val[matchIndex:], true
}

return nil, false
Expand Down
10 changes: 10 additions & 0 deletions src/metrics/filters/filter_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,16 @@ func BenchmarkRangeFilterRangeMatchNegation(b *testing.B) {
benchRangeFilterRange(b, []byte("!a-z"), []byte("p"), false)
}

func BenchmarkMultiRangeFilterPrefixCollision(b *testing.B) {
benchMultiRangeFilter(b, []byte("test_1,test_1_suffix,extra_unused_pattern"),
false, [][]byte{[]byte("test_1"), []byte("test_1_suffix")})
}

func BenchmarkMultiRangeFilterSuffixCollision(b *testing.B) {
benchMultiRangeFilter(b, []byte("test_1,prefix_test_1,extra_unused_pattern"),
true, [][]byte{[]byte("test_1"), []byte("prefix_test_1")})
}

func BenchmarkMultiRangeFilterTwo(b *testing.B) {
benchMultiRangeFilter(b, []byte("test_1,test_2"), false, [][]byte{[]byte("test_1"), []byte("fake_1")})
}
Expand Down
10 changes: 10 additions & 0 deletions src/metrics/filters/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,16 @@ func TestMultiCharSequenceFilter(t *testing.T) {
validateLookup(t, f, "12test", true, "12")
validateLookup(t, f, "12test2", true, "12")
validateLookup(t, f, "123book", true, "123")

f, err = newMultiCharSequenceFilter([]byte("test,test_post,extra_unused_filter"), false)
require.NoError(t, err)
validateLookup(t, f, "test", true, "")
validateLookup(t, f, "test_post", true, "")

f, err = newMultiCharSequenceFilter([]byte("test,pre_test,extra_unused_filter"), true)
require.NoError(t, err)
validateLookup(t, f, "test", true, "")
validateLookup(t, f, "pre_test", true, "")
}

func validateLookup(t *testing.T, f chainFilter, val string, expectedMatch bool, expectedRemainder string) {
Expand Down

0 comments on commit 3afc5be

Please sign in to comment.