Skip to content

Commit

Permalink
Merge pull request #1 from tie/glob-match-like
Browse files Browse the repository at this point in the history
Use backtracking in glob.MatchLike
  • Loading branch information
tdakkota authored Oct 11, 2020
2 parents be621af + 7968a87 commit 30c1bd0
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 17 deletions.
53 changes: 41 additions & 12 deletions sql/query/glob/like.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ func equalFold(sr, tr rune) bool {
func MatchLike(pattern, s string) bool {
var prevEscape bool

for len(pattern) != 0 {
var w, t string // backtracking state

for len(s) != 0 {
// Read (and consume) the next character from the input pattern.
var p rune
if len(pattern) == 0 {
goto backtrack
}
p, pattern = readRune(pattern)

// There are now 4 possibilities:
Expand All @@ -80,7 +85,7 @@ func MatchLike(pattern, s string) bool {
// 2. p is an unescaped match-one character "_",
// 3. p is an unescaped escape character, or
// 4. p is to be handled as an ordinary character
//
loop:
if p == matchAll && !prevEscape {
// Case 1.
var c byte
Expand Down Expand Up @@ -109,13 +114,9 @@ func MatchLike(pattern, s string) bool {
return true
}

for len(s) != 0 {
if MatchLike(pattern, s) {
return true
}
s = skipRune(s)
}
return false
// Save state and match next character.
//
w, t = pattern, s
} else if p == matchOne && !prevEscape {
// Case 2.
if len(s) == 0 {
Expand All @@ -127,14 +128,42 @@ func MatchLike(pattern, s string) bool {
prevEscape = true
} else {
// Case 4.
prevEscape = false

var r rune
r, s = readRune(s)
if !equalFold(p, r) {
return false
goto backtrack
}
prevEscape = false
}
continue

backtrack:
if len(w) == 0 {
// Nothing to backtrack.
return prevEscape
}
// Restore the pattern saved at the last matchAll and skip one rune of the saved input.
// Note that we only ever backtrack to a matchAll.
p, pattern = matchAll, w
s = skipRune(t)
goto loop
}

return len(s) == 0
// The input is exhausted; check that the rest of the pattern consists only of matchAll characters.
for i := 0; i < len(pattern); i++ {
if pattern[i] == matchAll {
continue
}

// Allow a dangling escape character at the very end of the pattern.
if i+1 == len(pattern) {
if pattern[i] == matchEsc {
return true
}
}

return false
}
return true
}
130 changes: 125 additions & 5 deletions sql/query/glob/like_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,35 @@ func TestMatchLike(t *testing.T) {
}{
// Empty
{"", "", true},
{"abc", "", false},
{"", "x", false},
{"x", "", false},

// One
{"", "_", false},
{"x", "_", true},
{"x", "__", false},
{"xx", "_", false},
{"", "_", false},
{"bLah", "bL_h", true},
{"bLaaa", "bLa_", false},
{"bLah", "bLa_", true},
{"bLaH", "_Lah", true},
{"bLaH", "_LaH", true},

// Any
// All
{"", "%", true},
{"abc", "%", true},
{"", "%", true},
{"abc", "%%", true},

// Any and one
{"x", "%_", true},
{"", "%_", false},

// Escape
{"", "\\", true},
{"x", "%\\", true},
{"x", "_\\", true},
{"x", "_\\x", false},
{"%", "\\%", true},
{"_", "\\_", true},
{"x", "\\%", false},
Expand All @@ -37,14 +54,29 @@ func TestMatchLike(t *testing.T) {

// Exact
{"abc", "abc", true},
{"aBc", "AbC", false},
{"aBc", "AbC", true},
{"abc", "def", false},

// Case folding
{"K", "\u212A", true}, // K → k → U+212A
{"\u212A", "k", true},

// Invalid UTF-8
{"\xFF", "\xFF", true},
{"\xFA", "\xFB", false},
{"\xFF", "_", true},
{"\xFF", "\xFF_", false},
{"\xFF", "%", true},
{"\xFF", "%\xFF%", true},
{"\xFF", "x", false},

// Prefix
{"abc", "abc%", true},
{"abcdef", "abc%", true},
{"abcdef", "def%", false},

// Suffix
{"abc", "%abc", true},
{"defabc", "%abc", true},
{"defabc", "%def", false},

Expand All @@ -54,10 +86,98 @@ func TestMatchLike(t *testing.T) {
{"abc", "b", false},

// Complex
{"abc", "ab%d", false},
{"ABCD", "%B%C%", true},
{"ABCD", "_%B%C%_", true},
{"ABxCxxD", "a%b%c%d", true},
{"a", "__", false},
{"ab", "__", true},
{"abc", "___", true},
{"abcd", "____", true},
{"abc", "____", false},
{"abcd", "_b__", true},
{"abcd", "_a__", false},
{"abcd", "__c_", true},
{"abcd", "__d_", false},

// Mixed
{"", "%_", false},
{"", "_%", false},
{"a", "%_", true},
{"a", "%__", false},
{"ab", "%_", true},
{"abc", "%_", true},
{"ab", "_%_", true},
{"ab", "%_%_%", true},
{"aaaa", "_aa%", true},
{"aaaa", "%aa_", true},
{"abc", "_%%_%_", true},
{"abc", "_%%_%&_", false},
{"abcd", "_b%__", true},
{"abcd", "_a%__", false},
{"abcd", "_%%_c_", true},
{"abcd", "_%%_d_", false},
{"abcde", "_b_d%_", true},
{"abcde", "_%b%_%d%_", true},
{"abcd", "_%b%c%_", true},
{"ABxCxxD", "%__B", false},
{"abBbc", "%b_c", true},

// Longer strings
{
"%abc%",
"%%\\%a%b%c\\%%%",
true,
},
{
"aaabbaabbaab",
"%aabbaa%a%",
true,
},
{
"abacaaadabacababacaaabadagabacaba",
"%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%",
true,
},
{
"aaaaaaaaaaaaaaaa",
"%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%",
false,
},
{
"%a%b%c%",
"%%%%%%%%a%%%%\\%%%%b%%%%\\%%%%c%%%%%%%%",
true,
},
{
"a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%a%",
"a%a\\%a%a\\%a%a\\%a%a\\%a%a\\%a%a\\%a%a\\%a%a\\%a%",
true,
},
{
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",
"a%a%a%a%a%a%aa%aaa%a%a%b",
true,
},
{
"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",
"%a%b%ba%ca%a%aa%aaa%fa%ga%b%",
true,
},
{
"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",
"%a%b%ba%ca%a%x%aaa%fa%ga%b%",
false,
},
{
"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",
"%a%b%ba%ca%aaaa%fa%ga%gggg%b%",
false,
},
{
"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",
"%a%b%ba%ca%aaaa%fa%ga%ggg%b%",
true,
},
}

for _, test := range tests {
Expand Down

0 comments on commit 30c1bd0

Please sign in to comment.