Skip to content

Commit

Permalink
eval: add LIKE patterns without wildcards to optimizedLikeFunc
Browse files Browse the repository at this point in the history
`eval.optimizedLikeFunc` has special cases for LIKE patterns that start and
end with wildcards `%` or `_` which can be evaluated without regular
expressions. Add support for patterns with no wildcards at all.

Fixes: cockroachdb#91887
Assists: cockroachdb#89749

Epic: None

Release note (performance improvement): Performance of the `LIKE` and
`ILIKE` operators using patterns without any wildcards has been
improved.
  • Loading branch information
michae2 authored and Mark Sirek committed Jun 30, 2023
1 parent 4b5144b commit e6e05f6
Show file tree
Hide file tree
Showing 3 changed files with 258 additions and 1 deletion.
1 change: 1 addition & 0 deletions pkg/sql/sem/eval/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ go_test(
"eval_internal_test.go",
"eval_test.go",
"like_test.go",
"match_test.go",
"parse_doid_test.go",
"timeconv_test.go",
"window_funcs_test.go",
Expand Down
18 changes: 17 additions & 1 deletion pkg/sql/sem/eval/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,18 @@ func optimizedLikeFunc(
}
return len(s) == len(string(firstChar)), nil
}, nil
default:
// Patterns without a wildcard boil down to a direct string comparison
// (after checking for escapes).
if rune(pattern[0]) == escape {
return nil, pgerror.Newf(pgcode.InvalidEscapeSequence, `LIKE pattern must not end with escape character`)
}
return func(s string) (bool, error) {
if caseInsensitive {
s, pattern = strings.ToUpper(s), strings.ToUpper(pattern)
}
return s == pattern, nil
}, nil
}
default:
if !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") {
Expand Down Expand Up @@ -252,7 +264,11 @@ func optimizedLikeFunc(
return strings.HasSuffix(s, suffix), nil
}, nil

case singleAnyStart || singleAnyEnd:
default:
// This default case handles (singleAnyStart || singleAnyEnd) as well as
// the case with no wildcards at all (!singleAnyStart && !singleAnyEnd)
// which becomes a direct string comparison after accounting for
// escaping and case-sensitivity.
return func(s string) (bool, error) {
if len(s) < 1 {
return false, nil
Expand Down
240 changes: 240 additions & 0 deletions pkg/sql/sem/eval/match_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package eval

import (
"fmt"
"testing"
)

// TestOptimizedLike verifies that a table of short LIKE / ILIKE patterns is
// served by optimizedLikeFunc (i.e. a non-nil matcher is returned) and that
// the matcher, or the returned error, agrees with the expected outcome.
func TestOptimizedLike(t *testing.T) {
	// Every pattern below is expected to be handled by optimizedLikeFunc
	// instead of falling back to ConvertLikeToRegexp.
	testCases := []struct {
		str             string
		pattern         string
		escape          rune
		caseInsensitive bool
		expected        bool
		errorExpected   bool
	}{
		/* The following test cases were generated using:
		WITH c(c) AS (VALUES ('a'), ('%'), ('_'), ('\\'))
		SELECT e'\t\t{"' || str || '", "' || pat || e'", \'' || esc || e'\', ' || ins || ', true, false},'
		FROM (
		  SELECT 'a' str, c1.c pat, c2.c esc, 'false' ins FROM c c1, c c2
		  UNION ALL
		  SELECT 'A', c1.c, c2.c, 'true' FROM c c1, c c2
		  UNION ALL
		  SELECT 'aa', c1.c || c2.c pat, c3.c esc, 'false' FROM c c1, c c2, c c3
		  UNION ALL
		  SELECT 'aaa', c1.c || 'a' || c2.c pat, c3.c esc, 'false' FROM c c1, c c2, c c3
		) x
		ORDER BY str, pat, esc;
		*/
		{"A", "%", '%', true, false, true},
		{"A", "%", '\\', true, true, false},
		{"A", "%", '_', true, true, false},
		{"A", "%", 'a', true, true, false},
		{"A", "\\", '%', true, false, false},
		{"A", "\\", '\\', true, false, true},
		{"A", "\\", '_', true, false, false},
		{"A", "\\", 'a', true, false, false},
		{"A", "_", '%', true, true, false},
		{"A", "_", '\\', true, true, false},
		{"A", "_", '_', true, false, true},
		{"A", "_", 'a', true, true, false},
		{"A", "a", '%', true, true, false},
		{"A", "a", '\\', true, true, false},
		{"A", "a", '_', true, true, false},
		{"A", "a", 'a', true, false, true},
		{"a", "%", '%', false, false, true},
		{"a", "%", '\\', false, true, false},
		{"a", "%", '_', false, true, false},
		{"a", "%", 'a', false, true, false},
		{"a", "\\", '%', false, false, false},
		{"a", "\\", '\\', false, false, true},
		{"a", "\\", '_', false, false, false},
		{"a", "\\", 'a', false, false, false},
		{"a", "_", '%', false, true, false},
		{"a", "_", '\\', false, true, false},
		{"a", "_", '_', false, false, true},
		{"a", "_", 'a', false, true, false},
		{"a", "a", '%', false, true, false},
		{"a", "a", '\\', false, true, false},
		{"a", "a", '_', false, true, false},
		{"a", "a", 'a', false, false, true},
		{"aa", "%%", '%', false, false, false},
		{"aa", "%%", '\\', false, true, false},
		{"aa", "%%", '_', false, true, false},
		{"aa", "%%", 'a', false, true, false},
		{"aa", "%\\", '%', false, false, false},
		{"aa", "%\\", '\\', false, false, true},
		{"aa", "%\\", '_', false, false, false},
		{"aa", "%\\", 'a', false, false, false},
		{"aa", "%_", '%', false, false, false},
		{"aa", "%_", '\\', false, true, false},
		{"aa", "%_", '_', false, false, true},
		{"aa", "%_", 'a', false, true, false},
		{"aa", "%a", '%', false, false, false},
		{"aa", "%a", '\\', false, true, false},
		{"aa", "%a", '_', false, true, false},
		{"aa", "%a", 'a', false, false, true},
		{"aa", "\\%", '%', false, false, true},
		{"aa", "\\%", '\\', false, false, false},
		{"aa", "\\%", '_', false, false, false},
		{"aa", "\\%", 'a', false, false, false},
		{"aa", "\\\\", '%', false, false, false},
		{"aa", "\\\\", '\\', false, false, false},
		{"aa", "\\\\", '_', false, false, false},
		{"aa", "\\\\", 'a', false, false, false},
		{"aa", "\\_", '%', false, false, false},
		{"aa", "\\_", '\\', false, false, false},
		{"aa", "\\_", '_', false, false, true},
		{"aa", "\\_", 'a', false, false, false},
		{"aa", "\\a", '%', false, false, false},
		{"aa", "\\a", '\\', false, false, false},
		{"aa", "\\a", '_', false, false, false},
		{"aa", "\\a", 'a', false, false, true},
		{"aa", "_%", '%', false, false, true},
		{"aa", "_%", '\\', false, true, false},
		{"aa", "_%", '_', false, false, false},
		{"aa", "_%", 'a', false, true, false},
		{"aa", "_\\", '%', false, false, false},
		{"aa", "_\\", '\\', false, false, true},
		{"aa", "_\\", '_', false, false, false},
		{"aa", "_\\", 'a', false, false, false},
		{"aa", "__", '%', false, true, false},
		{"aa", "__", '\\', false, true, false},
		{"aa", "__", '_', false, false, false},
		{"aa", "__", 'a', false, true, false},
		{"aa", "_a", '%', false, true, false},
		{"aa", "_a", '\\', false, true, false},
		{"aa", "_a", '_', false, false, false},
		{"aa", "_a", 'a', false, false, true},
		{"aa", "a%", '%', false, false, true},
		{"aa", "a%", '\\', false, true, false},
		{"aa", "a%", '_', false, true, false},
		{"aa", "a%", 'a', false, false, false},
		{"aa", "a\\", '%', false, false, false},
		{"aa", "a\\", '\\', false, false, true},
		{"aa", "a\\", '_', false, false, false},
		{"aa", "a\\", 'a', false, false, false},
		{"aa", "a_", '%', false, true, false},
		{"aa", "a_", '\\', false, true, false},
		{"aa", "a_", '_', false, false, true},
		{"aa", "a_", 'a', false, false, false},
		{"aa", "aa", '%', false, true, false},
		{"aa", "aa", '\\', false, true, false},
		{"aa", "aa", '_', false, true, false},
		{"aa", "aa", 'a', false, false, false},
		{"aaa", "%a%", '%', false, false, true},
		{"aaa", "%a%", '\\', false, true, false},
		{"aaa", "%a%", '_', false, true, false},
		{"aaa", "%a%", 'a', false, false, false},
		{"aaa", "%a\\", '%', false, false, false},
		{"aaa", "%a\\", '\\', false, false, true},
		{"aaa", "%a\\", '_', false, false, false},
		{"aaa", "%a\\", 'a', false, false, false},
		{"aaa", "%a_", '%', false, false, false},
		{"aaa", "%a_", '\\', false, true, false},
		{"aaa", "%a_", '_', false, false, true},
		{"aaa", "%a_", 'a', false, false, false},
		{"aaa", "%aa", '%', false, false, false},
		{"aaa", "%aa", '\\', false, true, false},
		{"aaa", "%aa", '_', false, true, false},
		{"aaa", "%aa", 'a', false, true, false},
		{"aaa", "\\a%", '%', false, false, true},
		{"aaa", "\\a%", '\\', false, true, false},
		{"aaa", "\\a%", '_', false, false, false},
		{"aaa", "\\a%", 'a', false, false, false},
		{"aaa", "\\a\\", '%', false, false, false},
		{"aaa", "\\a\\", '\\', false, false, true},
		{"aaa", "\\a\\", '_', false, false, false},
		{"aaa", "\\a\\", 'a', false, false, false},
		{"aaa", "\\a_", '%', false, false, false},
		{"aaa", "\\a_", '\\', false, false, false},
		{"aaa", "\\a_", '_', false, false, true},
		{"aaa", "\\a_", 'a', false, false, false},
		{"aaa", "\\aa", '%', false, false, false},
		{"aaa", "\\aa", '\\', false, false, false},
		{"aaa", "\\aa", '_', false, false, false},
		{"aaa", "\\aa", 'a', false, false, false},
		{"aaa", "_a%", '%', false, false, true},
		{"aaa", "_a%", '\\', false, true, false},
		{"aaa", "_a%", '_', false, true, false},
		{"aaa", "_a%", 'a', false, false, false},
		{"aaa", "_a\\", '%', false, false, false},
		{"aaa", "_a\\", '\\', false, false, true},
		{"aaa", "_a\\", '_', false, false, false},
		{"aaa", "_a\\", 'a', false, false, false},
		{"aaa", "_a_", '%', false, true, false},
		{"aaa", "_a_", '\\', false, true, false},
		{"aaa", "_a_", '_', false, false, true},
		{"aaa", "_a_", 'a', false, false, false},
		{"aaa", "_aa", '%', false, true, false},
		{"aaa", "_aa", '\\', false, true, false},
		{"aaa", "_aa", '_', false, false, false},
		{"aaa", "_aa", 'a', false, false, false},
		{"aaa", "aa%", '%', false, false, true},
		{"aaa", "aa%", '\\', false, true, false},
		{"aaa", "aa%", '_', false, true, false},
		{"aaa", "aa%", 'a', false, true, false},
		{"aaa", "aa\\", '%', false, false, false},
		{"aaa", "aa\\", '\\', false, false, true},
		{"aaa", "aa\\", '_', false, false, false},
		{"aaa", "aa\\", 'a', false, false, false},
		{"aaa", "aa_", '%', false, true, false},
		{"aaa", "aa_", '\\', false, true, false},
		{"aaa", "aa_", '_', false, false, true},
		{"aaa", "aa_", 'a', false, false, false},
		{"aaa", "aaa", '%', false, true, false},
		{"aaa", "aaa", '\\', false, true, false},
		{"aaa", "aaa", '_', false, true, false},
		{"aaa", "aaa", 'a', false, false, true},
	}

	for idx, tc := range testCases {
		// The operator only appears in the subtest name; the matcher itself
		// is selected through tc.caseInsensitive.
		operator := "LIKE"
		if tc.caseInsensitive {
			operator = "ILIKE"
		}
		subtest := fmt.Sprintf("%d-%s-%s-%s-ESCAPE-%s", idx, tc.str, operator, tc.pattern, string(tc.escape))
		t.Run(subtest, func(t *testing.T) {
			matcher, err := optimizedLikeFunc(tc.pattern, tc.caseInsensitive, tc.escape)

			// Validate the construction result before trying to evaluate.
			switch {
			case err != nil && !tc.errorExpected:
				t.Errorf("unexpected error: %v", err)
				return
			case err != nil:
				// The pattern was rejected, exactly as the table predicts.
				return
			case tc.errorExpected:
				t.Errorf("expected error")
				return
			case matcher == nil:
				// A nil matcher without an error means the optimized path
				// declined the pattern.
				t.Errorf("did not use optimized like evaluation")
				return
			}

			got, err := matcher(tc.str)
			if err != nil {
				t.Error(err)
				return
			}
			if got != tc.expected {
				t.Errorf("matches (%v) != tc.expected (%v)", got, tc.expected)
			}
		})
	}
}

0 comments on commit e6e05f6

Please sign in to comment.