Add LIKE operator support #241

Merged: 12 commits, Oct 25, 2020
13 changes: 10 additions & 3 deletions sql/parser/expr.go
@@ -126,10 +126,17 @@ func (p *Parser) parseOperator() (func(lhs, rhs expr.Expr) expr.Expr, scanner.To
 		p.Unscan()
 		return expr.Is, op, nil
 	case scanner.NOT:
-		if tok, pos, lit := p.ScanIgnoreWhitespace(); tok != scanner.IN {
-			return nil, 0, newParseError(scanner.Tokstr(tok, lit), []string{"IN"}, pos)
+		tok, pos, lit := p.ScanIgnoreWhitespace()
+		switch tok {
+		case scanner.IN:
+			return expr.NotIn, op, nil
+		case scanner.LIKE:
+			return expr.NotLike, op, nil
 		}
-		return expr.NotIn, op, nil
+
+		return nil, 0, newParseError(scanner.Tokstr(tok, lit), []string{"IN, LIKE"}, pos)
+	case scanner.LIKE:
+		return expr.Like, op, nil
 	}

 	panic(fmt.Sprintf("unknown operator %q", op))
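For readers skimming the hunk above: after a NOT token the parser now peeks at the next token to choose between NOT IN and NOT LIKE, and only reports a parse error if neither follows. Below is a standalone sketch of that dispatch using illustrative names (token, dispatchNot), not the parser's real types:

package main

import "fmt"

// token is a stand-in for scanner.Token; the constants and dispatchNot are
// illustrative only and are not part of the diff above.
type token int

const (
	tokIN token = iota
	tokLIKE
	tokOTHER
)

// dispatchNot mirrors the new NOT branch: the token following NOT selects
// the expression constructor, and anything else becomes a parse error.
func dispatchNot(next token) (string, error) {
	switch next {
	case tokIN:
		return "expr.NotIn", nil
	case tokLIKE:
		return "expr.NotLike", nil
	}
	return "", fmt.Errorf(`found %v, expected "IN, LIKE"`, next)
}

func main() {
	op, _ := dispatchNot(tokLIKE)
	fmt.Println(op) // expr.NotLike
}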
14 changes: 2 additions & 12 deletions sql/query/expr/comparison.go
@@ -426,7 +426,7 @@ func (op cmpOp) compare(l, r document.Value) (bool, error) {
 func IsComparisonOperator(op Operator) bool {
 	switch op.(type) {
 	case eqOp, neqOp, gtOp, gteOp, ltOp, lteOp,
-		isOp, isNotOp, inOp, notInOp:
+		isOp, isNotOp, inOp, notInOp, likeOp:
 		return true
 	}

@@ -540,17 +540,7 @@ func NotIn(a, b Expr) Expr {
 }

 func (op notInOp) Eval(ctx EvalStack) (document.Value, error) {
-	v, err := op.inOp.Eval(ctx)
-	if err != nil {
-		return v, err
-	}
-	if v == trueLitteral {
-		return falseLitteral, nil
-	}
-	if v == falseLitteral {
-		return trueLitteral, nil
-	}
-	return v, nil
+	return invertBoolResult(op.inOp.Eval)(ctx)
 }

 func (op notInOp) String() string {
17 changes: 17 additions & 0 deletions sql/query/expr/expr.go
@@ -132,3 +132,20 @@ type Parentheses struct {
 func (p Parentheses) Eval(es EvalStack) (document.Value, error) {
 	return p.E.Eval(es)
 }
+
+func invertBoolResult(f func(ctx EvalStack) (document.Value, error)) func(ctx EvalStack) (document.Value, error) {
+	return func(ctx EvalStack) (document.Value, error) {
+		v, err := f(ctx)
+
+		if err != nil {
+			return v, err
+		}
+		if v == trueLitteral {
+			return falseLitteral, nil
+		}
+		if v == falseLitteral {
+			return trueLitteral, nil
+		}
+		return v, nil
+	}
+}
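invertBoolResult factors the true/false flip out of notInOp.Eval so that notLikeOp.Eval (added below in like.go) can reuse it. A minimal self-contained sketch of the same pattern, with genji's EvalStack and document.Value replaced by a simplified stand-in type; only the control flow is meant to match:

package main

import "fmt"

// value is a simplified stand-in for document.Value: it either holds a bool
// or is "not a boolean" (e.g. NULL), in which case inversion leaves it alone.
type value struct {
	isBool bool
	b      bool
}

// invertBool wraps an evaluation function and flips a boolean result while
// passing errors and non-boolean values through, mirroring invertBoolResult.
func invertBool(f func() (value, error)) func() (value, error) {
	return func() (value, error) {
		v, err := f()
		if err != nil || !v.isBool {
			return v, err
		}
		v.b = !v.b
		return v, nil
	}
}

func main() {
	in := func() (value, error) { return value{isBool: true, b: true}, nil } // stands in for inOp.Eval
	notIn := invertBool(in)
	v, _ := notIn()
	fmt.Println(v.b) // false
}

Errors and non-boolean results pass through unchanged, which is what lets NOT IN and NOT LIKE share the helper.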
193 changes: 193 additions & 0 deletions sql/query/expr/like.go
@@ -0,0 +1,193 @@
package expr

import (
	"errors"
	"fmt"
	"github.com/genjidb/genji/document"
	"github.com/genjidb/genji/sql/scanner"
	"regexp"
	"strings"
)

func isWildcard(char byte) bool {
	return char == '%' || char == '_'
}

// replaceUnescaped replaces all instances of oldStr that are not escaped (read:
// preceded) with the specified unescape token with newStr.
// For example, with an escape token of `\\`
// replaceUnescaped("TE\\__ST", "_", ".", `\\`) --> "TE\\_.ST"
// replaceUnescaped("TE\\%%ST", "%", ".*", `\\`) --> "TE\\%.*ST"
// If the preceding escape token is escaped, then oldStr will be replaced.
// For example
// replaceUnescaped("TE\\\\_ST", "_", ".", `\\`) --> "TE\\\\.ST"
func replaceUnescaped(s, oldStr, newStr string, escapeToken string) string {
	// We count the number of occurrences of 'oldStr'.
	// This however can be an overestimate since the oldStr token could be
	// escaped. e.g. `\\_`.
	nOld := strings.Count(s, oldStr)
	if nOld == 0 {
		return s
	}

	// Allocate buffer for final string.
	// This can be an overestimate since some of the oldStr tokens may
	// be escaped.
	// This is fine since we keep track of the running number of bytes
	// actually copied.
	// It's rather difficult to count the exact number of unescaped
	// tokens without manually iterating through the entire string and
	// keeping track of escaped escape tokens.
	retLen := len(s)
	// If len(newStr) - len(oldStr) < 0, then this can under-allocate which
	// will not behave correctly with copy.
	if addnBytes := nOld * (len(newStr) - len(oldStr)); addnBytes > 0 {
		retLen += addnBytes
	}
	ret := make([]byte, retLen)
	retWidth := 0
	start := 0
OldLoop:
	for i := 0; i < nOld; i++ {
		nextIdx := start + strings.Index(s[start:], oldStr)

		escaped := false
		for {
			// We need to look behind to check if the escape token
			// is really an escape token.
			// E.g. if our specified escape token is `\\` and oldStr
			// is `_`, then
			// `\\_` --> escaped
			// `\\\\_` --> not escaped
			// `\\\\\\_` --> escaped
			curIdx := nextIdx
			lookbehindIdx := curIdx - len(escapeToken)
			for lookbehindIdx >= 0 && s[lookbehindIdx:curIdx] == escapeToken {
				escaped = !escaped
				curIdx = lookbehindIdx
				lookbehindIdx = curIdx - len(escapeToken)
			}

			// The token was not escaped. Proceed.
			if !escaped {
				break
			}

			// Token was escaped. Copy everything over and continue.
			retWidth += copy(ret[retWidth:], s[start:nextIdx+len(oldStr)])
			start = nextIdx + len(oldStr)

			// Continue with next oldStr token.
			continue OldLoop
		}

		// Token was not escaped so we replace it with newStr.
		// Two copies is more efficient than concatenating the slices.
		retWidth += copy(ret[retWidth:], s[start:nextIdx])
		retWidth += copy(ret[retWidth:], newStr)
		start = nextIdx + len(oldStr)
	}

	retWidth += copy(ret[retWidth:], s[start:])
	return string(ret[0:retWidth])
}

func patternToRegexp(pattern string) (*regexp.Regexp, error) {
	pattern = regexp.QuoteMeta(pattern)
	pattern = replaceUnescaped(pattern, `%`, `.*`, `\\`)
	pattern = replaceUnescaped(pattern, `_`, `.`, `\\`)
	return regexp.Compile(pattern)
}

func like(text, pattern string) (bool, error) {
	if pattern == "" {
		// true only if text == pattern => text == "" => len(text) == 0
		return len(text) == 0, nil
	}

	if pattern == "%" {
		// any match
		return true, nil
	}

	if pattern == "_" {
		// at least one character
		return len(text) > 0, nil
	}

	if len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "%_") {
		first := pattern[0]
		last := pattern[len(pattern)-1]
		switch {
		case !isWildcard(first) && !isWildcard(last):
			// exact match
			return text == pattern, nil
		case first == '%' && !isWildcard(last):
			// suffix match
			return strings.HasSuffix(text, pattern[1:]), nil
		case last == '%' && !isWildcard(first):
			// prefix match
			return strings.HasPrefix(text, pattern[0:len(pattern)-1]), nil
		case first == '%' && last == '%':
			// contains
			return strings.Contains(text, pattern[1:len(pattern)-1]), nil
		}
	}

	r, err := patternToRegexp(pattern)
	if err != nil {
		return false, err
	}
	return r.MatchString(text), nil
}

type likeOp struct {
	*simpleOperator
}

// Like creates an expression that evaluates to the result of a LIKE b.
func Like(a, b Expr) Expr {
	return &likeOp{&simpleOperator{a, b, scanner.LIKE}}
}

func (op likeOp) Eval(ctx EvalStack) (document.Value, error) {
	a, b, err := op.simpleOperator.eval(ctx)
	if err != nil {
		return nullLitteral, err
	}

	if a.Type != document.TextValue || b.Type != document.TextValue {
		return nullLitteral, errors.New("LIKE operator takes text values")
	}

	ok, err := like(a.V.(string), b.V.(string))
	if err != nil {
		return nullLitteral, err
	}
	if ok {
		return trueLitteral, nil
	}

	return falseLitteral, nil
}

func (op likeOp) String() string {
	return fmt.Sprintf("%v LIKE %v", op.a, op.b)
}

type notLikeOp struct {
	likeOp
}

// NotLike creates an expression that evaluates to the result of a NOT LIKE b.
func NotLike(a, b Expr) Expr {
	return &notLikeOp{likeOp{&simpleOperator{a, b, scanner.LIKE}}}
}

func (op notLikeOp) Eval(ctx EvalStack) (document.Value, error) {
	return invertBoolResult(op.likeOp.Eval)(ctx)
}

func (op notLikeOp) String() string {
	return fmt.Sprintf("%v NOT LIKE %v", op.a, op.b)
}
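To make the wildcard translation concrete: patternToRegexp quotes the pattern, then rewrites unescaped % into .* and unescaped _ into . before compiling it, and like falls back to that regexp only when the fast paths above do not apply. The sketch below is an illustrative approximation, not the PR's code: it skips the escape handling done by replaceUnescaped and anchors the expression with ^ and $, whereas the PR relies on the fast paths plus an unanchored MatchString.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// likeRegexp is an illustrative translation of a LIKE pattern: % matches any
// run of characters, _ matches exactly one, and everything else is literal.
// It anchors with ^ and $ so the whole text must match the pattern.
func likeRegexp(pattern string) (*regexp.Regexp, error) {
	var b strings.Builder
	b.WriteString("^")
	for _, r := range pattern {
		switch r {
		case '%':
			b.WriteString(".*")
		case '_':
			b.WriteString(".")
		default:
			b.WriteString(regexp.QuoteMeta(string(r)))
		}
	}
	b.WriteString("$")
	return regexp.Compile(b.String())
}

func main() {
	re, err := likeRegexp("A%B%C")
	if err != nil {
		panic(err)
	}
	fmt.Println(re.MatchString("AdBeeeC")) // true, same as the Regexp case in like_test.go
	fmt.Println(re.MatchString("AdBeee"))  // false: the pattern requires a trailing C
}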
51 changes: 51 additions & 0 deletions sql/query/expr/like_test.go
@@ -0,0 +1,51 @@
package expr

import "testing"

func Test_like(t *testing.T) {
	type args struct {
		text    string
		pattern string
	}
	tests := []struct {
		name    string
		args    args
		want    bool
		wantErr bool
	}{
		{"Empty", args{"", ""}, true, false},
		{"Empty false", args{"abc", ""}, false, false},

		{"Any", args{"abc", "%"}, true, false},
		{"Any false", args{"", "%"}, true, false},

		{"Any one and more", args{"abc", "_"}, true, false},
		{"Any one and more false", args{"", "_"}, false, false},

		{"Exact", args{"abc", "abc"}, true, false},
		{"Exact false", args{"abc", "def"}, false, false},

		{"Prefix", args{"abcdef", "abc%"}, true, false},
		{"Prefix false", args{"abcdef", "def%"}, false, false},

		{"Suffix", args{"defabc", "%abc"}, true, false},
		{"Suffix false", args{"defabc", "%def"}, false, false},

		{"Contains", args{"defabcdef", "%abc%"}, true, false},
		{"Contains false", args{"abcd", "%def%"}, false, false},

		{"Regexp", args{"AdBeeeC", "A%B%C"}, true, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := like(tt.args.text, tt.args.pattern)
			if (err != nil) != tt.wantErr {
				t.Errorf("like() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("like() got = %v, want %v", got, tt.want)
			}
		})
	}
}
1 change: 1 addition & 0 deletions sql/scanner/scanner_test.go
@@ -54,6 +54,7 @@ func TestScanner_Scan(t *testing.T) {
 		{s: `>=`, tok: scanner.GTE, raw: `>=`},
 		{s: `IN`, tok: scanner.IN, raw: `IN`},
 		{s: `IS`, tok: scanner.IS, raw: `IS`},
+		{s: `LIKE`, tok: scanner.LIKE, raw: `LIKE`},

 		// Misc tokens
 		{s: `(`, tok: scanner.LPAREN, raw: `(`},
6 changes: 4 additions & 2 deletions sql/scanner/token.go
@@ -56,6 +56,7 @@ const (
 	GTE // >=
 	IN // IN
 	IS // IS
+	LIKE // LIKE
 	operatorEnd

 	LPAREN // (
@@ -165,6 +166,7 @@ var tokens = [...]string{
 	GTE: ">=",
 	IN: "IN",
 	IS: "IS",
+	LIKE: "LIKE",

 	LPAREN: "(",
 	RPAREN: ")",
@@ -238,7 +240,7 @@ func initKeywords() {
 	for tok := keywordBeg + 1; tok < keywordEnd; tok++ {
 		keywords[strings.ToLower(tokens[tok])] = tok
 	}
-	for _, tok := range []Token{AND, OR, TRUE, FALSE, NULL, IN, IS} {
+	for _, tok := range []Token{AND, OR, TRUE, FALSE, NULL, IN, IS, LIKE} {
 		keywords[strings.ToLower(tokens[tok])] = tok
 	}
 }
@@ -260,7 +262,7 @@ func (tok Token) Precedence() int {
 		return 2
 	case IN:
 		return 3
-	case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE, IS:
+	case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE, IS, LIKE:
 		return 4
 	case ADD, SUB, BITWISEOR, BITWISEXOR:
 		return 5