Add LIKE operator support #241

Merged: 12 commits, Oct 25, 2020
13 changes: 10 additions & 3 deletions sql/parser/expr.go
@@ -126,10 +126,17 @@ func (p *Parser) parseOperator() (func(lhs, rhs expr.Expr) expr.Expr, scanner.To
 		p.Unscan()
 		return expr.Is, op, nil
 	case scanner.NOT:
-		if tok, pos, lit := p.ScanIgnoreWhitespace(); tok != scanner.IN {
-			return nil, 0, newParseError(scanner.Tokstr(tok, lit), []string{"IN"}, pos)
+		tok, pos, lit := p.ScanIgnoreWhitespace()
+		switch tok {
+		case scanner.IN:
+			return expr.NotIn, op, nil
+		case scanner.LIKE:
+			return expr.NotLike, op, nil
 		}
-		return expr.NotIn, op, nil
+
+		return nil, 0, newParseError(scanner.Tokstr(tok, lit), []string{"IN, LIKE"}, pos)
+	case scanner.LIKE:
+		return expr.Like, op, nil
 	}

 	panic(fmt.Sprintf("unknown operator %q", op))
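For readers skimming the hunk above: after a NOT token the parser now peeks at the next token to choose between NOT IN and NOT LIKE, and only reports a parse error if neither follows. Below is a standalone sketch of that dispatch using illustrative names (token, dispatchNot), not the parser's real types:

package main

import "fmt"

// token is a stand-in for scanner.Token; the constants and dispatchNot are
// illustrative only and are not part of the diff above.
type token int

const (
	tokIN token = iota
	tokLIKE
	tokOTHER
)

// dispatchNot mirrors the new NOT branch: the token following NOT selects
// the expression constructor, and anything else becomes a parse error.
func dispatchNot(next token) (string, error) {
	switch next {
	case tokIN:
		return "expr.NotIn", nil
	case tokLIKE:
		return "expr.NotLike", nil
	}
	return "", fmt.Errorf(`found %v, expected "IN, LIKE"`, next)
}

func main() {
	op, _ := dispatchNot(tokLIKE)
	fmt.Println(op) // expr.NotLike
}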
14 changes: 2 additions & 12 deletions sql/query/expr/comparison.go
@@ -426,7 +426,7 @@ func (op cmpOp) compare(l, r document.Value) (bool, error) {
 func IsComparisonOperator(op Operator) bool {
 	switch op.(type) {
 	case eqOp, neqOp, gtOp, gteOp, ltOp, lteOp,
-		isOp, isNotOp, inOp, notInOp:
+		isOp, isNotOp, inOp, notInOp, likeOp:
 		return true
 	}

@@ -540,17 +540,7 @@ func NotIn(a, b Expr) Expr {
 }

 func (op notInOp) Eval(ctx EvalStack) (document.Value, error) {
-	v, err := op.inOp.Eval(ctx)
-	if err != nil {
-		return v, err
-	}
-	if v == trueLitteral {
-		return falseLitteral, nil
-	}
-	if v == falseLitteral {
-		return trueLitteral, nil
-	}
-	return v, nil
+	return invertBoolResult(op.inOp.Eval)(ctx)
 }

 func (op notInOp) String() string {
17 changes: 17 additions & 0 deletions sql/query/expr/expr.go
@@ -132,3 +132,20 @@ type Parentheses struct {
 func (p Parentheses) Eval(es EvalStack) (document.Value, error) {
 	return p.E.Eval(es)
 }
+
+func invertBoolResult(f func(ctx EvalStack) (document.Value, error)) func(ctx EvalStack) (document.Value, error) {
+	return func(ctx EvalStack) (document.Value, error) {
+		v, err := f(ctx)
+
+		if err != nil {
+			return v, err
+		}
+		if v == trueLitteral {
+			return falseLitteral, nil
+		}
+		if v == falseLitteral {
+			return trueLitteral, nil
+		}
+		return v, nil
+	}
+}
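invertBoolResult factors the true/false flip out of notInOp.Eval so that notLikeOp.Eval (added below in like.go) can reuse it. A minimal self-contained sketch of the same pattern, with genji's EvalStack and document.Value replaced by a simplified stand-in type; only the control flow is meant to match:

package main

import "fmt"

// value is a simplified stand-in for document.Value: it either holds a bool
// or is "not a boolean" (e.g. NULL), in which case inversion leaves it alone.
type value struct {
	isBool bool
	b      bool
}

// invertBool wraps an evaluation function and flips a boolean result while
// passing errors and non-boolean values through, mirroring invertBoolResult.
func invertBool(f func() (value, error)) func() (value, error) {
	return func() (value, error) {
		v, err := f()
		if err != nil || !v.isBool {
			return v, err
		}
		v.b = !v.b
		return v, nil
	}
}

func main() {
	in := func() (value, error) { return value{isBool: true, b: true}, nil } // stands in for inOp.Eval
	notIn := invertBool(in)
	v, _ := notIn()
	fmt.Println(v.b) // false
}

Errors and non-boolean results pass through unchanged, which is what lets NOT IN and NOT LIKE share the helper.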
193 changes: 193 additions & 0 deletions sql/query/expr/like.go
@@ -0,0 +1,193 @@
package expr

import (
	"errors"
	"fmt"
	"github.com/genjidb/genji/document"
	"github.com/genjidb/genji/sql/scanner"
	"regexp"
	"strings"
)

func isWildcard(char byte) bool {
	return char == '%' || char == '_'
}

// replaceUnescaped replaces all instances of oldStr that are not escaped (read:
// preceded) with the specified unescape token with newStr.
// For example, with an escape token of `\\`
// replaceUnescaped("TE\\__ST", "_", ".", `\\`) --> "TE\\_.ST"
// replaceUnescaped("TE\\%%ST", "%", ".*", `\\`) --> "TE\\%.*ST"
// If the preceding escape token is escaped, then oldStr will be replaced.
// For example
// replaceUnescaped("TE\\\\_ST", "_", ".", `\\`) --> "TE\\\\.ST"
func replaceUnescaped(s, oldStr, newStr string, escapeToken string) string {
	// We count the number of occurrences of 'oldStr'.
	// This however can be an overestimate since the oldStr token could be
	// escaped. e.g. `\\_`.
	nOld := strings.Count(s, oldStr)
	if nOld == 0 {
		return s
	}

	// Allocate buffer for final string.
	// This can be an overestimate since some of the oldStr tokens may
	// be escaped.
	// This is fine since we keep track of the running number of bytes
	// actually copied.
	// It's rather difficult to count the exact number of unescaped
	// tokens without manually iterating through the entire string and
	// keeping track of escaped escape tokens.
	retLen := len(s)
	// If len(newStr) - len(oldStr) < 0, then this can under-allocate which
	// will not behave correctly with copy.
	if addnBytes := nOld * (len(newStr) - len(oldStr)); addnBytes > 0 {
		retLen += addnBytes
	}
	ret := make([]byte, retLen)
	retWidth := 0
	start := 0
OldLoop:
	for i := 0; i < nOld; i++ {
		nextIdx := start + strings.Index(s[start:], oldStr)

		escaped := false
		for {
			// We need to look behind to check if the escape token
			// is really an escape token.
			// E.g. if our specified escape token is `\\` and oldStr
			// is `_`, then
			// `\\_` --> escaped
			// `\\\\_` --> not escaped
			// `\\\\\\_` --> escaped
			curIdx := nextIdx
			lookbehindIdx := curIdx - len(escapeToken)
			for lookbehindIdx >= 0 && s[lookbehindIdx:curIdx] == escapeToken {
				escaped = !escaped
				curIdx = lookbehindIdx
				lookbehindIdx = curIdx - len(escapeToken)
			}

			// The token was not escaped. Proceed.
			if !escaped {
				break
			}

			// Token was escaped. Copy everything over and continue.
			retWidth += copy(ret[retWidth:], s[start:nextIdx+len(oldStr)])
			start = nextIdx + len(oldStr)

			// Continue with next oldStr token.
			continue OldLoop
		}

		// Token was not escaped so we replace it with newStr.
		// Two copies is more efficient than concatenating the slices.
		retWidth += copy(ret[retWidth:], s[start:nextIdx])
		retWidth += copy(ret[retWidth:], newStr)
		start = nextIdx + len(oldStr)
	}

	retWidth += copy(ret[retWidth:], s[start:])
	return string(ret[0:retWidth])
}

func patternToRegexp(pattern string) (*regexp.Regexp, error) {
	pattern = regexp.QuoteMeta(pattern)
	pattern = replaceUnescaped(pattern, `%`, `.*`, `\\`)
	pattern = replaceUnescaped(pattern, `_`, `.`, `\\`)
	return regexp.Compile(pattern)
}

func like(text, pattern string) (bool, error) {
	if pattern == "" {
		// true only if text == pattern => text == "" => len(text) == 0
		return len(text) == 0, nil
	}

	if pattern == "%" {
		// any match
		return true, nil
	}

	if pattern == "_" {
		// at least one character
		return len(text) > 0, nil
	}

	if len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "%_") {
		first := pattern[0]
		last := pattern[len(pattern)-1]
		switch {
		case !isWildcard(first) && !isWildcard(last):
			// exact match
			return text == pattern, nil
		case first == '%' && !isWildcard(last):
			// suffix match
			return strings.HasSuffix(text, pattern[1:]), nil
		case last == '%' && !isWildcard(first):
			// prefix match
			return strings.HasPrefix(text, pattern[0:len(pattern)-1]), nil
		case first == '%' && last == '%':
			// contains
			return strings.Contains(text, pattern[1:len(pattern)-1]), nil
		}
	}

	r, err := patternToRegexp(pattern)
	if err != nil {
		return false, err
	}
	return r.MatchString(text), nil
}

type likeOp struct {
	*simpleOperator
}

// Like creates an expression that evaluates to the result of a LIKE b.
func Like(a, b Expr) Expr {
	return &likeOp{&simpleOperator{a, b, scanner.LIKE}}
}

func (op likeOp) Eval(ctx EvalStack) (document.Value, error) {
	a, b, err := op.simpleOperator.eval(ctx)
	if err != nil {
		return nullLitteral, err
	}

	if a.Type != document.TextValue || b.Type != document.TextValue {
		return nullLitteral, errors.New("LIKE operator takes text values")
	}

	ok, err := like(a.V.(string), b.V.(string))
	if err != nil {
		return nullLitteral, err
	}
	if ok {
		return trueLitteral, nil
	}

	return falseLitteral, nil
}

func (op likeOp) String() string {
	return fmt.Sprintf("%v LIKE %v", op.a, op.b)
}

type notLikeOp struct {
	likeOp
}

// NotLike creates an expression that evaluates to the result of a NOT LIKE b.
func NotLike(a, b Expr) Expr {
	return &notLikeOp{likeOp{&simpleOperator{a, b, scanner.LIKE}}}
}

func (op notLikeOp) Eval(ctx EvalStack) (document.Value, error) {
	return invertBoolResult(op.likeOp.Eval)(ctx)
}

func (op notLikeOp) String() string {
	return fmt.Sprintf("%v NOT LIKE %v", op.a, op.b)
}
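To make the wildcard translation concrete: patternToRegexp quotes the pattern, then rewrites unescaped % into .* and unescaped _ into . before compiling it, and like falls back to that regexp only when the fast paths above do not apply. The sketch below is an illustrative approximation, not the PR's code: it skips the escape handling done by replaceUnescaped and anchors the expression with ^ and $, whereas the PR relies on the fast paths plus an unanchored MatchString.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// likeRegexp is an illustrative translation of a LIKE pattern: % matches any
// run of characters, _ matches exactly one, and everything else is literal.
// It anchors with ^ and $ so the whole text must match the pattern.
func likeRegexp(pattern string) (*regexp.Regexp, error) {
	var b strings.Builder
	b.WriteString("^")
	for _, r := range pattern {
		switch r {
		case '%':
			b.WriteString(".*")
		case '_':
			b.WriteString(".")
		default:
			b.WriteString(regexp.QuoteMeta(string(r)))
		}
	}
	b.WriteString("$")
	return regexp.Compile(b.String())
}

func main() {
	re, err := likeRegexp("A%B%C")
	if err != nil {
		panic(err)
	}
	fmt.Println(re.MatchString("AdBeeeC")) // true, same as the Regexp case in like_test.go
	fmt.Println(re.MatchString("AdBeee"))  // false: the pattern requires a trailing C
}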
51 changes: 51 additions & 0 deletions sql/query/expr/like_test.go
@@ -0,0 +1,51 @@
package expr

import "testing"

func Test_like(t *testing.T) {
	type args struct {
		text    string
		pattern string
	}
	tests := []struct {
		name    string
		args    args
		want    bool
		wantErr bool
	}{
		{"Empty", args{"", ""}, true, false},
		{"Empty false", args{"abc", ""}, false, false},

		{"Any", args{"abc", "%"}, true, false},
		{"Any false", args{"", "%"}, true, false},

		{"Any one and more", args{"abc", "_"}, true, false},
		{"Any one and more false", args{"", "_"}, false, false},

		{"Exact", args{"abc", "abc"}, true, false},
		{"Exact false", args{"abc", "def"}, false, false},

		{"Prefix", args{"abcdef", "abc%"}, true, false},
		{"Prefix false", args{"abcdef", "def%"}, false, false},

		{"Suffix", args{"defabc", "%abc"}, true, false},
		{"Suffix false", args{"defabc", "%def"}, false, false},

		{"Contains", args{"defabcdef", "%abc%"}, true, false},
		{"Contains false", args{"abcd", "%def%"}, false, false},

		{"Regexp", args{"AdBeeeC", "A%B%C"}, true, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := like(tt.args.text, tt.args.pattern)
			if (err != nil) != tt.wantErr {
				t.Errorf("like() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("like() got = %v, want %v", got, tt.want)
			}
		})
	}
}
1 change: 1 addition & 0 deletions sql/scanner/scanner_test.go
@@ -54,6 +54,7 @@ func TestScanner_Scan(t *testing.T) {
 		{s: `>=`, tok: scanner.GTE, raw: `>=`},
 		{s: `IN`, tok: scanner.IN, raw: `IN`},
 		{s: `IS`, tok: scanner.IS, raw: `IS`},
+		{s: `LIKE`, tok: scanner.LIKE, raw: `LIKE`},

 		// Misc tokens
 		{s: `(`, tok: scanner.LPAREN, raw: `(`},
6 changes: 4 additions & 2 deletions sql/scanner/token.go
@@ -56,6 +56,7 @@ const (
 	GTE // >=
 	IN // IN
 	IS // IS
+	LIKE // LIKE
 	operatorEnd

 	LPAREN // (
@@ -165,6 +166,7 @@ var tokens = [...]string{
 	GTE: ">=",
 	IN: "IN",
 	IS: "IS",
+	LIKE: "LIKE",

 	LPAREN: "(",
 	RPAREN: ")",
@@ -238,7 +240,7 @@ func initKeywords() {
 	for tok := keywordBeg + 1; tok < keywordEnd; tok++ {
 		keywords[strings.ToLower(tokens[tok])] = tok
 	}
-	for _, tok := range []Token{AND, OR, TRUE, FALSE, NULL, IN, IS} {
+	for _, tok := range []Token{AND, OR, TRUE, FALSE, NULL, IN, IS, LIKE} {
 		keywords[strings.ToLower(tokens[tok])] = tok
 	}
 }
@@ -260,7 +262,7 @@ func (tok Token) Precedence() int {
 		return 2
 	case IN:
 		return 3
-	case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE, IS:
+	case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE, IS, LIKE:
 		return 4
 	case ADD, SUB, BITWISEOR, BITWISEXOR:
 		return 5