Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #1799: make scanner recognize regex #1806

Merged
merged 4 commits into from
Mar 4, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
- [#1644](https://github.com/influxdb/influxdb/pull/1644): Add batching support for significantly improved write performance
- [#1704](https://github.com/influxdb/influxdb/pull/1704): Fix queries that pull back raw data (i.e. ones without aggregate functions)
- [#1718](https://github.com/influxdb/influxdb/pull/1718): Return an error on write if any of the points are don't have at least one field
- [#1806](https://github.com/influxdb/influxdb/pull/1806): Fix regex parsing. Change regex syntax to use / delimiters.


## v0.9.0-rc1,2 [no public release]
Expand Down
12 changes: 6 additions & 6 deletions httpd/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1913,7 +1913,7 @@ func TestHandler_serveShowSeries(t *testing.T) {

// SHOW SERIES WHERE =~ regex
{
q: `SHOW SERIES WHERE region =~ 'ca.*'`,
q: `SHOW SERIES WHERE region =~ /ca.*/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand All @@ -1933,7 +1933,7 @@ func TestHandler_serveShowSeries(t *testing.T) {

// SHOW SERIES WHERE !~ regex
{
q: `SHOW SERIES WHERE host !~ 'server0[12]'`,
q: `SHOW SERIES WHERE host !~ /server0[12]/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand Down Expand Up @@ -2046,13 +2046,13 @@ func TestHandler_serveShowMeasurements(t *testing.T) {

// SHOW MEASUREMENTS WHERE =~ regex
{
q: `SHOW MEASUREMENTS WHERE region =~ 'ca.*'`,
q: `SHOW MEASUREMENTS WHERE region =~ /ca.*/`,
r: `{"results":[{"series":[{"name":"measurements","columns":["name"],"values":[["gpu"],["other"]]}]}]}`,
},

// SHOW MEASUREMENTS WHERE !~ regex
{
q: `SHOW MEASUREMENTS WHERE region !~ 'ca.*'`,
q: `SHOW MEASUREMENTS WHERE region !~ /ca.*/`,
r: `{"results":[{"series":[{"name":"measurements","columns":["name"],"values":[["cpu"]]}]}]}`,
},
}
Expand Down Expand Up @@ -2302,7 +2302,7 @@ func TestHandler_serveShowTagValues(t *testing.T) {
},
// SHOW TAG VALUES FROM ... WHERE =~ regex
{
q: `SHOW TAG VALUES WITH KEY = host WHERE region =~ 'ca.*'`,
q: `SHOW TAG VALUES WITH KEY = host WHERE region =~ /ca.*/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand All @@ -2321,7 +2321,7 @@ func TestHandler_serveShowTagValues(t *testing.T) {
},
// SHOW TAG VALUES FROM ... WHERE !~ regex
{
q: `SHOW TAG VALUES WITH KEY = region WHERE host !~ 'server0[12]'`,
q: `SHOW TAG VALUES WITH KEY = region WHERE host !~ /server0[12]/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand Down
8 changes: 7 additions & 1 deletion influxql/INFLUXQL.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ time_lit = "2006-01-02 15:04:05.999999" | "2006-01-02"
bool_lit = TRUE | FALSE .
```

### Regular Expressions

```
regex_lit = "/" { unicode_char } "/" .
```

## Queries

A query is composed of one or more statements separated by a semicolon.
Expand Down Expand Up @@ -602,7 +608,7 @@ binary_op = "+" | "-" | "*" | "/" | "AND" | "OR" | "=" | "!=" | "<" |
expr = unary_expr { binary_op unary_expr } .

unary_expr = "(" expr ")" | var_ref | time_lit | string_lit |
number_lit | bool_lit | duration_lit .
number_lit | bool_lit | duration_lit | regex_lit .
```

## Other
Expand Down
84 changes: 36 additions & 48 deletions influxql/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,8 @@ func (p *Parser) parseSortField() (*SortField, error) {

// ParseExpr parses an expression.
func (p *Parser) ParseExpr() (Expr, error) {
var err error

// Parse a non-binary expression type to start.
// This variable will always be the root of the expression tree.
expr, err := p.parseUnaryExpr()
Expand All @@ -1472,10 +1474,18 @@ func (p *Parser) ParseExpr() (Expr, error) {
return expr, nil
}

// Otherwise parse the next unary expression.
rhs, err := p.parseUnaryExpr()
if err != nil {
return nil, err
// Otherwise parse the next expression.
var rhs Expr
if IsRegexOp(op) {
// RHS of a regex operator must be a regular expression.
p.consumeWhitespace()
if rhs, err = p.parseRegex(); err != nil {
return nil, err
}
} else {
if rhs, err = p.parseUnaryExpr(); err != nil {
return nil, err
}
}

// Assign the new root based on the precendence of the LHS and RHS operators.
Expand All @@ -1488,14 +1498,6 @@ func (p *Parser) ParseExpr() (Expr, error) {
} else {
expr = &BinaryExpr{LHS: expr, RHS: rhs, Op: op}
}

// If the operator was =~ or !~, parse the regular expression.
b, ok := expr.(*BinaryExpr)
if ok && (b.Op == EQREGEX || b.Op == NEQREGEX) {
if expr, err = p.parseRegexExpr(b); err != nil {
return nil, err
}
}
}
}

Expand Down Expand Up @@ -1562,51 +1564,37 @@ func (p *Parser) parseUnaryExpr() (Expr, error) {
return &DurationLiteral{Val: v}, nil
case MUL:
return &Wildcard{}, nil
case REGEX:
re, err := regexp.Compile(lit)
if err != nil {
return nil, &ParseError{Message: err.Error(), Pos: pos}
}
return &RegexLiteral{Val: re}, nil
default:
return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos)
}
}

// parseRegexExpr parses the string literal on one side of a binary expression
// and returns a new binary expression with a regex literal in place of the
// string literal.
func (p *Parser) parseRegexExpr(expr *BinaryExpr) (*BinaryExpr, error) {
if expr.Op != EQREGEX && expr.Op != NEQREGEX {
// Don't call parseRegex unless you have a regex operator!
panic("parseRegex only works with =~ or !~ regex operators")
}

newExpr := &BinaryExpr{Op: expr.Op}
// parseRegex parses a regular expression.
func (p *Parser) parseRegex() (Expr, error) {
tok, pos, lit := p.s.s.ScanRegex()

if regex, ok := expr.RHS.(*StringLiteral); ok {
// Found regex text on right side of operator.
re, err := regexp.Compile(regex.Val)
if err != nil {
return nil, err
}
newExpr.RHS = &RegexLiteral{Val: re}

// Make sure left side of operator is an identifier.
if newExpr.LHS, ok = expr.LHS.(*VarRef); !ok {
return nil, fmt.Errorf("left operand of operator %s must be an identifier", expr.Op.String())
}
} else if regex, ok = expr.LHS.(*StringLiteral); ok {
// Found regex text on left side of operator.
re, err := regexp.Compile(regex.Val)
if err != nil {
return nil, err
}
newExpr.LHS = &RegexLiteral{Val: re}
if tok == BADESCAPE {
msg := fmt.Sprintf("bad escape: %s", lit)
return nil, &ParseError{Message: msg, Pos: pos}
} else if tok == BADREGEX {
msg := fmt.Sprintf("bad regex: %s", lit)
return nil, &ParseError{Message: msg, Pos: pos}
} else if tok != REGEX {
return nil, newParseError(tokstr(tok, lit), []string{"regex"}, pos)
}

// Make sure right side of operator is an identifer.
if newExpr.RHS, ok = expr.RHS.(*VarRef); !ok {
return nil, fmt.Errorf("right operand of operator %s must be an identifier", expr.Op.String())
}
} else {
return nil, fmt.Errorf("operator %s requires one string operand", expr.Op.String())
re, err := regexp.Compile(lit)
if err != nil {
return nil, &ParseError{Message: err.Error(), Pos: pos}
}

return newExpr, nil
return &RegexLiteral{Val: re}, nil
}

// parseCall parses a function call.
Expand Down
37 changes: 25 additions & 12 deletions influxql/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,28 @@ func TestParser_ParseStatement(t *testing.T) {
},
},

// SELECT * FROM cpu WHERE host = 'serverC' AND region =~ /.*west.*/
{
s: `SELECT * FROM cpu WHERE host = 'serverC' AND region =~ /.*west.*/`,
stmt: &influxql.SelectStatement{
Fields: []*influxql.Field{{Expr: &influxql.Wildcard{}}},
Source: &influxql.Measurement{Name: "cpu"},
Condition: &influxql.BinaryExpr{
Op: influxql.AND,
LHS: &influxql.BinaryExpr{
Op: influxql.EQ,
LHS: &influxql.VarRef{Val: "host"},
RHS: &influxql.StringLiteral{Val: "serverC"},
},
RHS: &influxql.BinaryExpr{
Op: influxql.EQREGEX,
LHS: &influxql.VarRef{Val: "region"},
RHS: &influxql.RegexLiteral{Val: regexp.MustCompile(".*west.*")},
},
},
},
},

// DELETE statement
{
s: `DELETE FROM myseries WHERE host = 'hosta.influxdb.org'`,
Expand Down Expand Up @@ -746,6 +768,7 @@ func TestParser_ParseStatement(t *testing.T) {
tt.stmt.(*influxql.CreateContinuousQueryStatement).Source.GroupByInterval()
}
} else if tt.err == "" && !reflect.DeepEqual(tt.stmt, stmt) {
t.Logf("exp=%s\ngot=%s\n", mustMarshalJSON(tt.stmt), mustMarshalJSON(stmt))
t.Errorf("%d. %q\n\nstmt mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", i, tt.s, tt.stmt, stmt)
}
}
Expand Down Expand Up @@ -838,26 +861,16 @@ func TestParser_ParseExpr(t *testing.T) {
},
},

// Binary expression with regex on right.
// Binary expression with regex.
{
s: `region =~ 'us.*'`,
s: "region =~ /us.*/",
expr: &influxql.BinaryExpr{
Op: influxql.EQREGEX,
LHS: &influxql.VarRef{Val: "region"},
RHS: &influxql.RegexLiteral{Val: regexp.MustCompile(`us.*`)},
},
},

// Binary expression with NEQ regex on left.
{
s: `'us.*' !~ region`,
expr: &influxql.BinaryExpr{
Op: influxql.NEQREGEX,
RHS: &influxql.VarRef{Val: "region"},
LHS: &influxql.RegexLiteral{Val: regexp.MustCompile(`us.*`)},
},
},

// Complex binary expression.
{
s: `value + 3 < 30 AND 1 + 2 OR true`,
Expand Down
72 changes: 71 additions & 1 deletion influxql/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,25 @@ func (s *Scanner) scanString() (tok Token, pos Pos, lit string) {
return STRING, pos, lit
}

func (s *Scanner) ScanRegex() (tok Token, pos Pos, lit string) {
_, pos = s.r.curr()

// Start & end sentinels.
start, end := '/', '/'
// Valid escape chars.
escapes := map[rune]rune{'/': '/'}

b, err := ScanDelimited(s.r, start, end, escapes)

if err == errBadEscape {
_, pos = s.r.curr()
return BADESCAPE, pos, lit
} else if err != nil {
return BADREGEX, pos, lit
}
return REGEX, pos, string(b)
}

// scanNumber consumes anything that looks like the start of a number.
// Numbers start with a digit, full stop, plus sign or minus sign.
// This function can return non-number tokens if a scan is a false positive.
Expand Down Expand Up @@ -300,6 +319,16 @@ func newBufScanner(r io.Reader) *bufScanner {

// Scan reads the next token from the scanner.
func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) {
return s.scanFunc(s.s.Scan)
}

// ScanRegex reads a regex token from the scanner.
func (s *bufScanner) ScanRegex() (tok Token, pos Pos, lit string) {
return s.scanFunc(s.s.ScanRegex)
}

// scanFunc uses the provided function to scan the next token.
func (s *bufScanner) scanFunc(scan func() (Token, Pos, string)) (tok Token, pos Pos, lit string) {
// If we have unread tokens then read them off the buffer first.
if s.n > 0 {
s.n--
Expand All @@ -309,7 +338,7 @@ func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) {
// Move buffer position forward and save the token.
s.i = (s.i + 1) % len(s.buf)
buf := &s.buf[s.i]
buf.tok, buf.pos, buf.lit = s.s.Scan()
buf.tok, buf.pos, buf.lit = scan()

return s.curr()
}
Expand Down Expand Up @@ -415,6 +444,46 @@ func (r *reader) curr() (ch rune, pos Pos) {
// eof is a marker code point to signify that the reader can't read any more.
const eof = rune(0)

func ScanDelimited(r io.RuneScanner, start, end rune, escapes map[rune]rune) ([]byte, error) {
// Scan start delimiter.
if ch, _, err := r.ReadRune(); err != nil {
return nil, err
} else if ch != start {
return nil, fmt.Errorf("expected %s; found %s", string(start), string(ch))
}

var buf bytes.Buffer
for {
ch0, _, err := r.ReadRune()
if ch0 == end {
return buf.Bytes(), nil
} else if err != nil {
return buf.Bytes(), err
} else if ch0 == '\n' {
return nil, errors.New("delimited text contains new line")
} else if ch0 == '\\' {
// If the next character is an escape then write the escaped char.
// If it's not a valid escape then return an error.
ch1, _, err := r.ReadRune()
if err != nil {
return nil, err
}

r, ok := escapes[ch1]
if !ok {
buf.Reset()
_, _ = buf.WriteRune(ch0)
_, _ = buf.WriteRune(ch1)
return buf.Bytes(), errBadEscape
}

_, _ = buf.WriteRune(r)
} else {
_, _ = buf.WriteRune(ch0)
}
}
}

// ScanString reads a quoted string from a rune reader.
func ScanString(r io.RuneScanner) (string, error) {
ending, _, err := r.ReadRune()
Expand Down Expand Up @@ -450,6 +519,7 @@ func ScanString(r io.RuneScanner) (string, error) {

var errBadString = errors.New("bad string")
var errBadEscape = errors.New("bad escape")
var errBadRegex = errors.New("bad regex")

// ScanBareIdent reads bare identifier from a rune reader.
func ScanBareIdent(r io.RuneScanner) string {
Expand Down
Loading