Skip to content

Commit

Permalink
Merge pull request #1806 from influxdb/fix-1799
Browse files Browse the repository at this point in the history
fix #1799: make scanner recognize regex
  • Loading branch information
dgnorton committed Mar 4, 2015
2 parents 4391865 + 6652de9 commit 88a4852
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 69 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
- [#1644](https://github.com/influxdb/influxdb/pull/1644): Add batching support for significantly improved write performance
- [#1704](https://github.com/influxdb/influxdb/pull/1704): Fix queries that pull back raw data (i.e. ones without aggregate functions)
- [#1718](https://github.com/influxdb/influxdb/pull/1718): Return an error on write if any of the points are don't have at least one field
- [#1806](https://github.com/influxdb/influxdb/pull/1806): Fix regex parsing. Change regex syntax to use / delimiters.


## v0.9.0-rc1,2 [no public release]
Expand Down
12 changes: 6 additions & 6 deletions httpd/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1413,7 +1413,7 @@ func TestHandler_serveShowSeries(t *testing.T) {

// SHOW SERIES WHERE =~ regex
{
q: `SHOW SERIES WHERE region =~ 'ca.*'`,
q: `SHOW SERIES WHERE region =~ /ca.*/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand All @@ -1433,7 +1433,7 @@ func TestHandler_serveShowSeries(t *testing.T) {

// SHOW SERIES WHERE !~ regex
{
q: `SHOW SERIES WHERE host !~ 'server0[12]'`,
q: `SHOW SERIES WHERE host !~ /server0[12]/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand Down Expand Up @@ -1546,13 +1546,13 @@ func TestHandler_serveShowMeasurements(t *testing.T) {

// SHOW MEASUREMENTS WHERE =~ regex
{
q: `SHOW MEASUREMENTS WHERE region =~ 'ca.*'`,
q: `SHOW MEASUREMENTS WHERE region =~ /ca.*/`,
r: `{"results":[{"series":[{"name":"measurements","columns":["name"],"values":[["gpu"],["other"]]}]}]}`,
},

// SHOW MEASUREMENTS WHERE !~ regex
{
q: `SHOW MEASUREMENTS WHERE region !~ 'ca.*'`,
q: `SHOW MEASUREMENTS WHERE region !~ /ca.*/`,
r: `{"results":[{"series":[{"name":"measurements","columns":["name"],"values":[["cpu"]]}]}]}`,
},
}
Expand Down Expand Up @@ -1802,7 +1802,7 @@ func TestHandler_serveShowTagValues(t *testing.T) {
},
// SHOW TAG VALUES FROM ... WHERE =~ regex
{
q: `SHOW TAG VALUES WITH KEY = host WHERE region =~ 'ca.*'`,
q: `SHOW TAG VALUES WITH KEY = host WHERE region =~ /ca.*/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand All @@ -1821,7 +1821,7 @@ func TestHandler_serveShowTagValues(t *testing.T) {
},
// SHOW TAG VALUES FROM ... WHERE !~ regex
{
q: `SHOW TAG VALUES WITH KEY = region WHERE host !~ 'server0[12]'`,
q: `SHOW TAG VALUES WITH KEY = region WHERE host !~ /server0[12]/`,
r: &influxdb.Results{
Results: []*influxdb.Result{
{
Expand Down
8 changes: 7 additions & 1 deletion influxql/INFLUXQL.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ time_lit = "2006-01-02 15:04:05.999999" | "2006-01-02"
bool_lit = TRUE | FALSE .
```

### Regular Expressions

```
regex_lit = "/" { unicode_char } "/" .
```

## Queries

A query is composed of one or more statements separated by a semicolon.
Expand Down Expand Up @@ -602,7 +608,7 @@ binary_op = "+" | "-" | "*" | "/" | "AND" | "OR" | "=" | "!=" | "<" |
expr = unary_expr { binary_op unary_expr } .
unary_expr = "(" expr ")" | var_ref | time_lit | string_lit |
number_lit | bool_lit | duration_lit .
number_lit | bool_lit | duration_lit | regex_lit .
```

## Other
Expand Down
84 changes: 36 additions & 48 deletions influxql/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,8 @@ func (p *Parser) parseSortField() (*SortField, error) {

// ParseExpr parses an expression.
func (p *Parser) ParseExpr() (Expr, error) {
var err error

// Parse a non-binary expression type to start.
// This variable will always be the root of the expression tree.
expr, err := p.parseUnaryExpr()
Expand All @@ -1472,10 +1474,18 @@ func (p *Parser) ParseExpr() (Expr, error) {
return expr, nil
}

// Otherwise parse the next unary expression.
rhs, err := p.parseUnaryExpr()
if err != nil {
return nil, err
// Otherwise parse the next expression.
var rhs Expr
if IsRegexOp(op) {
// RHS of a regex operator must be a regular expression.
p.consumeWhitespace()
if rhs, err = p.parseRegex(); err != nil {
return nil, err
}
} else {
if rhs, err = p.parseUnaryExpr(); err != nil {
return nil, err
}
}

// Assign the new root based on the precendence of the LHS and RHS operators.
Expand All @@ -1488,14 +1498,6 @@ func (p *Parser) ParseExpr() (Expr, error) {
} else {
expr = &BinaryExpr{LHS: expr, RHS: rhs, Op: op}
}

// If the operator was =~ or !~, parse the regular expression.
b, ok := expr.(*BinaryExpr)
if ok && (b.Op == EQREGEX || b.Op == NEQREGEX) {
if expr, err = p.parseRegexExpr(b); err != nil {
return nil, err
}
}
}
}

Expand Down Expand Up @@ -1562,51 +1564,37 @@ func (p *Parser) parseUnaryExpr() (Expr, error) {
return &DurationLiteral{Val: v}, nil
case MUL:
return &Wildcard{}, nil
case REGEX:
re, err := regexp.Compile(lit)
if err != nil {
return nil, &ParseError{Message: err.Error(), Pos: pos}
}
return &RegexLiteral{Val: re}, nil
default:
return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos)
}
}

// parseRegexExpr parses the string literal on one side of a binary expression
// and returns a new binary expression with a regex literal in place of the
// string literal.
func (p *Parser) parseRegexExpr(expr *BinaryExpr) (*BinaryExpr, error) {
if expr.Op != EQREGEX && expr.Op != NEQREGEX {
// Don't call parseRegex unless you have a regex operator!
panic("parseRegex only works with =~ or !~ regex operators")
}

newExpr := &BinaryExpr{Op: expr.Op}
// parseRegex parses a regular expression.
func (p *Parser) parseRegex() (Expr, error) {
tok, pos, lit := p.s.s.ScanRegex()

if regex, ok := expr.RHS.(*StringLiteral); ok {
// Found regex text on right side of operator.
re, err := regexp.Compile(regex.Val)
if err != nil {
return nil, err
}
newExpr.RHS = &RegexLiteral{Val: re}

// Make sure left side of operator is an identifier.
if newExpr.LHS, ok = expr.LHS.(*VarRef); !ok {
return nil, fmt.Errorf("left operand of operator %s must be an identifier", expr.Op.String())
}
} else if regex, ok = expr.LHS.(*StringLiteral); ok {
// Found regex text on left side of operator.
re, err := regexp.Compile(regex.Val)
if err != nil {
return nil, err
}
newExpr.LHS = &RegexLiteral{Val: re}
if tok == BADESCAPE {
msg := fmt.Sprintf("bad escape: %s", lit)
return nil, &ParseError{Message: msg, Pos: pos}
} else if tok == BADREGEX {
msg := fmt.Sprintf("bad regex: %s", lit)
return nil, &ParseError{Message: msg, Pos: pos}
} else if tok != REGEX {
return nil, newParseError(tokstr(tok, lit), []string{"regex"}, pos)
}

// Make sure right side of operator is an identifer.
if newExpr.RHS, ok = expr.RHS.(*VarRef); !ok {
return nil, fmt.Errorf("right operand of operator %s must be an identifier", expr.Op.String())
}
} else {
return nil, fmt.Errorf("operator %s requires one string operand", expr.Op.String())
re, err := regexp.Compile(lit)
if err != nil {
return nil, &ParseError{Message: err.Error(), Pos: pos}
}

return newExpr, nil
return &RegexLiteral{Val: re}, nil
}

// parseCall parses a function call.
Expand Down
37 changes: 25 additions & 12 deletions influxql/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,28 @@ func TestParser_ParseStatement(t *testing.T) {
},
},

// SELECT * FROM cpu WHERE host = 'serverC' AND region =~ /.*west.*/
{
s: `SELECT * FROM cpu WHERE host = 'serverC' AND region =~ /.*west.*/`,
stmt: &influxql.SelectStatement{
Fields: []*influxql.Field{{Expr: &influxql.Wildcard{}}},
Source: &influxql.Measurement{Name: "cpu"},
Condition: &influxql.BinaryExpr{
Op: influxql.AND,
LHS: &influxql.BinaryExpr{
Op: influxql.EQ,
LHS: &influxql.VarRef{Val: "host"},
RHS: &influxql.StringLiteral{Val: "serverC"},
},
RHS: &influxql.BinaryExpr{
Op: influxql.EQREGEX,
LHS: &influxql.VarRef{Val: "region"},
RHS: &influxql.RegexLiteral{Val: regexp.MustCompile(".*west.*")},
},
},
},
},

// DELETE statement
{
s: `DELETE FROM myseries WHERE host = 'hosta.influxdb.org'`,
Expand Down Expand Up @@ -746,6 +768,7 @@ func TestParser_ParseStatement(t *testing.T) {
tt.stmt.(*influxql.CreateContinuousQueryStatement).Source.GroupByInterval()
}
} else if tt.err == "" && !reflect.DeepEqual(tt.stmt, stmt) {
t.Logf("exp=%s\ngot=%s\n", mustMarshalJSON(tt.stmt), mustMarshalJSON(stmt))
t.Errorf("%d. %q\n\nstmt mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", i, tt.s, tt.stmt, stmt)
}
}
Expand Down Expand Up @@ -838,26 +861,16 @@ func TestParser_ParseExpr(t *testing.T) {
},
},

// Binary expression with regex on right.
// Binary expression with regex.
{
s: `region =~ 'us.*'`,
s: "region =~ /us.*/",
expr: &influxql.BinaryExpr{
Op: influxql.EQREGEX,
LHS: &influxql.VarRef{Val: "region"},
RHS: &influxql.RegexLiteral{Val: regexp.MustCompile(`us.*`)},
},
},

// Binary expression with NEQ regex on left.
{
s: `'us.*' !~ region`,
expr: &influxql.BinaryExpr{
Op: influxql.NEQREGEX,
RHS: &influxql.VarRef{Val: "region"},
LHS: &influxql.RegexLiteral{Val: regexp.MustCompile(`us.*`)},
},
},

// Complex binary expression.
{
s: `value + 3 < 30 AND 1 + 2 OR true`,
Expand Down
72 changes: 71 additions & 1 deletion influxql/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,25 @@ func (s *Scanner) scanString() (tok Token, pos Pos, lit string) {
return STRING, pos, lit
}

func (s *Scanner) ScanRegex() (tok Token, pos Pos, lit string) {
_, pos = s.r.curr()

// Start & end sentinels.
start, end := '/', '/'
// Valid escape chars.
escapes := map[rune]rune{'/': '/'}

b, err := ScanDelimited(s.r, start, end, escapes)

if err == errBadEscape {
_, pos = s.r.curr()
return BADESCAPE, pos, lit
} else if err != nil {
return BADREGEX, pos, lit
}
return REGEX, pos, string(b)
}

// scanNumber consumes anything that looks like the start of a number.
// Numbers start with a digit, full stop, plus sign or minus sign.
// This function can return non-number tokens if a scan is a false positive.
Expand Down Expand Up @@ -300,6 +319,16 @@ func newBufScanner(r io.Reader) *bufScanner {

// Scan reads the next token from the scanner.
func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) {
return s.scanFunc(s.s.Scan)
}

// ScanRegex reads a regex token from the scanner.
func (s *bufScanner) ScanRegex() (tok Token, pos Pos, lit string) {
return s.scanFunc(s.s.ScanRegex)
}

// scanFunc uses the provided function to scan the next token.
func (s *bufScanner) scanFunc(scan func() (Token, Pos, string)) (tok Token, pos Pos, lit string) {
// If we have unread tokens then read them off the buffer first.
if s.n > 0 {
s.n--
Expand All @@ -309,7 +338,7 @@ func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) {
// Move buffer position forward and save the token.
s.i = (s.i + 1) % len(s.buf)
buf := &s.buf[s.i]
buf.tok, buf.pos, buf.lit = s.s.Scan()
buf.tok, buf.pos, buf.lit = scan()

return s.curr()
}
Expand Down Expand Up @@ -415,6 +444,46 @@ func (r *reader) curr() (ch rune, pos Pos) {
// eof is a marker code point to signify that the reader can't read any more.
const eof = rune(0)

func ScanDelimited(r io.RuneScanner, start, end rune, escapes map[rune]rune) ([]byte, error) {
// Scan start delimiter.
if ch, _, err := r.ReadRune(); err != nil {
return nil, err
} else if ch != start {
return nil, fmt.Errorf("expected %s; found %s", string(start), string(ch))
}

var buf bytes.Buffer
for {
ch0, _, err := r.ReadRune()
if ch0 == end {
return buf.Bytes(), nil
} else if err != nil {
return buf.Bytes(), err
} else if ch0 == '\n' {
return nil, errors.New("delimited text contains new line")
} else if ch0 == '\\' {
// If the next character is an escape then write the escaped char.
// If it's not a valid escape then return an error.
ch1, _, err := r.ReadRune()
if err != nil {
return nil, err
}

r, ok := escapes[ch1]
if !ok {
buf.Reset()
_, _ = buf.WriteRune(ch0)
_, _ = buf.WriteRune(ch1)
return buf.Bytes(), errBadEscape
}

_, _ = buf.WriteRune(r)
} else {
_, _ = buf.WriteRune(ch0)
}
}
}

// ScanString reads a quoted string from a rune reader.
func ScanString(r io.RuneScanner) (string, error) {
ending, _, err := r.ReadRune()
Expand Down Expand Up @@ -450,6 +519,7 @@ func ScanString(r io.RuneScanner) (string, error) {

var errBadString = errors.New("bad string")
var errBadEscape = errors.New("bad escape")
var errBadRegex = errors.New("bad regex")

// ScanBareIdent reads bare identifier from a rune reader.
func ScanBareIdent(r io.RuneScanner) string {
Expand Down
Loading

0 comments on commit 88a4852

Please sign in to comment.