diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f4923a53da..b604bd43709 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ - [#1644](https://github.com/influxdb/influxdb/pull/1644): Add batching support for significantly improved write performance - [#1704](https://github.com/influxdb/influxdb/pull/1704): Fix queries that pull back raw data (i.e. ones without aggregate functions) - [#1718](https://github.com/influxdb/influxdb/pull/1718): Return an error on write if any of the points are don't have at least one field +- [#1806](https://github.com/influxdb/influxdb/pull/1806): Fix regex parsing. Change regex syntax to use / delimiters. ## v0.9.0-rc1,2 [no public release] diff --git a/httpd/handler_test.go b/httpd/handler_test.go index a051048c9d7..f35f2ffb4bb 100644 --- a/httpd/handler_test.go +++ b/httpd/handler_test.go @@ -1413,7 +1413,7 @@ func TestHandler_serveShowSeries(t *testing.T) { // SHOW SERIES WHERE =~ regex { - q: `SHOW SERIES WHERE region =~ 'ca.*'`, + q: `SHOW SERIES WHERE region =~ /ca.*/`, r: &influxdb.Results{ Results: []*influxdb.Result{ { @@ -1433,7 +1433,7 @@ func TestHandler_serveShowSeries(t *testing.T) { // SHOW SERIES WHERE !~ regex { - q: `SHOW SERIES WHERE host !~ 'server0[12]'`, + q: `SHOW SERIES WHERE host !~ /server0[12]/`, r: &influxdb.Results{ Results: []*influxdb.Result{ { @@ -1546,13 +1546,13 @@ func TestHandler_serveShowMeasurements(t *testing.T) { // SHOW MEASUREMENTS WHERE =~ regex { - q: `SHOW MEASUREMENTS WHERE region =~ 'ca.*'`, + q: `SHOW MEASUREMENTS WHERE region =~ /ca.*/`, r: `{"results":[{"series":[{"name":"measurements","columns":["name"],"values":[["gpu"],["other"]]}]}]}`, }, // SHOW MEASUREMENTS WHERE !~ regex { - q: `SHOW MEASUREMENTS WHERE region !~ 'ca.*'`, + q: `SHOW MEASUREMENTS WHERE region !~ /ca.*/`, r: `{"results":[{"series":[{"name":"measurements","columns":["name"],"values":[["cpu"]]}]}]}`, }, } @@ -1802,7 +1802,7 @@ func TestHandler_serveShowTagValues(t *testing.T) { }, // SHOW TAG VALUES FROM ... WHERE =~ regex { - q: `SHOW TAG VALUES WITH KEY = host WHERE region =~ 'ca.*'`, + q: `SHOW TAG VALUES WITH KEY = host WHERE region =~ /ca.*/`, r: &influxdb.Results{ Results: []*influxdb.Result{ { @@ -1821,7 +1821,7 @@ func TestHandler_serveShowTagValues(t *testing.T) { }, // SHOW TAG VALUES FROM ... WHERE !~ regex { - q: `SHOW TAG VALUES WITH KEY = region WHERE host !~ 'server0[12]'`, + q: `SHOW TAG VALUES WITH KEY = region WHERE host !~ /server0[12]/`, r: &influxdb.Results{ Results: []*influxdb.Result{ { diff --git a/influxql/INFLUXQL.md b/influxql/INFLUXQL.md index 6be74a590fd..fb6032d6de5 100644 --- a/influxql/INFLUXQL.md +++ b/influxql/INFLUXQL.md @@ -162,6 +162,12 @@ time_lit = "2006-01-02 15:04:05.999999" | "2006-01-02" bool_lit = TRUE | FALSE . ``` +### Regular Expressions + +``` +regex_lit = "/" { unicode_char } "/" . +``` + ## Queries A query is composed of one or more statements separated by a semicolon. @@ -602,7 +608,7 @@ binary_op = "+" | "-" | "*" | "/" | "AND" | "OR" | "=" | "!=" | "<" | expr = unary_expr { binary_op unary_expr } . unary_expr = "(" expr ")" | var_ref | time_lit | string_lit | - number_lit | bool_lit | duration_lit . + number_lit | bool_lit | duration_lit | regex_lit . ``` ## Other diff --git a/influxql/parser.go b/influxql/parser.go index c4a4390b122..4fb41d03d19 100644 --- a/influxql/parser.go +++ b/influxql/parser.go @@ -1456,6 +1456,8 @@ func (p *Parser) parseSortField() (*SortField, error) { // ParseExpr parses an expression. func (p *Parser) ParseExpr() (Expr, error) { + var err error + // Parse a non-binary expression type to start. // This variable will always be the root of the expression tree. expr, err := p.parseUnaryExpr() @@ -1472,10 +1474,18 @@ func (p *Parser) ParseExpr() (Expr, error) { return expr, nil } - // Otherwise parse the next unary expression. - rhs, err := p.parseUnaryExpr() - if err != nil { - return nil, err + // Otherwise parse the next expression. + var rhs Expr + if IsRegexOp(op) { + // RHS of a regex operator must be a regular expression. + p.consumeWhitespace() + if rhs, err = p.parseRegex(); err != nil { + return nil, err + } + } else { + if rhs, err = p.parseUnaryExpr(); err != nil { + return nil, err + } } // Assign the new root based on the precendence of the LHS and RHS operators. @@ -1488,14 +1498,6 @@ func (p *Parser) ParseExpr() (Expr, error) { } else { expr = &BinaryExpr{LHS: expr, RHS: rhs, Op: op} } - - // If the operator was =~ or !~, parse the regular expression. - b, ok := expr.(*BinaryExpr) - if ok && (b.Op == EQREGEX || b.Op == NEQREGEX) { - if expr, err = p.parseRegexExpr(b); err != nil { - return nil, err - } - } } } @@ -1562,51 +1564,37 @@ func (p *Parser) parseUnaryExpr() (Expr, error) { return &DurationLiteral{Val: v}, nil case MUL: return &Wildcard{}, nil + case REGEX: + re, err := regexp.Compile(lit) + if err != nil { + return nil, &ParseError{Message: err.Error(), Pos: pos} + } + return &RegexLiteral{Val: re}, nil default: return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos) } } -// parseRegexExpr parses the string literal on one side of a binary expression -// and returns a new binary expression with a regex literal in place of the -// string literal. -func (p *Parser) parseRegexExpr(expr *BinaryExpr) (*BinaryExpr, error) { - if expr.Op != EQREGEX && expr.Op != NEQREGEX { - // Don't call parseRegex unless you have a regex operator! - panic("parseRegex only works with =~ or !~ regex operators") - } - - newExpr := &BinaryExpr{Op: expr.Op} +// parseRegex parses a regular expression. +func (p *Parser) parseRegex() (Expr, error) { + tok, pos, lit := p.s.s.ScanRegex() - if regex, ok := expr.RHS.(*StringLiteral); ok { - // Found regex text on right side of operator. - re, err := regexp.Compile(regex.Val) - if err != nil { - return nil, err - } - newExpr.RHS = &RegexLiteral{Val: re} - - // Make sure left side of operator is an identifier. - if newExpr.LHS, ok = expr.LHS.(*VarRef); !ok { - return nil, fmt.Errorf("left operand of operator %s must be an identifier", expr.Op.String()) - } - } else if regex, ok = expr.LHS.(*StringLiteral); ok { - // Found regex text on left side of operator. - re, err := regexp.Compile(regex.Val) - if err != nil { - return nil, err - } - newExpr.LHS = &RegexLiteral{Val: re} + if tok == BADESCAPE { + msg := fmt.Sprintf("bad escape: %s", lit) + return nil, &ParseError{Message: msg, Pos: pos} + } else if tok == BADREGEX { + msg := fmt.Sprintf("bad regex: %s", lit) + return nil, &ParseError{Message: msg, Pos: pos} + } else if tok != REGEX { + return nil, newParseError(tokstr(tok, lit), []string{"regex"}, pos) + } - // Make sure right side of operator is an identifer. - if newExpr.RHS, ok = expr.RHS.(*VarRef); !ok { - return nil, fmt.Errorf("right operand of operator %s must be an identifier", expr.Op.String()) - } - } else { - return nil, fmt.Errorf("operator %s requires one string operand", expr.Op.String()) + re, err := regexp.Compile(lit) + if err != nil { + return nil, &ParseError{Message: err.Error(), Pos: pos} } - return newExpr, nil + return &RegexLiteral{Val: re}, nil } // parseCall parses a function call. diff --git a/influxql/parser_test.go b/influxql/parser_test.go index f74ec19aeb5..e85ec3ed164 100644 --- a/influxql/parser_test.go +++ b/influxql/parser_test.go @@ -136,6 +136,28 @@ func TestParser_ParseStatement(t *testing.T) { }, }, + // SELECT * FROM cpu WHERE host = 'serverC' AND region =~ /.*west.*/ + { + s: `SELECT * FROM cpu WHERE host = 'serverC' AND region =~ /.*west.*/`, + stmt: &influxql.SelectStatement{ + Fields: []*influxql.Field{{Expr: &influxql.Wildcard{}}}, + Source: &influxql.Measurement{Name: "cpu"}, + Condition: &influxql.BinaryExpr{ + Op: influxql.AND, + LHS: &influxql.BinaryExpr{ + Op: influxql.EQ, + LHS: &influxql.VarRef{Val: "host"}, + RHS: &influxql.StringLiteral{Val: "serverC"}, + }, + RHS: &influxql.BinaryExpr{ + Op: influxql.EQREGEX, + LHS: &influxql.VarRef{Val: "region"}, + RHS: &influxql.RegexLiteral{Val: regexp.MustCompile(".*west.*")}, + }, + }, + }, + }, + // DELETE statement { s: `DELETE FROM myseries WHERE host = 'hosta.influxdb.org'`, @@ -746,6 +768,7 @@ func TestParser_ParseStatement(t *testing.T) { tt.stmt.(*influxql.CreateContinuousQueryStatement).Source.GroupByInterval() } } else if tt.err == "" && !reflect.DeepEqual(tt.stmt, stmt) { + t.Logf("exp=%s\ngot=%s\n", mustMarshalJSON(tt.stmt), mustMarshalJSON(stmt)) t.Errorf("%d. %q\n\nstmt mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", i, tt.s, tt.stmt, stmt) } } @@ -838,9 +861,9 @@ func TestParser_ParseExpr(t *testing.T) { }, }, - // Binary expression with regex on right. + // Binary expression with regex. { - s: `region =~ 'us.*'`, + s: "region =~ /us.*/", expr: &influxql.BinaryExpr{ Op: influxql.EQREGEX, LHS: &influxql.VarRef{Val: "region"}, @@ -848,16 +871,6 @@ func TestParser_ParseExpr(t *testing.T) { }, }, - // Binary expression with NEQ regex on left. - { - s: `'us.*' !~ region`, - expr: &influxql.BinaryExpr{ - Op: influxql.NEQREGEX, - RHS: &influxql.VarRef{Val: "region"}, - LHS: &influxql.RegexLiteral{Val: regexp.MustCompile(`us.*`)}, - }, - }, - // Complex binary expression. { s: `value + 3 < 30 AND 1 + 2 OR true`, diff --git a/influxql/scanner.go b/influxql/scanner.go index ffcda356c14..26947c93825 100644 --- a/influxql/scanner.go +++ b/influxql/scanner.go @@ -179,6 +179,25 @@ func (s *Scanner) scanString() (tok Token, pos Pos, lit string) { return STRING, pos, lit } +func (s *Scanner) ScanRegex() (tok Token, pos Pos, lit string) { + _, pos = s.r.curr() + + // Start & end sentinels. + start, end := '/', '/' + // Valid escape chars. + escapes := map[rune]rune{'/': '/'} + + b, err := ScanDelimited(s.r, start, end, escapes) + + if err == errBadEscape { + _, pos = s.r.curr() + return BADESCAPE, pos, lit + } else if err != nil { + return BADREGEX, pos, lit + } + return REGEX, pos, string(b) +} + // scanNumber consumes anything that looks like the start of a number. // Numbers start with a digit, full stop, plus sign or minus sign. // This function can return non-number tokens if a scan is a false positive. @@ -300,6 +319,16 @@ func newBufScanner(r io.Reader) *bufScanner { // Scan reads the next token from the scanner. func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) { + return s.scanFunc(s.s.Scan) +} + +// ScanRegex reads a regex token from the scanner. +func (s *bufScanner) ScanRegex() (tok Token, pos Pos, lit string) { + return s.scanFunc(s.s.ScanRegex) +} + +// scanFunc uses the provided function to scan the next token. +func (s *bufScanner) scanFunc(scan func() (Token, Pos, string)) (tok Token, pos Pos, lit string) { // If we have unread tokens then read them off the buffer first. if s.n > 0 { s.n-- @@ -309,7 +338,7 @@ func (s *bufScanner) Scan() (tok Token, pos Pos, lit string) { // Move buffer position forward and save the token. s.i = (s.i + 1) % len(s.buf) buf := &s.buf[s.i] - buf.tok, buf.pos, buf.lit = s.s.Scan() + buf.tok, buf.pos, buf.lit = scan() return s.curr() } @@ -415,6 +444,46 @@ func (r *reader) curr() (ch rune, pos Pos) { // eof is a marker code point to signify that the reader can't read any more. const eof = rune(0) +func ScanDelimited(r io.RuneScanner, start, end rune, escapes map[rune]rune) ([]byte, error) { + // Scan start delimiter. + if ch, _, err := r.ReadRune(); err != nil { + return nil, err + } else if ch != start { + return nil, fmt.Errorf("expected %s; found %s", string(start), string(ch)) + } + + var buf bytes.Buffer + for { + ch0, _, err := r.ReadRune() + if ch0 == end { + return buf.Bytes(), nil + } else if err != nil { + return buf.Bytes(), err + } else if ch0 == '\n' { + return nil, errors.New("delimited text contains new line") + } else if ch0 == '\\' { + // If the next character is an escape then write the escaped char. + // If it's not a valid escape then return an error. + ch1, _, err := r.ReadRune() + if err != nil { + return nil, err + } + + r, ok := escapes[ch1] + if !ok { + buf.Reset() + _, _ = buf.WriteRune(ch0) + _, _ = buf.WriteRune(ch1) + return buf.Bytes(), errBadEscape + } + + _, _ = buf.WriteRune(r) + } else { + _, _ = buf.WriteRune(ch0) + } + } +} + // ScanString reads a quoted string from a rune reader. func ScanString(r io.RuneScanner) (string, error) { ending, _, err := r.ReadRune() @@ -450,6 +519,7 @@ func ScanString(r io.RuneScanner) (string, error) { var errBadString = errors.New("bad string") var errBadEscape = errors.New("bad escape") +var errBadRegex = errors.New("bad regex") // ScanBareIdent reads bare identifier from a rune reader. func ScanBareIdent(r io.RuneScanner) string { diff --git a/influxql/token.go b/influxql/token.go index 900cc741b9b..fea1013f020 100644 --- a/influxql/token.go +++ b/influxql/token.go @@ -23,6 +23,8 @@ const ( BADESCAPE // \q TRUE // true FALSE // false + REGEX // Regular expressions + BADREGEX // `.* literal_end operator_beg @@ -125,6 +127,7 @@ var tokens = [...]string{ BADESCAPE: "BADESCAPE", TRUE: "TRUE", FALSE: "FALSE", + REGEX: "REGEX", ADD: "+", SUB: "-", @@ -238,7 +241,7 @@ func (tok Token) Precedence() int { return 1 case AND: return 2 - case EQ, NEQ, LT, LTE, GT, GTE: + case EQ, NEQ, EQREGEX, NEQREGEX, LT, LTE, GT, GTE: return 3 case ADD, SUB: return 4