Merge #32982
32982: parser: scan and split statements before parsing, return SQL strings for statements r=RaduBerinde a=RaduBerinde

This series of changes moves the handling of `;` out of the parser and into a pre-processing stage (which scans lexical tokens and splits statements). The Parser now returns the strings for the statements, which are then used instead of `String()`. Eventually we will cache ASTs, and these strings will be used to look up ASTs in the cache and avoid parsing entirely.
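
A rough usage sketch of the new shape (the second return value being the per-statement SQL strings is an assumption based on the callers updated in this diff, which discard it with `_`):

```go
package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/sql/parser"
)

func main() {
	// Assumed shape: parsed statements, their original SQL strings, error.
	stmts, sqls, err := parser.Parse("SELECT 1; SELECT count(*) FROM t")
	if err != nil {
		panic(err)
	}
	for i, stmt := range stmts {
		// sqls[i] is the verbatim input for stmts[i]; previously callers had to
		// round-trip through stmt.String().
		fmt.Printf("stmt %d: %q (%T)\n", i, sqls[i], stmt)
	}
}
```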

Functionally, only two small things change (both positive IMO):
 - traces and logs now show the original SQL instead of the roundtripped string
 - when we hit a parsing error in multi-statement input, we now show only the relevant statement (this is what Postgres shows too).

In principle, this should be less efficient because we need to remember the scanned tokens. However, I made some improvements that offset the overhead. Also note that the code that handled multiple statements in sql.y was inefficient: every prefix of the `[]tree.Statement` slice was being copied to the heap when it was assigned to the `val interface{}`.
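
As a minimal, self-contained illustration of that last point (not the sql.y code itself; the names below are made up), storing a slice into an `interface{}` forces a heap allocation for the slice header on every assignment:

```go
package main

import (
	"fmt"
	"testing"
)

// sink stands in for the generated parser's `val interface{}` union field;
// writing to a package-level variable guarantees the slice value escapes.
var sink interface{}

func main() {
	stmts := make([]string, 0, 8) // pre-sized so append itself does not allocate
	perAssign := testing.AllocsPerRun(1000, func() {
		stmts = append(stmts[:0], "SELECT 1")
		sink = stmts // boxing the slice header into an interface{} heap-allocates
	})
	fmt.Printf("allocations per assignment: %.0f\n", perAssign)
}
```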

#### parser: split lexer

This change splits the lexer into two parts:
 - `scanner` does the low-level scanning of the tokens
 - `lexer` operates on a slice of tokens, performs lookahead and
   handles errors

The separation of these steps will allow us to split statements and
parse each one individually.
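
A minimal, hypothetical sketch of that division of labor (type and method names are illustrative; the real code in `pkg/sql/parser` differs in detail):

```go
package parser

import "fmt"

// sqlSymType is an illustrative stand-in for the yacc symbol type
// (the union payload is omitted here).
type sqlSymType struct {
	id  int32
	pos int32
	str string
}

// scanner does the low-level tokenization of an input string.
type scanner struct {
	in  string
	pos int
}

// scan fills lval with the next token; id == 0 signals end of input.
// (The real scanner handles identifiers, strings, numbers, comments, ...)
func (s *scanner) scan(lval *sqlSymType) {
	lval.pos = int32(s.pos)
	lval.id = 0
}

// lexer operates on a slice of already-scanned tokens: it hands them to the
// generated parser one at a time, performs lookahead, and records errors.
type lexer struct {
	tokens  []sqlSymType
	lastPos int
	err     error
}

// Lex implements the interface expected by the goyacc-generated parser.
func (l *lexer) Lex(lval *sqlSymType) int {
	if l.lastPos >= len(l.tokens) {
		return 0
	}
	*lval = l.tokens[l.lastPos]
	l.lastPos++
	return int(lval.id)
}

// Error is called by the generated parser when it hits a syntax error.
func (l *lexer) Error(msg string) {
	l.err = fmt.Errorf("%s at or near position %d", msg, l.lastPos)
}
```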

Release note: None

#### parser: split statements prior to parsing

This change uses the scanner to split the input into statements and
then parses each one independently. This will allow the caller to get
the exact SQL for each statement. The SQL grammar itself no longer
accepts semicolons.
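
A toy illustration of the flow (the real split walks scanner tokens, so semicolons inside string literals and comments are handled correctly; the naive string split below is only there to show the shape):

```go
package main

import (
	"fmt"
	"strings"
)

// splitStatements naively cuts the input at semicolons. It is illustrative
// only: the real pre-processing step scans lexical tokens, so it is not
// fooled by semicolons inside strings or comments.
func splitStatements(sql string) []string {
	var out []string
	for _, piece := range strings.Split(sql, ";") {
		if s := strings.TrimSpace(piece); s != "" {
			out = append(out, s)
		}
	}
	return out
}

func main() {
	input := "SELECT 1; SELECT * FROM t"
	for _, stmt := range splitStatements(input) {
		// Each piece is parsed on its own, and its exact text is kept around
		// (for traces, logs, and eventually an AST cache keyed by SQL string).
		fmt.Printf("parse independently: %q\n", stmt)
	}
}
```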

Release note: None

#### parser: make sqlSymType smaller

Change `id` and `pos` to `int32` to make the structure smaller.
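
Approximately (field set abbreviated and assumed), the change looks like this; because the earlier commits keep all scanned tokens in a slice, shrinking each element pays off:

```go
package parser

// sqlSymUnion stands in for the yacc %union payload (details omitted).
type sqlSymUnion struct{ val interface{} }

// Before (approximate shape, renamed so both versions compile side by side):
type sqlSymTypeOld struct {
	id    int // 8 bytes on 64-bit platforms
	pos   int // 8 bytes
	str   string
	union sqlSymUnion
}

// After: id and pos become int32 and pack into a single 8-byte slot, so every
// token buffered by the scanner costs less memory.
type sqlSymType struct {
	id    int32
	pos   int32
	str   string
	union sqlSymUnion
}
```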

Release note: None

#### parser: return SQL strings alongside the statements

Release note: None




Benchmarks:
```
name                 old time/op    new time/op    delta
Parse/simple           11.8µs ± 5%     9.5µs ± 1%  -19.22%  (p=0.000 n=12+12)
Parse/string           15.8µs ± 3%    13.6µs ± 1%  -13.95%  (p=0.000 n=12+12)
Parse/tpcc-delivery    19.8µs ± 3%    19.0µs ± 2%   -4.18%  (p=0.000 n=12+12)
Parse/account          34.0µs ± 2%    33.8µs ± 4%     ~     (p=0.101 n=12+12)

name                 old alloc/op   new alloc/op   delta
Parse/simple           2.44kB ± 0%    2.52kB ± 0%   +3.28%  (p=0.000 n=12+12)
Parse/string           2.71kB ± 0%    3.56kB ± 0%  +31.33%  (p=0.000 n=12+12)
Parse/tpcc-delivery    3.10kB ± 0%    5.49kB ± 0%  +76.80%  (p=0.000 n=12+12)
Parse/account          5.81kB ± 0%    9.06kB ± 0%  +55.92%  (p=0.000 n=12+12)

name                 old allocs/op  new allocs/op  delta
Parse/simple             23.0 ± 0%      21.0 ± 0%   -8.70%  (p=0.000 n=12+12)
Parse/string             27.0 ± 0%      26.0 ± 0%   -3.70%  (p=0.000 n=12+12)
Parse/tpcc-delivery      35.0 ± 0%      35.0 ± 0%     ~     (all equal)
Parse/account            81.0 ± 0%      81.0 ± 0%     ~     (all equal)
```

Co-authored-by: Radu Berinde <[email protected]>
craig[bot] and RaduBerinde committed Dec 14, 2018
2 parents 7efc92a + f9bd79c commit 46de6b8
Showing 32 changed files with 702 additions and 379 deletions.
5 changes: 1 addition & 4 deletions docs/generated/sql/bnf/stmt_block.bnf
@@ -1,8 +1,5 @@
stmt_block ::=
stmt_list

stmt_list ::=
( stmt ) ( ( ';' stmt ) )*
stmt

stmt ::=
'HELPTOKEN'
2 changes: 1 addition & 1 deletion pkg/ccl/importccl/read_import_mysql_test.go
@@ -46,7 +46,7 @@ func descForTable(
t *testing.T, create string, parent, id sqlbase.ID, fks fkHandler,
) *sqlbase.TableDescriptor {
t.Helper()
parsed, err := parser.Parse(create)
parsed, _, err := parser.Parse(create)
if err != nil {
t.Fatalf("could not parse %q: %v", create, err)
}
2 changes: 1 addition & 1 deletion pkg/ccl/importccl/read_import_pgdump.go
@@ -84,7 +84,7 @@ func (p *postgreStream) Next() (interface{}, error) {

for p.s.Scan() {
t := p.s.Text()
stmts, err := parser.Parse(t)
stmts, _, err := parser.Parse(t)
if err != nil {
// Something non-parseable may be something we don't yet parse but still
// want to ignore.
6 changes: 3 additions & 3 deletions pkg/cli/interactive_tests/test_auto_trace.tcl
@@ -15,7 +15,7 @@ send "select 1;\r"
eexpect "1 row"
# trace result
eexpect "session recording"
eexpect "executing: select 1"
eexpect "executing: SELECT 1"
eexpect "rows affected: 1"
eexpect root@
end_test
@@ -26,7 +26,7 @@ send "select woo;\r"
eexpect "column \"woo\" does not exist"
# trace result
eexpect "session recording"
eexpect "executing: select woo"
eexpect "executing: SELECT woo"
eexpect "does not exist"
eexpect root@
end_test
@@ -43,7 +43,7 @@ send "select * from t;\r"
eexpect "2 rows"
# trace result
eexpect "session recording"
eexpect "executing: select \* from t"
eexpect "executing: SELECT \* FROM t"
eexpect "output row:"
eexpect "output row:"
eexpect "rows affected: 2"
2 changes: 1 addition & 1 deletion pkg/cli/sql.go
@@ -491,7 +491,7 @@ func (c *cliState) handleFunctionHelp(cmd []string, nextState, errState cliState
}
fmt.Println()
} else {
_, err := parser.Parse(fmt.Sprintf("select %s(??", funcName))
_, _, err := parser.Parse(fmt.Sprintf("select %s(??", funcName))
pgerr, ok := pgerror.GetPGCause(err)
if !ok || !strings.HasPrefix(pgerr.Hint, "help:") {
fmt.Fprintf(stderr,
4 changes: 2 additions & 2 deletions pkg/cli/sqlfmt.go
@@ -50,7 +50,7 @@ func runSQLFmt(cmd *cobra.Command, args []string) error {
var sl tree.StatementList
if len(sqlfmtCtx.execStmts) != 0 {
for _, exec := range sqlfmtCtx.execStmts {
stmts, err := parser.Parse(exec)
stmts, _, err := parser.Parse(exec)
if err != nil {
return err
}
@@ -61,7 +61,7 @@ func runSQLFmt(cmd *cobra.Command, args []string) error {
if err != nil {
return err
}
sl, err = parser.Parse(string(in))
sl, _, err = parser.Parse(string(in))
if err != nil {
return err
}
74 changes: 37 additions & 37 deletions pkg/cmd/docgen/diagrams.go
@@ -454,16 +454,16 @@ var specs = []stmtSpec{
},
},
{
name: "check_column_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' column_name column_type 'CHECK' '(' check_expr ')' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "check_expr", "column_constraints", "table_constraints"},
name: "check_column_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' column_name column_type 'CHECK' '(' check_expr ')' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "check_expr", "column_constraints", "table_constraints"},
},
{
name: "check_table_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' constraint_name | ) 'CHECK' '(' check_expr ')' ( table_constraints | ) ')'"},
unlink: []string{"table_name", "check_expr", "table_constraints"},
name: "check_table_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' constraint_name | ) 'CHECK' '(' check_expr ')' ( table_constraints | ) ')'"},
unlink: []string{"table_name", "check_expr", "table_constraints"},
},
{
name: "column_def",
@@ -580,7 +580,7 @@ var specs = []stmtSpec{
name: "default_value_column_level",
stmt: "stmt_block",
replace: map[string]string{
"stmt_list": "'CREATE' 'TABLE' table_name '(' column_name column_type 'DEFAULT' default_value ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'",
" stmt": " 'CREATE' 'TABLE' table_name '(' column_name column_type 'DEFAULT' default_value ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'",
},
unlink: []string{"table_name", "column_name", "column_type", "default_value", "table_constraints"},
},
@@ -761,16 +761,16 @@ var specs = []stmtSpec{
unlink: []string{"role_name", "user_name"},
},
{
name: "foreign_key_column_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' column_name column_type 'REFERENCES' parent_table ( '(' ref_column_name ')' | ) ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "parent_table", "table_constraints"},
name: "foreign_key_column_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' column_name column_type 'REFERENCES' parent_table ( '(' ref_column_name ')' | ) ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "parent_table", "table_constraints"},
},
{
name: "foreign_key_table_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' constraint_name | ) 'FOREIGN KEY' '(' ( fk_column_name ( ',' fk_column_name )* ) ')' 'REFERENCES' parent_table ( '(' ( ref_column_name ( ',' ref_column_name )* ) ')' | ) ( table_constraints | ) ')'"},
unlink: []string{"table_name", "column_name", "parent_table", "table_constraints"},
name: "foreign_key_table_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' constraint_name | ) 'FOREIGN KEY' '(' ( fk_column_name ( ',' fk_column_name )* ) ')' 'REFERENCES' parent_table ( '(' ( ref_column_name ( ',' ref_column_name )* ) ')' | ) ( table_constraints | ) ')'"},
unlink: []string{"table_name", "column_name", "parent_table", "table_constraints"},
},
{
name: "index_def",
@@ -809,10 +809,10 @@ var specs = []stmtSpec{
unlink: []string{"table_definition"},
},
{
name: "not_null_column_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' column_name column_type 'NOT NULL' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "table_constraints"},
name: "not_null_column_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' column_name column_type 'NOT NULL' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "table_constraints"},
},
{
name: "opt_interleave",
Expand All @@ -824,16 +824,16 @@ var specs = []stmtSpec{
unlink: []string{"job_id"},
},
{
name: "primary_key_column_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' column_name column_type 'PRIMARY KEY' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "table_constraints"},
name: "primary_key_column_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' column_name column_type 'PRIMARY KEY' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "table_constraints"},
},
{
name: "primary_key_table_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' name | ) 'PRIMARY KEY' '(' ( column_name ( ',' column_name )* ) ')' ( table_constraints | ) ')'"},
unlink: []string{"table_name", "column_name", "table_constraints"},
name: "primary_key_table_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' name | ) 'PRIMARY KEY' '(' ( column_name ( ',' column_name )* ) ')' ( table_constraints | ) ')'"},
unlink: []string{"table_name", "column_name", "table_constraints"},
},
{
name: "release_savepoint",
@@ -1181,16 +1181,16 @@ var specs = []stmtSpec{
unlink: []string{"table_name"},
},
{
name: "unique_column_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' column_name column_type 'UNIQUE' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "table_constraints"},
name: "unique_column_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' column_name column_type 'UNIQUE' ( column_constraints | ) ( ',' ( column_def ( ',' column_def )* ) | ) ( table_constraints | ) ')' ')'"},
unlink: []string{"table_name", "column_name", "column_type", "table_constraints"},
},
{
name: "unique_table_level",
stmt: "stmt_block",
replace: map[string]string{"stmt_list": "'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' name | ) 'UNIQUE' '(' ( column_name ( ',' column_name )* ) ')' ( table_constraints | ) ')'"},
unlink: []string{"table_name", "check_expr", "table_constraints"},
name: "unique_table_level",
stmt: "stmt_block",
replace: map[string]string{" stmt": " 'CREATE' 'TABLE' table_name '(' ( column_def ( ',' column_def )* ) ( 'CONSTRAINT' name | ) 'UNIQUE' '(' ( column_name ( ',' column_name )* ) ')' ( table_constraints | ) ')'"},
unlink: []string{"table_name", "check_expr", "table_constraints"},
},
{
name: "update_stmt",
2 changes: 1 addition & 1 deletion pkg/server/admin.go
@@ -1284,7 +1284,7 @@ func (s *adminServer) QueryPlan(

// As long as there's only one query provided it's safe to construct the
// explain query.
stmts, err := parser.Parse(req.Query)
stmts, _, err := parser.Parse(req.Query)
if err != nil {
return nil, s.serverError(err)
}
8 changes: 6 additions & 2 deletions pkg/sql/conn_io.go
@@ -142,7 +142,9 @@ type ExecStmt struct {
func (ExecStmt) command() {}

func (e ExecStmt) String() string {
return fmt.Sprintf("ExecStmt: %s", e.SQL)
// We have the original SQL, but we still use String() because it obfuscates
// passwords.
return fmt.Sprintf("ExecStmt: %s", e.Stmt.String())
}

var _ Command = ExecStmt{}
@@ -191,7 +193,9 @@ type PrepareStmt struct {
func (PrepareStmt) command() {}

func (p PrepareStmt) String() string {
return fmt.Sprintf("PrepareStmt: %s", p.SQL)
// We have the original SQL, but we still use String() because it obfuscates
// passwords.
return fmt.Sprintf("PrepareStmt: %s", p.Stmt.String())
}

var _ Command = PrepareStmt{}
2 changes: 1 addition & 1 deletion pkg/sql/exec_util.go
@@ -1000,7 +1000,7 @@ func anonymizeStmtAndConstants(stmt tree.Statement) string {
func AnonymizeStatementsForReporting(action, sqlStmts string, r interface{}) error {
var anonymized []string
{
stmts, err := parser.Parse(sqlStmts)
stmts, _, err := parser.Parse(sqlStmts)
if err == nil {
for _, stmt := range stmts {
anonymized = append(anonymized, anonymizeStmtAndConstants(stmt))
2 changes: 1 addition & 1 deletion pkg/sql/lex/encode_test.go
@@ -73,7 +73,7 @@ func testEncodeString(t *testing.T, input []byte, encode func(*bytes.Buffer, str
t.Fatalf("unprintable character: %v (%v): %s %v", ch, input, sql, []byte(sql))
}
}
stmts, err := parser.Parse(sql)
stmts, _, err := parser.Parse(sql)
if err != nil {
t.Fatalf("%s: expected success, but found %s", sql, err)
}
2 changes: 1 addition & 1 deletion pkg/sql/logictest/logic.go
@@ -529,7 +529,7 @@ func (ls *logicStatement) readSQL(
if !hasVars {
newSyntax, err := func(inSql string) (string, error) {
// Can't rewrite the SQL otherwise because the vars make it invalid.
stmtList, err := parser.Parse(inSql)
stmtList, _, err := parser.Parse(inSql)
if err != nil {
if ls.expectErr != "" {
// Maybe a parse error was expected. Simply do not rewrite.
7 changes: 2 additions & 5 deletions pkg/sql/parallel_stmts_test.go
@@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
@@ -324,14 +325,10 @@ func planQuery(t *testing.T, s serverutils.TestServerInterface, sql string) (*pl
// planner with the session so that its copy of the session data gets updated.
p.extendedEvalCtx.SessionData.Database = "test"

stmts, err := p.parser.Parse(sql)
stmt, err := parser.ParseOne(sql)
if err != nil {
t.Fatal(err)
}
if len(stmts) != 1 {
t.Fatalf("expected to parse 1 statement, got: %d", len(stmts))
}
stmt := stmts[0]
if err := p.makePlan(context.TODO(), Statement{SQL: sql, AST: stmt}); err != nil {
t.Fatal(err)
}
4 changes: 2 additions & 2 deletions pkg/sql/parser/help.go
@@ -73,7 +73,7 @@ func (h *HelpMessage) Format(w io.Writer) {
// error", with the error set to a contextual help message about the
// current statement.
func helpWith(sqllex sqlLexer, helpText string) int {
scan := sqllex.(*scanner)
scan := sqllex.(*lexer)
if helpText == "" {
scan.populateHelpMsg("help:\n" + AllHelp)
return 1
@@ -127,7 +127,7 @@ func helpWithFunction(sqllex sqlLexer, f tree.ResolvableFunctionReference) int {
_ = w.Flush()
msg.Text = buf.String()

sqllex.(*scanner).SetHelp(msg)
sqllex.(*lexer).SetHelp(msg)
return 1
}

4 changes: 2 additions & 2 deletions pkg/sql/parser/help_test.go
@@ -393,7 +393,7 @@ func TestContextualHelp(t *testing.T) {
// The following checks that the grammar rules properly report help.
for _, test := range testData {
t.Run(test.input, func(t *testing.T) {
_, err := Parse(test.input)
_, _, err := Parse(test.input)
if err == nil {
t.Fatalf("parser didn't trigger error")
}
@@ -421,7 +421,7 @@ func TestHelpKeys(t *testing.T) {
// checks that the parser renders the correct help message.
for key, body := range HelpMessages {
t.Run(key, func(t *testing.T) {
_, err := Parse(key + " ??")
_, _, err := Parse(key + " ??")
if err == nil {
t.Errorf("parser didn't trigger error")
return