From 65c8ca5fcb5e25704cbe320989b127fbd6630c0c Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Wed, 7 Jun 2023 00:03:06 -0700 Subject: [PATCH 01/29] group by queries failing with limit and distinct Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/main_test.go | 94 ++++ .../vtgate/queries/random/random_test.go | 409 ++++++++++++++++++ .../endtoend/vtgate/queries/random/schema.sql | 20 + .../vtgate/queries/random/vschema.json | 26 ++ 4 files changed, 549 insertions(+) create mode 100644 go/test/endtoend/vtgate/queries/random/main_test.go create mode 100644 go/test/endtoend/vtgate/queries/random/random_test.go create mode 100644 go/test/endtoend/vtgate/queries/random/schema.sql create mode 100644 go/test/endtoend/vtgate/queries/random/vschema.json diff --git a/go/test/endtoend/vtgate/queries/random/main_test.go b/go/test/endtoend/vtgate/queries/random/main_test.go new file mode 100644 index 00000000000..bfef910f036 --- /dev/null +++ b/go/test/endtoend/vtgate/queries/random/main_test.go @@ -0,0 +1,94 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package random + +import ( + _ "embed" + "flag" + "fmt" + "os" + "testing" + + "vitess.io/vitess/go/test/endtoend/utils" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/test/endtoend/cluster" +) + +var ( + clusterInstance *cluster.LocalProcessCluster + vtParams mysql.ConnParams + mysqlParams mysql.ConnParams + keyspaceName = "ks_random" + cell = "test_random" + + //go:embed schema.sql + schemaSQL string + + //go:embed vschema.json + vschema string +) + +func TestMain(m *testing.M) { + defer cluster.PanicHandler(nil) + flag.Parse() + + exitCode := func() int { + clusterInstance = cluster.NewCluster(cell, "localhost") + defer clusterInstance.Teardown() + + // Start topo server + err := clusterInstance.StartTopo() + if err != nil { + return 1 + } + + // Start keyspace + keyspace := &cluster.Keyspace{ + Name: keyspaceName, + SchemaSQL: schemaSQL, + VSchema: vschema, + } + clusterInstance.VtGateExtraArgs = []string{"--schema_change_signal"} + clusterInstance.VtTabletExtraArgs = []string{"--queryserver-config-schema-change-signal", "--queryserver-config-schema-change-signal-interval", "0.1"} + err = clusterInstance.StartKeyspace(*keyspace, []string{"-80", "80-"}, 0, false) + if err != nil { + return 1 + } + + clusterInstance.VtGateExtraArgs = append(clusterInstance.VtGateExtraArgs, "--enable_system_settings=true") + // Start vtgate + err = clusterInstance.StartVtgate() + if err != nil { + return 1 + } + + vtParams = clusterInstance.GetVTParams(keyspaceName) + + // create mysql instance and connection parameters + conn, closer, err := utils.NewMySQL(clusterInstance, keyspaceName, schemaSQL) + if err != nil { + fmt.Println(err) + return 1 + } + defer closer() + mysqlParams = conn + + return m.Run() + }() + os.Exit(exitCode) +} diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go new file mode 100644 index 00000000000..066194f546a --- /dev/null +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -0,0 +1,409 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package random + +import ( + "fmt" + "math/rand" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" + + "vitess.io/vitess/go/test/endtoend/cluster" + "vitess.io/vitess/go/test/endtoend/utils" +) + +type ( + column struct { + name string + typ string + } + tableT struct { + name string + columns []column + } +) + +func start(t *testing.T) (utils.MySQLCompare, func()) { + mcmp, err := utils.NewMySQLCompare(t, vtParams, mysqlParams) + require.NoError(t, err) + + deleteAll := func() { + _, _ = utils.ExecAllowError(t, mcmp.VtConn, "set workload = oltp") + + tables := []string{"dept", "emp"} + for _, table := range tables { + _, _ = mcmp.ExecAndIgnore("delete from " + table) + } + } + + deleteAll() + + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10);") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (10,'ACCOUNTING','NEW YORK');") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (20,'RESEARCH','DALLAS');") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (30,'SALES','CHICAGO');") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (40,'OPERATIONS','BOSTON');") + + return mcmp, func() { + deleteAll() + mcmp.Close() + cluster.PanicHandler(t) + } +} + +func helperTest(t *testing.T, query string) { + t.Run(query, func(t *testing.T) { + mcmp, closer := start(t) + defer closer() + + mcmp.Exec(query) + }) +} + +func TestKnownFailures(t *testing.T) { + require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "emp")) + require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "dept")) + // logs more stuff + //clusterInstance.EnableGeneralLog() + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(*) from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.deptno group by tbl1.empno order by tbl1.empno") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from emp as tbl0 group by tbl0.empno order by tbl0.empno") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*), tbl0.loc from dept as tbl0 group by tbl0.loc") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.loc") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.comm) from emp as tbl0, emp as tbl1") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0") + + // unsupported + // unsupported: in scatter query: aggregation function + helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl1 group by tbl1.mgr") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl0, emp as tbl1 group by tbl1.mgr") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(tbl0.comm) from emp as tbl0, emp as tbl1 join dept as tbl2") + + // unsupported + // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0) as tbl0") + + // unsupported + // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0) as tbl0, dept as tbl1") + + // unsupported + // EOF (errno 2013) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0, dept as tbl1") + + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") + + // mismatched results (group by + limit) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.sal, count(tbl0.sal), count(*) from emp as tbl0, emp as tbl1 group by tbl0.sal limit 7") + + // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") + + // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ (select count(*) from emp as tbl0) from emp as tbl0") +} + +func TestRandom(t *testing.T) { + mcmp, closer := start(t) + defer closer() + + require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "emp")) + require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "dept")) + + schema := map[string]tableT{ + "emp": {name: "emp", columns: []column{ + {name: "empno", typ: "bigint"}, + {name: "ename", typ: "varchar"}, + {name: "job", typ: "varchar"}, + {name: "mgr", typ: "bigint"}, + {name: "hiredate", typ: "date"}, + {name: "sal", typ: "bigint"}, + {name: "comm", typ: "bigint"}, + {name: "deptno", typ: "bigint"}, + }}, + "dept": {name: "dept", columns: []column{ + {name: "deptno", typ: "bigint"}, + {name: "dname", typ: "varchar"}, + {name: "loc", typ: "varchar"}, + }}, + } + + endBy := time.Now().Add(1 * time.Second) + schemaTables := maps.Values(schema) + + var queryCount int + for time.Now().Before(endBy) || t.Failed() { + query := randomQuery(schemaTables, 3, 3) + mcmp.ExecAllowAndCompareError(query) + if t.Failed() { + fmt.Println(query) + closer() + mcmp, _ = start(t) + } + queryCount++ + } + fmt.Printf("Queries successfully executed: %d\n", queryCount) +} + +func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { + tables := createTables(schemaTables) + + randomCol := func(tblIdx int) (string, string) { + tbl := tables[tblIdx] + col := randomEl(tbl.columns) + return fmt.Sprintf("tbl%d.%s", tblIdx, col.name), col.typ + } + + isDerived := rand.Intn(10) < 1 + aggregates, _ := createAggregations(tables, maxAggrs, randomCol, isDerived) + predicates := createPredicates(tables, randomCol, false) + grouping, _ := createGroupBy(tables, maxGroupBy, randomCol) + sel := "select /*vt+ PLANNER=Gen4 */ " + + // select distinct (fails with group by bigint) + isDistinct := rand.Intn(2) < 1 + if isDistinct { + sel += "distinct " + } + + // select the grouping columns + if len(grouping) > 0 && rand.Intn(2) < 1 { + sel += strings.Join(grouping, ", ") + ", " + } + + // select the ordering columns + // we do it this way, so we don't have to do only `only_full_group_by` queries + var noOfOrderBy int + if len(grouping) > 0 { + // panic on rand function call if value is 0 + noOfOrderBy = rand.Intn(len(grouping)) + } + var orderBy []string + if noOfOrderBy > 0 { + for noOfOrderBy > 0 { + noOfOrderBy-- + if rand.Intn(2) == 0 || len(grouping) == 0 { + orderBy = append(orderBy, randomEl(aggregates)) + } else { + orderBy = append(orderBy, randomEl(grouping)) + } + } + + if rand.Intn(2) < 1 { + sel += strings.Join(orderBy, ", ") + ", " + } + } + + sel += strings.Join(aggregates, ", ") + " from " + + var tbls []string + for i, s := range tables { + tbls = append(tbls, fmt.Sprintf("%s as tbl%d", s.name, i)) + } + sel += strings.Join(tbls, ", ") + + // join + if rand.Intn(2) < 1 { + tables = append(tables, randomEl(schemaTables)) + join := createPredicates(tables, randomCol, true) + sel += " join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].name, len(tables)-1) + if len(join) > 0 { + sel += " on " + strings.Join(join, " and ") + } + } + + if len(predicates) > 0 { + sel += " where " + sel += strings.Join(predicates, " and ") + } + + if len(grouping) > 0 { + sel += " group by " + sel += strings.Join(grouping, ", ") + } + + if noOfOrderBy > 0 { + sel += " order by " + sel += strings.Join(orderBy, ", ") + } + + // limit (fails with select grouping columns) + if rand.Intn(2) < 1 { + limitNum := rand.Intn(10) + sel += fmt.Sprintf(" limit %d", limitNum) + } + + // add generated query to schemaTables + schemaTables = append(schemaTables, tableT{ + name: "(" + sel + ")", + columns: nil, + }) + + // derived tables (fails) + if isDerived { + sel = randomQuery(schemaTables, 3, 3) + } + + return sel +} + +func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (string, string)) (grouping []string, groupTypes []string) { + noOfGBs := rand.Intn(maxGB) + for i := 0; i < noOfGBs; i++ { + var tblIdx int + for { + tblIdx = rand.Intn(len(tables)) + if tables[tblIdx].columns != nil { + break + } + } + col, typ := randomCol(tblIdx) + grouping = append(grouping, col) + groupTypes = append(groupTypes, typ) + } + return grouping, groupTypes +} + +func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int) (string, string), isDerived bool) (aggregates []string, aggrTypes []string) { + aggregations := []func(string) string{ + func(_ string) string { return "count(*)" }, + func(e string) string { return fmt.Sprintf("count(%s)", e) }, + //func (e string) string { return fmt.Sprintf("sum(%s)", e) }, + //func(e string) string { return fmt.Sprintf("avg(%s)", e) }, + //func(e string) string { return fmt.Sprintf("min(%s)", e) }, + //func(e string) string { return fmt.Sprintf("max(%s)", e) }, + } + + noOfAggrs := rand.Intn(maxAggrs) + 1 + for i := 0; i < noOfAggrs; i++ { + var tblIdx int + for { + tblIdx = rand.Intn(len(tables)) + if tables[tblIdx].columns != nil { + break + } + } + e, typ := randomCol(tblIdx) + newAggregate := randomEl(aggregations)(e) + addAggr := true + if isDerived { + for _, aggr := range aggregates { + if newAggregate == aggr { + addAggr = false + break + } + } + } + if addAggr { + aggregates = append(aggregates, newAggregate) + if newAggregate == fmt.Sprintf("count(%s)", e) && typ == "bigint" { + aggrTypes = append(aggrTypes, "decimal") + } else { + aggrTypes = append(aggrTypes, typ) + } + } + } + return aggregates, aggrTypes +} + +func createTables(schemaTables []tableT) []tableT { + noOfTables := rand.Intn(2) + 1 + var tables []tableT + + for i := 0; i < noOfTables; i++ { + tables = append(tables, randomEl(schemaTables)) + } + return tables +} + +func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, string), isJoin bool) (predicates []string) { + // if creating predicates for a join, + // then make sure predicates are created for the last two tables (which are being joined) + incr := 0 + if isJoin && len(tables) > 2 { + incr += len(tables) - 2 + tables = tables[len(tables)-3 : len(tables)-1] + } + for idx1 := range tables { + for idx2 := range tables { + if idx1 == idx2 || idx1 < incr || idx2 < incr || tables[idx1].columns == nil || tables[idx2].columns == nil { + continue + } + noOfPredicates := rand.Intn(2) + if isJoin { + noOfPredicates++ + } + + for noOfPredicates > 0 { + col1, t1 := randomCol(idx1) + col2, t2 := randomCol(idx2) + if t1 != t2 { + continue + } + predicates = append(predicates, fmt.Sprintf("%s = %s", col1, col2)) + noOfPredicates-- + } + } + } + return predicates +} + +func randomEl[K any](in []K) K { + return in[rand.Intn(len(in))] +} diff --git a/go/test/endtoend/vtgate/queries/random/schema.sql b/go/test/endtoend/vtgate/queries/random/schema.sql new file mode 100644 index 00000000000..7ef4721a381 --- /dev/null +++ b/go/test/endtoend/vtgate/queries/random/schema.sql @@ -0,0 +1,20 @@ +CREATE TABLE emp ( + EMPNO bigint NOT NULL, + ENAME VARCHAR(10), + JOB VARCHAR(9), + MGR bigint, + HIREDATE DATE, + SAL bigint, + COMM bigint, + DEPTNO bigint, + PRIMARY KEY (EMPNO) +) Engine = InnoDB + COLLATE = utf8mb4_general_ci; + +CREATE TABLE dept ( + DEPTNO bigint, + DNAME VARCHAR(14), + LOC VARCHAR(13), + PRIMARY KEY (DEPTNO) +) Engine = InnoDB + COLLATE = utf8mb4_general_ci; \ No newline at end of file diff --git a/go/test/endtoend/vtgate/queries/random/vschema.json b/go/test/endtoend/vtgate/queries/random/vschema.json new file mode 100644 index 00000000000..21e31d5618c --- /dev/null +++ b/go/test/endtoend/vtgate/queries/random/vschema.json @@ -0,0 +1,26 @@ +{ + "sharded": true, + "vindexes": { + "hash": { + "type": "hash" + } + }, + "tables": { + "emp": { + "column_vindexes": [ + { + "column": "deptno", + "name": "hash" + } + ] + }, + "dept": { + "column_vindexes": [ + { + "column": "deptno", + "name": "hash" + } + ] + } + } +} \ No newline at end of file From f575d69cd5a9be8d65394d05226c28302b9400d8 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Thu, 8 Jun 2023 02:29:34 -0700 Subject: [PATCH 02/29] added some failing test cases and a toggle to generate known failing queries Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 108 ++++++++++++------ 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 066194f546a..4c62a4dfed1 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -30,6 +30,9 @@ import ( "vitess.io/vitess/go/test/endtoend/utils" ) +// if true then known failing query types are still generated by randomQuery() +const TestFailingQueries = true + type ( column struct { name string @@ -83,6 +86,7 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { } func helperTest(t *testing.T, query string) { + t.Helper() t.Run(query, func(t *testing.T) { mcmp, closer := start(t) defer closer() @@ -94,6 +98,7 @@ func helperTest(t *testing.T, query string) { func TestKnownFailures(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "emp")) require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "dept")) + // logs more stuff //clusterInstance.EnableGeneralLog() @@ -115,13 +120,6 @@ func TestKnownFailures(t *testing.T) { // succeeds helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.comm) from emp as tbl0, emp as tbl1") - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0") - - // unsupported - // unsupported: in scatter query: aggregation function - helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") - // succeeds helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl1 group by tbl1.mgr") @@ -131,32 +129,61 @@ func TestKnownFailures(t *testing.T) { // succeeds helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(tbl0.comm) from emp as tbl0, emp as tbl1 join dept as tbl2") - // unsupported - // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0) as tbl0") - - // unsupported - // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0) as tbl0, dept as tbl1") - - // unsupported - // EOF (errno 2013) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") - // succeeds helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0, dept as tbl1") // succeeds helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") + // succeeds + helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0") + + // cannot compare strings, collation is unknown or unsupported (collation ID: 0) (errno 1105) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.empno, max(tbl1.job) from dept as tbl0, emp as tbl1 group by tbl1.empno") + + // cannot compare strings, collation is unknown or unsupported (collation ID: 0) (errno 1105) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.ename, max(tbl0.comm) from emp as tbl0, emp as tbl1 group by tbl1.ename") + + // EOF (errno 2013) (sqlstate HY000) at first, then + // cannot compare strings, collation is unknown or unsupported (collation ID: 0) (errno 1105) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.dname, tbl0.dname, min(tbl0.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl0.dname") + + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.dname, min(tbl1.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl1.dname") + // mismatched results (group by + limit) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.sal, count(tbl0.sal), count(*) from emp as tbl0, emp as tbl1 group by tbl0.sal limit 7") + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0 group by tbl0.sal limit 7") // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") + // unsupported: column collation not known for the function: max(tbl0.hiredate) (errno 1235) (sqlstate 42000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.hiredate) from emp as tbl0") + + // EOF (errno 2013) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl0, (select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0, emp as tbl1 limit 18) as tbl1") + + // push projection does not yet support: *planbuilder.memorySort (errno 1815) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl1 join (select count(*) from emp as tbl0, dept as tbl1 group by tbl1.loc) as tbl2") + // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ (select count(*) from emp as tbl0) from emp as tbl0") + + // unsupported + // unsupported: in scatter query: aggregation function + helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") + + // unsupported + // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0) as tbl0") + + // unsupported + // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0) as tbl0, dept as tbl1") + + // unsupported + // EOF (errno 2013) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") } func TestRandom(t *testing.T) { @@ -184,14 +211,16 @@ func TestRandom(t *testing.T) { }}, } - endBy := time.Now().Add(1 * time.Second) + endBy := time.Now().Add(10 * time.Second) schemaTables := maps.Values(schema) var queryCount int for time.Now().Before(endBy) || t.Failed() { query := randomQuery(schemaTables, 3, 3) - mcmp.ExecAllowAndCompareError(query) - if t.Failed() { + _, vtErr := mcmp.ExecAllowAndCompareError(query) + // t.Failed() will become true once and subsequently print every query + // this instead assumes all queries are valid mysql queries + if vtErr != nil { fmt.Println(query) closer() mcmp, _ = start(t) @@ -210,7 +239,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { return fmt.Sprintf("tbl%d.%s", tblIdx, col.name), col.typ } - isDerived := rand.Intn(10) < 1 + isDerived := rand.Intn(10) < 1 && TestFailingQueries aggregates, _ := createAggregations(tables, maxAggrs, randomCol, isDerived) predicates := createPredicates(tables, randomCol, false) grouping, _ := createGroupBy(tables, maxGroupBy, randomCol) @@ -223,14 +252,14 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // select the grouping columns - if len(grouping) > 0 && rand.Intn(2) < 1 { + if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) { sel += strings.Join(grouping, ", ") + ", " } // select the ordering columns // we do it this way, so we don't have to do only `only_full_group_by` queries var noOfOrderBy int - if len(grouping) > 0 { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { // panic on rand function call if value is 0 noOfOrderBy = rand.Intn(len(grouping)) } @@ -273,7 +302,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(predicates, " and ") } - if len(grouping) > 0 { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { sel += " group by " sel += strings.Join(grouping, ", ") } @@ -284,12 +313,13 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // limit (fails with select grouping columns) - if rand.Intn(2) < 1 { - limitNum := rand.Intn(10) + if rand.Intn(2) < 1 && TestFailingQueries { + limitNum := rand.Intn(20) sel += fmt.Sprintf(" limit %d", limitNum) } // add generated query to schemaTables + // TODO: make columns not nil but prevent aggregation on said columns schemaTables = append(schemaTables, tableT{ name: "(" + sel + ")", columns: nil, @@ -312,6 +342,7 @@ func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (strin if tables[tblIdx].columns != nil { break } + // fmt.Printf("group by tables:\n%v\n tblIdx: %d\n", tables, tblIdx) } col, typ := randomCol(tblIdx) grouping = append(grouping, col) @@ -324,10 +355,10 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int aggregations := []func(string) string{ func(_ string) string { return "count(*)" }, func(e string) string { return fmt.Sprintf("count(%s)", e) }, - //func (e string) string { return fmt.Sprintf("sum(%s)", e) }, + func(e string) string { return fmt.Sprintf("sum(%s)", e) }, //func(e string) string { return fmt.Sprintf("avg(%s)", e) }, - //func(e string) string { return fmt.Sprintf("min(%s)", e) }, - //func(e string) string { return fmt.Sprintf("max(%s)", e) }, + func(e string) string { return fmt.Sprintf("min(%s)", e) }, + func(e string) string { return fmt.Sprintf("max(%s)", e) }, } noOfAggrs := rand.Intn(maxAggrs) + 1 @@ -338,10 +369,13 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int if tables[tblIdx].columns != nil { break } + // fmt.Printf("aggregation tables:\n%v\n tblIdx: %d\n", tables, tblIdx) } e, typ := randomCol(tblIdx) newAggregate := randomEl(aggregations)(e) addAggr := true + + // derived tables do not allow duplicate columns if isDerived { for _, aggr := range aggregates { if newAggregate == aggr { @@ -350,6 +384,12 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int } } } + // collation unsupported on temporal types + // collation somewhat unsupported on varchar and bigint with group by + if (newAggregate == fmt.Sprintf("max(%s)", e) || newAggregate == fmt.Sprintf("min(%s)", e)) && (typ != "bigint" || !TestFailingQueries) && (typ != "varchar" || !TestFailingQueries) { + i-- + continue + } if addAggr { aggregates = append(aggregates, newAggregate) if newAggregate == fmt.Sprintf("count(%s)", e) && typ == "bigint" { @@ -363,9 +403,11 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int } func createTables(schemaTables []tableT) []tableT { - noOfTables := rand.Intn(2) + 1 var tables []tableT + // add at least one of original emp/dept tables for now because derived tables have nil columns + tables = append(tables, schemaTables[rand.Intn(2)]) + noOfTables := rand.Intn(len(schemaTables)) for i := 0; i < noOfTables; i++ { tables = append(tables, randomEl(schemaTables)) } From e0736a3a2ed2bd4edbf1264ff1d89ac8364a5f8f Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Thu, 8 Jun 2023 20:50:16 -0700 Subject: [PATCH 03/29] fix WaitForAuthoritative function call Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/vtgate/queries/random/random_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 4c62a4dfed1..8dd1002d0d6 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -96,8 +96,8 @@ func helperTest(t *testing.T, query string) { } func TestKnownFailures(t *testing.T) { - require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "emp")) - require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "dept")) + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) // logs more stuff //clusterInstance.EnableGeneralLog() @@ -190,8 +190,8 @@ func TestRandom(t *testing.T) { mcmp, closer := start(t) defer closer() - require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "emp")) - require.NoError(t, utils.WaitForAuthoritative(t, clusterInstance.VtgateProcess, keyspaceName, "dept")) + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) schema := map[string]tableT{ "emp": {name: "emp", columns: []column{ From ffe4df26a2ab0ac53963232e3725663641e5cd19 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Thu, 8 Jun 2023 23:53:39 -0700 Subject: [PATCH 04/29] moved randomly generated buggy queries to aggregation Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../queries/aggregation/aggregation_test.go | 55 +++++++++++ .../vtgate/queries/random/random_test.go | 94 ++++++------------- go/vt/sqlparser/random_expr.go | 2 +- 3 files changed, 83 insertions(+), 68 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go index d6cb7baeb1d..072d029b97b 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go +++ b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go @@ -452,6 +452,61 @@ func TestBuggyQueries(t *testing.T) { mcmp.Exec("select /*vt+ PLANNER=gen4 */sum(tbl1.a), min(tbl0.b) from t10 as tbl0, t10 as tbl1 left join t10 as tbl2 on tbl1.a = tbl2.a and tbl1.b = tbl2.k") mcmp.Exec("select /*vt+ PLANNER=gen4 */count(*) from t10 left join t10 as t11 on t10.a = t11.b where t11.a") + + // from random/random_test.go + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20);") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10);") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (10,'ACCOUNTING','NEW YORK');") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (20,'RESEARCH','DALLAS');") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (30,'SALES','CHICAGO');") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (40,'OPERATIONS','BOSTON');") + + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(*) from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.deptno group by tbl1.empno order by tbl1.empno", + `[[INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3", + `[[INT64(8)] [INT64(16)] [INT64(12)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from emp as tbl0 group by tbl0.empno order by tbl0.empno", + `[[INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*), tbl0.loc from dept as tbl0 group by tbl0.loc", + `[[INT64(1) VARCHAR("BOSTON")] [INT64(1) VARCHAR("CHICAGO")] [INT64(1) VARCHAR("DALLAS")] [INT64(1) VARCHAR("NEW YORK")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.loc", + `[[INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ sum(tbl1.comm) from emp as tbl0, emp as tbl1", + `[[DECIMAL(30800)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl1 group by tbl1.mgr", + `[[NULL NULL INT64(1)] [INT64(7566) INT64(7566) INT64(2)] [INT64(7698) INT64(7698) INT64(5)] [INT64(7782) INT64(7782) INT64(1)] [INT64(7788) INT64(7788) INT64(1)] [INT64(7839) INT64(7839) INT64(3)] [INT64(7902) INT64(7902) INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl0, emp as tbl1 group by tbl1.mgr", + `[[NULL NULL INT64(14)] [INT64(7566) INT64(7566) INT64(28)] [INT64(7698) INT64(7698) INT64(70)] [INT64(7782) INT64(7782) INT64(14)] [INT64(7788) INT64(7788) INT64(14)] [INT64(7839) INT64(7839) INT64(42)] [INT64(7902) INT64(7902) INT64(14)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(tbl0.comm) from emp as tbl0, emp as tbl1 join dept as tbl2", + `[[INT64(784) INT64(784) INT64(224)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0, dept as tbl1", + `[[INT64(16) INT64(16)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0", + `[[INT64(4)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0", + `[[VARCHAR("BOSTON")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.empno, max(tbl1.job) from dept as tbl0, emp as tbl1 group by tbl1.empno", + `[[INT64(7369) VARCHAR("CLERK")] [INT64(7499) VARCHAR("SALESMAN")] [INT64(7521) VARCHAR("SALESMAN")] [INT64(7566) VARCHAR("MANAGER")] [INT64(7654) VARCHAR("SALESMAN")] [INT64(7698) VARCHAR("MANAGER")] [INT64(7782) VARCHAR("MANAGER")] [INT64(7788) VARCHAR("ANALYST")] [INT64(7839) VARCHAR("PRESIDENT")] [INT64(7844) VARCHAR("SALESMAN")] [INT64(7876) VARCHAR("CLERK")] [INT64(7900) VARCHAR("CLERK")] [INT64(7902) VARCHAR("ANALYST")] [INT64(7934) VARCHAR("CLERK")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.ename, max(tbl0.comm) from emp as tbl0, emp as tbl1 group by tbl1.ename", + `[[VARCHAR("ADAMS") INT64(1400)] [VARCHAR("ALLEN") INT64(1400)] [VARCHAR("BLAKE") INT64(1400)] [VARCHAR("CLARK") INT64(1400)] [VARCHAR("FORD") INT64(1400)] [VARCHAR("JAMES") INT64(1400)] [VARCHAR("JONES") INT64(1400)] [VARCHAR("KING") INT64(1400)] [VARCHAR("MARTIN") INT64(1400)] [VARCHAR("MILLER") INT64(1400)] [VARCHAR("SCOTT") INT64(1400)] [VARCHAR("SMITH") INT64(1400)] [VARCHAR("TURNER") INT64(1400)] [VARCHAR("WARD") INT64(1400)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl0.dname, tbl0.dname, min(tbl0.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl0.dname", + `[[VARCHAR("ACCOUNTING") VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("OPERATIONS") VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("RESEARCH") VARCHAR("RESEARCH") INT64(20)] [VARCHAR("SALES") VARCHAR("SALES") INT64(30)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl0.dname, min(tbl1.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl1.dname", + `[[VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("ACCOUNTING") INT64(40)] [VARCHAR("ACCOUNTING") INT64(20)] [VARCHAR("ACCOUNTING") INT64(30)] [VARCHAR("OPERATIONS") INT64(10)] [VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("OPERATIONS") INT64(20)] [VARCHAR("OPERATIONS") INT64(30)] [VARCHAR("RESEARCH") INT64(10)] [VARCHAR("RESEARCH") INT64(40)] [VARCHAR("RESEARCH") INT64(20)] [VARCHAR("RESEARCH") INT64(30)] [VARCHAR("SALES") INT64(10)] [VARCHAR("SALES") INT64(40)] [VARCHAR("SALES") INT64(20)] [VARCHAR("SALES") INT64(30)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ max(tbl0.hiredate) from emp as tbl0", + `[[DATE("1983-01-12")]]`) } func TestMinMaxAcrossJoins(t *testing.T) { diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 8dd1002d0d6..6adda9d53b0 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -31,7 +31,7 @@ import ( ) // if true then known failing query types are still generated by randomQuery() -const TestFailingQueries = true +const TestFailingQueries = false type ( column struct { @@ -91,7 +91,8 @@ func helperTest(t *testing.T, query string) { mcmp, closer := start(t) defer closer() - mcmp.Exec(query) + result := mcmp.Exec(query) + fmt.Println(result) }) } @@ -102,64 +103,12 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(*) from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.deptno group by tbl1.empno order by tbl1.empno") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from emp as tbl0 group by tbl0.empno order by tbl0.empno") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*), tbl0.loc from dept as tbl0 group by tbl0.loc") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.loc") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.comm) from emp as tbl0, emp as tbl1") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl1 group by tbl1.mgr") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl0, emp as tbl1 group by tbl1.mgr") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(tbl0.comm) from emp as tbl0, emp as tbl1 join dept as tbl2") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0, dept as tbl1") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") - - // succeeds - helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0") - - // cannot compare strings, collation is unknown or unsupported (collation ID: 0) (errno 1105) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.empno, max(tbl1.job) from dept as tbl0, emp as tbl1 group by tbl1.empno") - - // cannot compare strings, collation is unknown or unsupported (collation ID: 0) (errno 1105) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.ename, max(tbl0.comm) from emp as tbl0, emp as tbl1 group by tbl1.ename") - - // EOF (errno 2013) (sqlstate HY000) at first, then - // cannot compare strings, collation is unknown or unsupported (collation ID: 0) (errno 1105) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.dname, tbl0.dname, min(tbl0.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl0.dname") - - // mismatched results - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.dname, min(tbl1.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl1.dname") - // mismatched results (group by + limit) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0 group by tbl0.sal limit 7") // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") - // unsupported: column collation not known for the function: max(tbl0.hiredate) (errno 1235) (sqlstate 42000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.hiredate) from emp as tbl0") - // EOF (errno 2013) (sqlstate HY000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl0, (select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0, emp as tbl1 limit 18) as tbl1") @@ -215,13 +164,14 @@ func TestRandom(t *testing.T) { schemaTables := maps.Values(schema) var queryCount int - for time.Now().Before(endBy) || t.Failed() { + for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { query := randomQuery(schemaTables, 3, 3) _, vtErr := mcmp.ExecAllowAndCompareError(query) // t.Failed() will become true once and subsequently print every query // this instead assumes all queries are valid mysql queries if vtErr != nil { fmt.Println(query) + fmt.Println(vtErr) closer() mcmp, _ = start(t) } @@ -240,9 +190,9 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } isDerived := rand.Intn(10) < 1 && TestFailingQueries - aggregates, _ := createAggregations(tables, maxAggrs, randomCol, isDerived) + aggregates, aggrTypes := createAggregations(tables, maxAggrs, randomCol, isDerived) predicates := createPredicates(tables, randomCol, false) - grouping, _ := createGroupBy(tables, maxGroupBy, randomCol) + grouping, groupTypes := createGroupBy(tables, maxGroupBy, randomCol) sel := "select /*vt+ PLANNER=Gen4 */ " // select distinct (fails with group by bigint) @@ -279,6 +229,14 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } } + var newColumns []column + // populate columns of this query to add to schemaTables + for i := range aggregates { + newColumns = append(newColumns, column{ + name: aggregates[i], + typ: aggrTypes[i], + }) + } sel += strings.Join(aggregates, ", ") + " from " var tbls []string @@ -291,6 +249,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { if rand.Intn(2) < 1 { tables = append(tables, randomEl(schemaTables)) join := createPredicates(tables, randomCol, true) + sel += " join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].name, len(tables)-1) if len(join) > 0 { sel += " on " + strings.Join(join, " and ") @@ -303,6 +262,13 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { + // populate columns of this query to add to schemaTables + for i := range grouping { + newColumns = append(newColumns, column{ + name: grouping[i], + typ: groupTypes[i], + }) + } sel += " group by " sel += strings.Join(grouping, ", ") } @@ -322,10 +288,10 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // TODO: make columns not nil but prevent aggregation on said columns schemaTables = append(schemaTables, tableT{ name: "(" + sel + ")", - columns: nil, + columns: newColumns, }) - // derived tables (fails) + // derived tables (unsupported) if isDerived { sel = randomQuery(schemaTables, 3, 3) } @@ -373,9 +339,9 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int } e, typ := randomCol(tblIdx) newAggregate := randomEl(aggregations)(e) - addAggr := true // derived tables do not allow duplicate columns + addAggr := true if isDerived { for _, aggr := range aggregates { if newAggregate == aggr { @@ -384,15 +350,9 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int } } } - // collation unsupported on temporal types - // collation somewhat unsupported on varchar and bigint with group by - if (newAggregate == fmt.Sprintf("max(%s)", e) || newAggregate == fmt.Sprintf("min(%s)", e)) && (typ != "bigint" || !TestFailingQueries) && (typ != "varchar" || !TestFailingQueries) { - i-- - continue - } if addAggr { aggregates = append(aggregates, newAggregate) - if newAggregate == fmt.Sprintf("count(%s)", e) && typ == "bigint" { + if newAggregate == fmt.Sprintf("avg(%s)", e) && typ == "bigint" { aggrTypes = append(aggrTypes, "decimal") } else { aggrTypes = append(aggrTypes, typ) diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index e2725f37a37..359e002d287 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -58,7 +58,7 @@ func (g *generator) atMaxDepth() bool { Creates a random expression. It builds an expression tree using the following constructs: - true/false - AND/OR/NOT - - string literalrs, numeric literals (-/+ 1000) + - string literals, numeric literals (-/+ 1000) - =, >, <, >=, <=, <=>, != - &, |, ^, +, -, *, /, div, %, <<, >> - IN, BETWEEN and CASE From 9bff9e005510b148d29b1575b6daa832f0712922 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Fri, 9 Jun 2023 01:16:24 -0700 Subject: [PATCH 05/29] added left/right joins Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 6adda9d53b0..97eb090820b 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -103,9 +103,24 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() + // mismatched results (group by + right join) + // left instead of right works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno") + + // mismatched results (count + right join) + // left instead of right works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal") + + // mismatched results (sum + right join) + // left instead of right works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") + // mismatched results (group by + limit) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0 group by tbl0.sal limit 7") + // mismatched results (group by + select grouping + limit) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.sal, count(*) from emp as tbl0 group by tbl0.sal limit 7") + // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") @@ -202,14 +217,15 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // select the grouping columns - if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) { + isRightJoin := rand.Intn(2) < 1 + if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isRightJoin || TestFailingQueries) { sel += strings.Join(grouping, ", ") + ", " } // select the ordering columns // we do it this way, so we don't have to do only `only_full_group_by` queries var noOfOrderBy int - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isRightJoin || TestFailingQueries) { // panic on rand function call if value is 0 noOfOrderBy = rand.Intn(len(grouping)) } @@ -246,11 +262,11 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(tbls, ", ") // join - if rand.Intn(2) < 1 { + if isRightJoin { tables = append(tables, randomEl(schemaTables)) join := createPredicates(tables, randomCol, true) - sel += " join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].name, len(tables)-1) + sel += " left join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].name, len(tables)-1) if len(join) > 0 { sel += " on " + strings.Join(join, " and ") } @@ -261,7 +277,8 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(predicates, " and ") } - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { + isGrouping := false + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isRightJoin || TestFailingQueries) { // populate columns of this query to add to schemaTables for i := range grouping { newColumns = append(newColumns, column{ @@ -271,6 +288,8 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } sel += " group by " sel += strings.Join(grouping, ", ") + + isGrouping = true } if noOfOrderBy > 0 { @@ -279,7 +298,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // limit (fails with select grouping columns) - if rand.Intn(2) < 1 && TestFailingQueries { + if rand.Intn(2) < 1 && (!isGrouping || noOfOrderBy != 0 || TestFailingQueries) { limitNum := rand.Intn(20) sel += fmt.Sprintf(" limit %d", limitNum) } @@ -291,7 +310,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { columns: newColumns, }) - // derived tables (unsupported) + // derived tables (partially unsupported) if isDerived { sel = randomQuery(schemaTables, 3, 3) } @@ -380,11 +399,11 @@ func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, strin incr := 0 if isJoin && len(tables) > 2 { incr += len(tables) - 2 - tables = tables[len(tables)-3 : len(tables)-1] } for idx1 := range tables { for idx2 := range tables { - if idx1 == idx2 || idx1 < incr || idx2 < incr || tables[idx1].columns == nil || tables[idx2].columns == nil { + // fmt.Printf("predicate tables:\n%v\n idx1: %d idx2: %d, incr: %d", tables, idx1, idx2, incr) + if idx1 >= idx2 || idx1 < incr || idx2 < incr || tables[idx1].columns == nil || tables[idx2].columns == nil { continue } noOfPredicates := rand.Intn(2) From 1f440f3c6b79e72c7c09334daf726dbbb2a134ea Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Sun, 11 Jun 2023 23:08:29 -0700 Subject: [PATCH 06/29] Refactor sqlparser/random_expr.go to be public Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 42 ++++---- go/vt/sqlparser/precedence_test.go | 4 +- go/vt/sqlparser/random_expr.go | 102 +++++++++++++----- go/vt/sqlparser/rewriter_test.go | 4 +- go/vt/sqlparser/walker_test.go | 4 +- 5 files changed, 100 insertions(+), 56 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 97eb090820b..0299a6e26eb 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -205,9 +205,9 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } isDerived := rand.Intn(10) < 1 && TestFailingQueries - aggregates, aggrTypes := createAggregations(tables, maxAggrs, randomCol, isDerived) + aggregates, _ := createAggregations(tables, maxAggrs, randomCol, isDerived) predicates := createPredicates(tables, randomCol, false) - grouping, groupTypes := createGroupBy(tables, maxGroupBy, randomCol) + grouping, _ := createGroupBy(tables, maxGroupBy, randomCol) sel := "select /*vt+ PLANNER=Gen4 */ " // select distinct (fails with group by bigint) @@ -217,15 +217,15 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // select the grouping columns - isRightJoin := rand.Intn(2) < 1 - if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isRightJoin || TestFailingQueries) { + isLeftJoin := rand.Intn(2) < 1 + if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isLeftJoin || TestFailingQueries) { sel += strings.Join(grouping, ", ") + ", " } // select the ordering columns // we do it this way, so we don't have to do only `only_full_group_by` queries var noOfOrderBy int - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isRightJoin || TestFailingQueries) { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isLeftJoin || TestFailingQueries) { // panic on rand function call if value is 0 noOfOrderBy = rand.Intn(len(grouping)) } @@ -245,14 +245,14 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } } - var newColumns []column + //var newColumns []column // populate columns of this query to add to schemaTables - for i := range aggregates { - newColumns = append(newColumns, column{ - name: aggregates[i], - typ: aggrTypes[i], - }) - } + //for i := range aggregates { + // newColumns = append(newColumns, column{ + // name: aggregates[i], + // typ: aggrTypes[i], + // }) + //} sel += strings.Join(aggregates, ", ") + " from " var tbls []string @@ -262,7 +262,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(tbls, ", ") // join - if isRightJoin { + if isLeftJoin { tables = append(tables, randomEl(schemaTables)) join := createPredicates(tables, randomCol, true) @@ -278,14 +278,14 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } isGrouping := false - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isRightJoin || TestFailingQueries) { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isLeftJoin || TestFailingQueries) { // populate columns of this query to add to schemaTables - for i := range grouping { - newColumns = append(newColumns, column{ - name: grouping[i], - typ: groupTypes[i], - }) - } + //for i := range grouping { + // newColumns = append(newColumns, column{ + // name: grouping[i], + // typ: groupTypes[i], + // }) + //} sel += " group by " sel += strings.Join(grouping, ", ") @@ -307,7 +307,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // TODO: make columns not nil but prevent aggregation on said columns schemaTables = append(schemaTables, tableT{ name: "(" + sel + ")", - columns: newColumns, + columns: nil, }) // derived tables (partially unsupported) diff --git a/go/vt/sqlparser/precedence_test.go b/go/vt/sqlparser/precedence_test.go index 215c9480823..cb8c1f23805 100644 --- a/go/vt/sqlparser/precedence_test.go +++ b/go/vt/sqlparser/precedence_test.go @@ -216,7 +216,7 @@ func TestRandom(t *testing.T) { // The idea is to generate random queries, and pass them through the parser and then the unparser, and one more time. The result of the first unparse should be the same as the second result. seed := time.Now().UnixNano() fmt.Println(fmt.Sprintf("seed is %d", seed)) // nolint - g := newGenerator(seed, 5) + g := NewGenerator(seed, 5) endBy := time.Now().Add(1 * time.Second) for { @@ -224,7 +224,7 @@ func TestRandom(t *testing.T) { break } // Given a random expression - randomExpr := g.expression() + randomExpr := g.Expression() inputQ := "select " + String(randomExpr) + " from t" // When it's parsed and unparsed diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 359e002d287..4efe346c3d2 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -21,36 +21,49 @@ import ( "math/rand" ) +type ( + Col struct { + Name string + Typ string + } + TabletT struct { + Name string + Columns []Col + } +) + // This file is used to generate random expressions to be used for testing -func newGenerator(seed int64, maxDepth int) *generator { - g := generator{ +func NewGenerator(seed int64, maxDepth int, tables ...TabletT) *Generator { + g := Generator{ seed: seed, r: rand.New(rand.NewSource(seed)), maxDepth: maxDepth, + tables: tables, } return &g } -type generator struct { +type Generator struct { seed int64 r *rand.Rand depth int maxDepth int + tables []TabletT } // enter should be called whenever we are producing an intermediate node. it should be followed by a `defer g.exit()` -func (g *generator) enter() { +func (g *Generator) enter() { g.depth++ } // exit should be called when exiting an intermediate node -func (g *generator) exit() { +func (g *Generator) exit() { g.depth-- } // atMaxDepth returns true if we have reached the maximum allowed depth or the expression tree -func (g *generator) atMaxDepth() bool { +func (g *Generator) atMaxDepth() bool { return g.depth >= g.maxDepth } @@ -67,7 +80,7 @@ func (g *generator) atMaxDepth() bool { Note: It's important to update this method so that it produces all expressions that need precedence checking. It's currently missing function calls and string operators */ -func (g *generator) expression() Expr { +func (g *Generator) Expression() Expr { if g.randomBool() { return g.booleanExpr() } @@ -80,7 +93,7 @@ func (g *generator) expression() Expr { return g.randomOf(options) } -func (g *generator) booleanExpr() Expr { +func (g *Generator) booleanExpr() Expr { if g.atMaxDepth() { return g.booleanLiteral() } @@ -102,7 +115,7 @@ func (g *generator) booleanExpr() Expr { return g.randomOf(options) } -func (g *generator) intExpr() Expr { +func (g *Generator) intExpr() Expr { if g.atMaxDepth() { return g.intLiteral() } @@ -110,21 +123,22 @@ func (g *generator) intExpr() Expr { options := []exprF{ func() Expr { return g.arithmetic() }, func() Expr { return g.intLiteral() }, + func() Expr { return g.intColumn() }, func() Expr { return g.caseExpr(g.intExpr) }, } return g.randomOf(options) } -func (g *generator) booleanLiteral() Expr { +func (g *Generator) booleanLiteral() Expr { return BoolVal(g.randomBool()) } -func (g *generator) randomBool() bool { +func (g *Generator) randomBool() bool { return g.r.Float32() < 0.5 } -func (g *generator) intLiteral() Expr { +func (g *Generator) intLiteral() Expr { t := fmt.Sprintf("%d", g.r.Intn(1000)-g.r.Intn((1000))) return NewIntLiteral(t) @@ -132,24 +146,25 @@ func (g *generator) intLiteral() Expr { var words = []string{"ox", "ant", "ape", "asp", "bat", "bee", "boa", "bug", "cat", "cod", "cow", "cub", "doe", "dog", "eel", "eft", "elf", "elk", "emu", "ewe", "fly", "fox", "gar", "gnu", "hen", "hog", "imp", "jay", "kid", "kit", "koi", "lab", "man", "owl", "pig", "pug", "pup", "ram", "rat", "ray", "yak", "bass", "bear", "bird", "boar", "buck", "bull", "calf", "chow", "clam", "colt", "crab", "crow", "dane", "deer", "dodo", "dory", "dove", "drum", "duck", "fawn", "fish", "flea", "foal", "fowl", "frog", "gnat", "goat", "grub", "gull", "hare", "hawk", "ibex", "joey", "kite", "kiwi", "lamb", "lark", "lion", "loon", "lynx", "mako", "mink", "mite", "mole", "moth", "mule", "mutt", "newt", "orca", "oryx", "pika", "pony", "puma", "seal", "shad", "slug", "sole", "stag", "stud", "swan", "tahr", "teal", "tick", "toad", "tuna", "wasp", "wolf", "worm", "wren", "yeti", "adder", "akita", "alien", "aphid", "bison", "boxer", "bream", "bunny", "burro", "camel", "chimp", "civet", "cobra", "coral", "corgi", "crane", "dingo", "drake", "eagle", "egret", "filly", "finch", "gator", "gecko", "ghost", "ghoul", "goose", "guppy", "heron", "hippo", "horse", "hound", "husky", "hyena", "koala", "krill", "leech", "lemur", "liger", "llama", "louse", "macaw", "midge", "molly", "moose", "moray", "mouse", "panda", "perch", "prawn", "quail", "racer", "raven", "rhino", "robin", "satyr", "shark", "sheep", "shrew", "skink", "skunk", "sloth", "snail", "snake", "snipe", "squid", "stork", "swift", "swine", "tapir", "tetra", "tiger", "troll", "trout", "viper", "wahoo", "whale", "zebra", "alpaca", "amoeba", "baboon", "badger", "beagle", "bedbug", "beetle", "bengal", "bobcat", "caiman", "cattle", "cicada", "collie", "condor", "cougar", "coyote", "dassie", "donkey", "dragon", "earwig", "falcon", "feline", "ferret", "gannet", "gibbon", "glider", "goblin", "gopher", "grouse", "guinea", "hermit", "hornet", "iguana", "impala", "insect", "jackal", "jaguar", "jennet", "kitten", "kodiak", "lizard", "locust", "maggot", "magpie", "mammal", "mantis", "marlin", "marmot", "marten", "martin", "mayfly", "minnow", "monkey", "mullet", "muskox", "ocelot", "oriole", "osprey", "oyster", "parrot", "pigeon", "piglet", "poodle", "possum", "python", "quagga", "rabbit", "raptor", "rodent", "roughy", "salmon", "sawfly", "serval", "shiner", "shrimp", "spider", "sponge", "tarpon", "thrush", "tomcat", "toucan", "turkey", "turtle", "urchin", "vervet", "walrus", "weasel", "weevil", "wombat", "anchovy", "anemone", "bluejay", "buffalo", "bulldog", "buzzard", "caribou", "catfish", "chamois", "cheetah", "chicken", "chigger", "cowbird", "crappie", "crawdad", "cricket", "dogfish", "dolphin", "firefly", "garfish", "gazelle", "gelding", "giraffe", "gobbler", "gorilla", "goshawk", "grackle", "griffon", "grizzly", "grouper", "haddock", "hagfish", "halibut", "hamster", "herring", "jackass", "javelin", "jawfish", "jaybird", "katydid", "ladybug", "lamprey", "lemming", "leopard", "lioness", "lobster", "macaque", "mallard", "mammoth", "manatee", "mastiff", "meerkat", "mollusk", "monarch", "mongrel", "monitor", "monster", "mudfish", "muskrat", "mustang", "narwhal", "oarfish", "octopus", "opossum", "ostrich", "panther", "peacock", "pegasus", "pelican", "penguin", "phoenix", "piranha", "polecat", "primate", "quetzal", "raccoon", "rattler", "redbird", "redfish", "reptile", "rooster", "sawfish", "sculpin", "seagull", "skylark", "snapper", "spaniel", "sparrow", "sunbeam", "sunbird", "sunfish", "tadpole", "termite", "terrier", "unicorn", "vulture", "wallaby", "walleye", "warthog", "whippet", "wildcat", "aardvark", "airedale", "albacore", "anteater", "antelope", "arachnid", "barnacle", "basilisk", "blowfish", "bluebird", "bluegill", "bonefish", "bullfrog", "cardinal", "chipmunk", "cockatoo", "crayfish", "dinosaur", "doberman", "duckling", "elephant", "escargot", "flamingo", "flounder", "foxhound", "glowworm", "goldfish", "grubworm", "hedgehog", "honeybee", "hookworm", "humpback", "kangaroo", "killdeer", "kingfish", "labrador", "lacewing", "ladybird", "lionfish", "longhorn", "mackerel", "malamute", "marmoset", "mastodon", "moccasin", "mongoose", "monkfish", "mosquito", "pangolin", "parakeet", "pheasant", "pipefish", "platypus", "polliwog", "porpoise", "reindeer", "ringtail", "sailfish", "scorpion", "seahorse", "seasnail", "sheepdog", "shepherd", "silkworm", "squirrel", "stallion", "starfish", "starling", "stingray", "stinkbug", "sturgeon", "terrapin", "titmouse", "tortoise", "treefrog", "werewolf", "woodcock"} -func (g *generator) stringLiteral() Expr { +func (g *Generator) stringLiteral() Expr { return NewStrLiteral(g.randomOfS(words)) } -func (g *generator) stringExpr() Expr { +func (g *Generator) stringExpr() Expr { if g.atMaxDepth() { return g.stringLiteral() } options := []exprF{ func() Expr { return g.stringLiteral() }, + func() Expr { return g.stringColumn() }, func() Expr { return g.caseExpr(g.stringExpr) }, } return g.randomOf(options) } -func (g *generator) likeExpr() Expr { +func (g *Generator) likeExpr() Expr { g.enter() defer g.exit() return &ComparisonExpr{ @@ -161,7 +176,7 @@ func (g *generator) likeExpr() Expr { var comparisonOps = []ComparisonExprOperator{EqualOp, LessThanOp, GreaterThanOp, LessEqualOp, GreaterEqualOp, NotEqualOp, NullSafeEqualOp} -func (g *generator) comparison(f func() Expr) Expr { +func (g *Generator) comparison(f func() Expr) Expr { g.enter() defer g.exit() @@ -173,7 +188,7 @@ func (g *generator) comparison(f func() Expr) Expr { return cmp } -func (g *generator) caseExpr(valueF func() Expr) Expr { +func (g *Generator) caseExpr(valueF func() Expr) Expr { g.enter() defer g.exit() @@ -193,12 +208,12 @@ func (g *generator) caseExpr(valueF func() Expr) Expr { if exp == nil { cond = g.booleanExpr() } else { - cond = g.expression() + cond = g.Expression() } whens = append(whens, &When{ Cond: cond, - Val: g.expression(), + Val: g.Expression(), }) } @@ -211,7 +226,7 @@ func (g *generator) caseExpr(valueF func() Expr) Expr { var arithmeticOps = []BinaryExprOperator{BitAndOp, BitOrOp, BitXorOp, PlusOp, MinusOp, MultOp, DivOp, IntDivOp, ModOp, ShiftRightOp, ShiftLeftOp} -func (g *generator) arithmetic() Expr { +func (g *Generator) arithmetic() Expr { g.enter() defer g.exit() @@ -224,17 +239,46 @@ func (g *generator) arithmetic() Expr { } } +func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { + table := g.tables[rand.Intn(len(g.tables))] + for len(table.Columns) > 0 { + idx := rand.Intn(len(table.Columns)) + randCol := table.Columns[idx] + if randCol.Typ == typ /* better way to check if int type? */ { + return &ColName{ + Metadata: nil, + Name: NewIdentifierCI(randCol.Name), + Qualifier: TableName{Name: NewIdentifierCS(table.Name)}, + } + } else { + // delete randCol from table.columns + table.Columns[idx] = table.Columns[len(table.Columns)-1] + table.Columns = table.Columns[:len(table.Columns)-1] + } + } + + return typeLiteral() +} + +func (g *Generator) intColumn() Expr { + return g.typeColumn("bigint", g.intLiteral) +} + +func (g *Generator) stringColumn() Expr { + return g.typeColumn("varchar", g.stringLiteral) +} + type exprF func() Expr -func (g *generator) randomOf(options []exprF) Expr { +func (g *Generator) randomOf(options []exprF) Expr { return options[g.r.Intn(len(options))]() } -func (g *generator) randomOfS(options []string) string { +func (g *Generator) randomOfS(options []string) string { return options[g.r.Intn(len(options))] } -func (g *generator) andExpr() Expr { +func (g *Generator) andExpr() Expr { g.enter() defer g.exit() return &AndExpr{ @@ -243,7 +287,7 @@ func (g *generator) andExpr() Expr { } } -func (g *generator) orExpr() Expr { +func (g *Generator) orExpr() Expr { g.enter() defer g.exit() return &OrExpr{ @@ -252,7 +296,7 @@ func (g *generator) orExpr() Expr { } } -func (g *generator) xorExpr() Expr { +func (g *Generator) xorExpr() Expr { g.enter() defer g.exit() return &XorExpr{ @@ -261,13 +305,13 @@ func (g *generator) xorExpr() Expr { } } -func (g *generator) notExpr() Expr { +func (g *Generator) notExpr() Expr { g.enter() defer g.exit() return &NotExpr{g.booleanExpr()} } -func (g *generator) inExpr() Expr { +func (g *Generator) inExpr() Expr { g.enter() defer g.exit() @@ -289,7 +333,7 @@ func (g *generator) inExpr() Expr { } } -func (g *generator) between() Expr { +func (g *Generator) between() Expr { g.enter() defer g.exit() @@ -308,7 +352,7 @@ func (g *generator) between() Expr { } } -func (g *generator) isExpr() Expr { +func (g *Generator) isExpr() Expr { g.enter() defer g.exit() diff --git a/go/vt/sqlparser/rewriter_test.go b/go/vt/sqlparser/rewriter_test.go index dadd2c501df..9adae1b4a81 100644 --- a/go/vt/sqlparser/rewriter_test.go +++ b/go/vt/sqlparser/rewriter_test.go @@ -25,8 +25,8 @@ import ( ) func BenchmarkVisitLargeExpression(b *testing.B) { - gen := newGenerator(1, 5) - exp := gen.expression() + gen := NewGenerator(1, 5) + exp := gen.Expression() depth := 0 for i := 0; i < b.N; i++ { diff --git a/go/vt/sqlparser/walker_test.go b/go/vt/sqlparser/walker_test.go index f8bf2b4792a..5359235afa5 100644 --- a/go/vt/sqlparser/walker_test.go +++ b/go/vt/sqlparser/walker_test.go @@ -26,7 +26,7 @@ import ( func BenchmarkWalkLargeExpression(b *testing.B) { for i := 0; i < 10; i++ { b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { - exp := newGenerator(int64(i*100), 5).expression() + exp := NewGenerator(int64(i*100), 5).Expression() count := 0 for i := 0; i < b.N; i++ { err := Walk(func(node SQLNode) (kontinue bool, err error) { @@ -42,7 +42,7 @@ func BenchmarkWalkLargeExpression(b *testing.B) { func BenchmarkRewriteLargeExpression(b *testing.B) { for i := 1; i < 7; i++ { b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { - exp := newGenerator(int64(i*100), i).expression() + exp := NewGenerator(int64(i*100), i).Expression() count := 0 for i := 0; i < b.N; i++ { _ = Rewrite(exp, func(_ *Cursor) bool { From 40298d51b29e89718b0f99337f4da8c620839eae Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Mon, 12 Jun 2023 01:16:49 -0700 Subject: [PATCH 07/29] added random expressions to random query generation Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 117 +++++++++++------- go/vt/sqlparser/random_expr.go | 35 +++--- go/vt/sqlparser/random_expr_test.go | 35 ++++++ 3 files changed, 128 insertions(+), 59 deletions(-) create mode 100644 go/vt/sqlparser/random_expr_test.go diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 0299a6e26eb..180fb91902f 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -22,6 +22,7 @@ import ( "strings" "testing" "time" + "vitess.io/vitess/go/vt/sqlparser" "github.com/stretchr/testify/require" "golang.org/x/exp/maps" @@ -30,20 +31,12 @@ import ( "vitess.io/vitess/go/test/endtoend/utils" ) +type tableT = sqlparser.TableT +type column = sqlparser.Col + // if true then known failing query types are still generated by randomQuery() const TestFailingQueries = false -type ( - column struct { - name string - typ string - } - tableT struct { - name string - columns []column - } -) - func start(t *testing.T) (utils.MySQLCompare, func()) { mcmp, err := utils.NewMySQLCompare(t, vtParams, mysqlParams) require.NoError(t, err) @@ -59,6 +52,8 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { deleteAll() + // mcmp.Exec("set sql_mode=''") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20);") mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30);") mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30);") @@ -91,8 +86,9 @@ func helperTest(t *testing.T, query string) { mcmp, closer := start(t) defer closer() - result := mcmp.Exec(query) + result, err := mcmp.ExecAllowAndCompareError(query) fmt.Println(result) + fmt.Println(err) }) } @@ -103,6 +99,17 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() + // vitess error: nil + // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' + helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.ename), min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") + + // Cannot convert value to desired type + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno") + + // only_full_group_by disabled + // unknown aggregation random + helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.comm), count(tbl1.loc), min(tbl1.deptno), min(tbl0.comm) from emp as tbl0, dept as tbl1 left join emp as tbl2 on tbl1.loc = tbl2.job and tbl1.deptno = tbl2.comm where tbl0.empno = tbl1.deptno") + // mismatched results (group by + right join) // left instead of right works helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno") @@ -158,20 +165,20 @@ func TestRandom(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) schema := map[string]tableT{ - "emp": {name: "emp", columns: []column{ - {name: "empno", typ: "bigint"}, - {name: "ename", typ: "varchar"}, - {name: "job", typ: "varchar"}, - {name: "mgr", typ: "bigint"}, - {name: "hiredate", typ: "date"}, - {name: "sal", typ: "bigint"}, - {name: "comm", typ: "bigint"}, - {name: "deptno", typ: "bigint"}, + "emp": {Name: "emp", Cols: []column{ + {Name: "empno", Typ: "bigint"}, + {Name: "ename", Typ: "varchar"}, + {Name: "job", Typ: "varchar"}, + {Name: "mgr", Typ: "bigint"}, + {Name: "hiredate", Typ: "date"}, + {Name: "sal", Typ: "bigint"}, + {Name: "comm", Typ: "bigint"}, + {Name: "deptno", Typ: "bigint"}, }}, - "dept": {name: "dept", columns: []column{ - {name: "deptno", typ: "bigint"}, - {name: "dname", typ: "varchar"}, - {name: "loc", typ: "varchar"}, + "dept": {Name: "dept", Cols: []column{ + {Name: "deptno", Typ: "bigint"}, + {Name: "dname", Typ: "varchar"}, + {Name: "loc", Typ: "varchar"}, }}, } @@ -182,10 +189,10 @@ func TestRandom(t *testing.T) { for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { query := randomQuery(schemaTables, 3, 3) _, vtErr := mcmp.ExecAllowAndCompareError(query) + fmt.Println(query) // t.Failed() will become true once and subsequently print every query // this instead assumes all queries are valid mysql queries if vtErr != nil { - fmt.Println(query) fmt.Println(vtErr) closer() mcmp, _ = start(t) @@ -195,13 +202,20 @@ func TestRandom(t *testing.T) { fmt.Printf("Queries successfully executed: %d\n", queryCount) } +func getRandomExpr(tables []tableT) string { + seed := time.Now().UnixNano() + g := sqlparser.NewGenerator(seed, 2, tables...) + randomExpr := g.Expression() + return sqlparser.String(randomExpr) +} + func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { tables := createTables(schemaTables) randomCol := func(tblIdx int) (string, string) { tbl := tables[tblIdx] - col := randomEl(tbl.columns) - return fmt.Sprintf("tbl%d.%s", tblIdx, col.name), col.typ + col := randomEl(tbl.Cols) + return fmt.Sprintf("tbl%d.%s", tblIdx, col.Name), col.Typ } isDerived := rand.Intn(10) < 1 && TestFailingQueries @@ -217,15 +231,15 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // select the grouping columns - isLeftJoin := rand.Intn(2) < 1 - if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isLeftJoin || TestFailingQueries) { + isJoin := rand.Intn(2) < 1 + if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) { sel += strings.Join(grouping, ", ") + ", " } // select the ordering columns // we do it this way, so we don't have to do only `only_full_group_by` queries - var noOfOrderBy int - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isLeftJoin || TestFailingQueries) { + noOfOrderBy := 0 + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { // panic on rand function call if value is 0 noOfOrderBy = rand.Intn(len(grouping)) } @@ -245,6 +259,13 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } } + // add random expression to select + isRandomExpr := rand.Intn(2) < 1 + randomExpr := getRandomExpr(tables) + if isRandomExpr { + sel += "(" + randomExpr + "), " + } + //var newColumns []column // populate columns of this query to add to schemaTables //for i := range aggregates { @@ -257,16 +278,16 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { var tbls []string for i, s := range tables { - tbls = append(tbls, fmt.Sprintf("%s as tbl%d", s.name, i)) + tbls = append(tbls, fmt.Sprintf("%s as tbl%d", s.Name, i)) } sel += strings.Join(tbls, ", ") // join - if isLeftJoin { + if isJoin { tables = append(tables, randomEl(schemaTables)) join := createPredicates(tables, randomCol, true) - sel += " left join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].name, len(tables)-1) + sel += " left join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].Name, len(tables)-1) if len(join) > 0 { sel += " on " + strings.Join(join, " and ") } @@ -274,11 +295,13 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { if len(predicates) > 0 { sel += " where " + if rand.Intn(2) < 1 { + sel += "(" + getRandomExpr(tables) + ") and " + } sel += strings.Join(predicates, " and ") } - isGrouping := false - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isLeftJoin || TestFailingQueries) { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { // populate columns of this query to add to schemaTables //for i := range grouping { // newColumns = append(newColumns, column{ @@ -288,8 +311,12 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { //} sel += " group by " sel += strings.Join(grouping, ", ") - - isGrouping = true + if isRandomExpr { + sel += ", " + } + } + if isRandomExpr && (!isDistinct || TestFailingQueries) { + sel += "(" + randomExpr + ")" } if noOfOrderBy > 0 { @@ -298,7 +325,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // limit (fails with select grouping columns) - if rand.Intn(2) < 1 && (!isGrouping || noOfOrderBy != 0 || TestFailingQueries) { + if rand.Intn(2) < 1 && noOfOrderBy > 0 { limitNum := rand.Intn(20) sel += fmt.Sprintf(" limit %d", limitNum) } @@ -306,8 +333,8 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // add generated query to schemaTables // TODO: make columns not nil but prevent aggregation on said columns schemaTables = append(schemaTables, tableT{ - name: "(" + sel + ")", - columns: nil, + Name: "(" + sel + ")", + Cols: nil, }) // derived tables (partially unsupported) @@ -324,7 +351,7 @@ func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (strin var tblIdx int for { tblIdx = rand.Intn(len(tables)) - if tables[tblIdx].columns != nil { + if tables[tblIdx].Cols != nil { break } // fmt.Printf("group by tables:\n%v\n tblIdx: %d\n", tables, tblIdx) @@ -351,7 +378,7 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int var tblIdx int for { tblIdx = rand.Intn(len(tables)) - if tables[tblIdx].columns != nil { + if tables[tblIdx].Cols != nil { break } // fmt.Printf("aggregation tables:\n%v\n tblIdx: %d\n", tables, tblIdx) @@ -403,7 +430,7 @@ func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, strin for idx1 := range tables { for idx2 := range tables { // fmt.Printf("predicate tables:\n%v\n idx1: %d idx2: %d, incr: %d", tables, idx1, idx2, incr) - if idx1 >= idx2 || idx1 < incr || idx2 < incr || tables[idx1].columns == nil || tables[idx2].columns == nil { + if idx1 >= idx2 || idx1 < incr || idx2 < incr || tables[idx1].Cols == nil || tables[idx2].Cols == nil { continue } noOfPredicates := rand.Intn(2) diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 4efe346c3d2..01ea5c08558 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -26,15 +26,15 @@ type ( Name string Typ string } - TabletT struct { - Name string - Columns []Col + TableT struct { + Name string + Cols []Col } ) // This file is used to generate random expressions to be used for testing -func NewGenerator(seed int64, maxDepth int, tables ...TabletT) *Generator { +func NewGenerator(seed int64, maxDepth int, tables ...TableT) *Generator { g := Generator{ seed: seed, r: rand.New(rand.NewSource(seed)), @@ -49,7 +49,7 @@ type Generator struct { r *rand.Rand depth int maxDepth int - tables []TabletT + tables []TableT } // enter should be called whenever we are producing an intermediate node. it should be followed by a `defer g.exit()` @@ -123,10 +123,13 @@ func (g *Generator) intExpr() Expr { options := []exprF{ func() Expr { return g.arithmetic() }, func() Expr { return g.intLiteral() }, - func() Expr { return g.intColumn() }, func() Expr { return g.caseExpr(g.intExpr) }, } + if g.tables != nil { + options = append(options, func() Expr { return g.intColumn() }) + } + return g.randomOf(options) } @@ -157,10 +160,13 @@ func (g *Generator) stringExpr() Expr { options := []exprF{ func() Expr { return g.stringLiteral() }, - func() Expr { return g.stringColumn() }, func() Expr { return g.caseExpr(g.stringExpr) }, } + if g.tables != nil { + options = append(options, func() Expr { return g.intColumn() }) + } + return g.randomOf(options) } @@ -240,20 +246,21 @@ func (g *Generator) arithmetic() Expr { } func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { - table := g.tables[rand.Intn(len(g.tables))] - for len(table.Columns) > 0 { - idx := rand.Intn(len(table.Columns)) - randCol := table.Columns[idx] + tblIdx := rand.Intn(len(g.tables)) + table := g.tables[tblIdx] + for len(table.Cols) > 0 { + idx := rand.Intn(len(table.Cols)) + randCol := table.Cols[idx] if randCol.Typ == typ /* better way to check if int type? */ { return &ColName{ Metadata: nil, Name: NewIdentifierCI(randCol.Name), - Qualifier: TableName{Name: NewIdentifierCS(table.Name)}, + Qualifier: TableName{Name: NewIdentifierCS(fmt.Sprintf("tbl%d", tblIdx))}, } } else { // delete randCol from table.columns - table.Columns[idx] = table.Columns[len(table.Columns)-1] - table.Columns = table.Columns[:len(table.Columns)-1] + table.Cols[idx] = table.Cols[len(table.Cols)-1] + table.Cols = table.Cols[:len(table.Cols)-1] } } diff --git a/go/vt/sqlparser/random_expr_test.go b/go/vt/sqlparser/random_expr_test.go new file mode 100644 index 00000000000..0cb48484d27 --- /dev/null +++ b/go/vt/sqlparser/random_expr_test.go @@ -0,0 +1,35 @@ +package sqlparser + +import ( + "fmt" + "golang.org/x/exp/maps" + "testing" + "time" +) + +func TestRandomExprWithTables(t *testing.T) { + schema := map[string]TableT{ + "emp": {Name: "emp", Cols: []Col{ + {Name: "empno", Typ: "bigint"}, + {Name: "ename", Typ: "varchar"}, + {Name: "job", Typ: "varchar"}, + {Name: "mgr", Typ: "bigint"}, + {Name: "hiredate", Typ: "date"}, + {Name: "sal", Typ: "bigint"}, + {Name: "comm", Typ: "bigint"}, + {Name: "deptno", Typ: "bigint"}, + }}, + "dept": {Name: "dept", Cols: []Col{ + {Name: "deptno", Typ: "bigint"}, + {Name: "dname", Typ: "varchar"}, + {Name: "loc", Typ: "varchar"}, + }}, + } + + schemaTables := maps.Values(schema) + + seed := time.Now().UnixNano() + g := NewGenerator(seed, 2, schemaTables...) + randomExpr := g.Expression() + fmt.Println(String(randomExpr)) +} From 8e8002279485836e9df20d849c16bb438095f1bc Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Mon, 12 Jun 2023 01:28:49 -0700 Subject: [PATCH 08/29] fixed syntax error in random query generation group by Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 180fb91902f..8efd48d389f 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -103,6 +103,9 @@ func TestKnownFailures(t *testing.T) { // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.ename), min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") + // the type of this expression cannot be statically computed + helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm") + // Cannot convert value to desired type helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno") @@ -232,14 +235,14 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // select the grouping columns isJoin := rand.Intn(2) < 1 - if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) { + if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { sel += strings.Join(grouping, ", ") + ", " } // select the ordering columns // we do it this way, so we don't have to do only `only_full_group_by` queries noOfOrderBy := 0 - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { // panic on rand function call if value is 0 noOfOrderBy = rand.Intn(len(grouping)) } @@ -262,7 +265,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // add random expression to select isRandomExpr := rand.Intn(2) < 1 randomExpr := getRandomExpr(tables) - if isRandomExpr { + if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { sel += "(" + randomExpr + "), " } @@ -301,7 +304,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(predicates, " and ") } - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) { + if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { // populate columns of this query to add to schemaTables //for i := range grouping { // newColumns = append(newColumns, column{ @@ -315,7 +318,10 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += ", " } } - if isRandomExpr && (!isDistinct || TestFailingQueries) { + if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + if len(grouping) <= 0 { + sel += " group by " + } sel += "(" + randomExpr + ")" } From 2ec528d93ee8a9ad3dec822f758cc5659ed96088 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 13 Jun 2023 00:29:11 -0700 Subject: [PATCH 09/29] added column aliases Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 44 +++++++++++-------- go/vt/sqlparser/random_expr.go | 22 +++++++--- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 8efd48d389f..ef8c565039b 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -131,6 +131,9 @@ func TestKnownFailures(t *testing.T) { // mismatched results (group by + select grouping + limit) helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.sal, count(*) from emp as tbl0 group by tbl0.sal limit 7") + // vttablet: rpc error: code = NotFound desc = Unknown column 'cgroup0' in 'field list' (errno 1054) (sqlstate 42S22) (CallerID: userData1) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.ename as cgroup0, max(tbl0.comm) as caggr0 from emp as tbl0, emp as tbl1 group by cgroup0") + // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") @@ -218,7 +221,11 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { randomCol := func(tblIdx int) (string, string) { tbl := tables[tblIdx] col := randomEl(tbl.Cols) - return fmt.Sprintf("tbl%d.%s", tblIdx, col.Name), col.Typ + colAlias := fmt.Sprintf("%s.%s", tbl.Alias, col.Name) + if col.Alias != "" { + colAlias = col.Alias + } + return colAlias, col.Typ } isDerived := rand.Intn(10) < 1 && TestFailingQueries @@ -239,7 +246,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(grouping, ", ") + ", " } - // select the ordering columns + // generate the order by columns // we do it this way, so we don't have to do only `only_full_group_by` queries noOfOrderBy := 0 if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { @@ -250,23 +257,19 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { if noOfOrderBy > 0 { for noOfOrderBy > 0 { noOfOrderBy-- - if rand.Intn(2) == 0 || len(grouping) == 0 { - orderBy = append(orderBy, randomEl(aggregates)) + if rand.Intn(2) < 1 || len(grouping) == 0 { + orderBy = append(orderBy, fmt.Sprintf("caggr%d", rand.Intn(len(aggregates)))) } else { - orderBy = append(orderBy, randomEl(grouping)) + orderBy = append(orderBy, fmt.Sprintf("cgroup%d", rand.Intn(len(grouping)))) } } - - if rand.Intn(2) < 1 { - sel += strings.Join(orderBy, ", ") + ", " - } } // add random expression to select isRandomExpr := rand.Intn(2) < 1 randomExpr := getRandomExpr(tables) if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - sel += "(" + randomExpr + "), " + sel += "(" + randomExpr + ") as crandom0, " } //var newColumns []column @@ -280,17 +283,18 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(aggregates, ", ") + " from " var tbls []string - for i, s := range tables { - tbls = append(tbls, fmt.Sprintf("%s as tbl%d", s.Name, i)) + for _, s := range tables { + tbls = append(tbls, fmt.Sprintf("%s as %s", s.Name, s.Alias)) } sel += strings.Join(tbls, ", ") // join if isJoin { tables = append(tables, randomEl(schemaTables)) + tables[len(tables)-1].Alias = fmt.Sprintf("tbl%d", len(tables)-1) join := createPredicates(tables, randomCol, true) - sel += " left join " + fmt.Sprintf("%s as tbl%d", tables[len(tables)-1].Name, len(tables)-1) + sel += " left join " + fmt.Sprintf("%s as %s", tables[len(tables)-1].Name, tables[len(tables)-1].Alias) if len(join) > 0 { sel += " on " + strings.Join(join, " and ") } @@ -312,8 +316,10 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // typ: groupTypes[i], // }) //} - sel += " group by " - sel += strings.Join(grouping, ", ") + sel += " group by cgroup0" + for i := 1; i < len(grouping); i++ { + sel += fmt.Sprintf(", cgroup%d", i) + } if isRandomExpr { sel += ", " } @@ -322,7 +328,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { if len(grouping) <= 0 { sel += " group by " } - sel += "(" + randomExpr + ")" + sel += "crandom0" } if noOfOrderBy > 0 { @@ -363,7 +369,7 @@ func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (strin // fmt.Printf("group by tables:\n%v\n tblIdx: %d\n", tables, tblIdx) } col, typ := randomCol(tblIdx) - grouping = append(grouping, col) + grouping = append(grouping, col+fmt.Sprintf(" as cgroup%d", i)) groupTypes = append(groupTypes, typ) } return grouping, groupTypes @@ -403,7 +409,7 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int } } if addAggr { - aggregates = append(aggregates, newAggregate) + aggregates = append(aggregates, newAggregate+fmt.Sprintf(" as caggr%d", i)) if newAggregate == fmt.Sprintf("avg(%s)", e) && typ == "bigint" { aggrTypes = append(aggrTypes, "decimal") } else { @@ -418,10 +424,12 @@ func createTables(schemaTables []tableT) []tableT { var tables []tableT // add at least one of original emp/dept tables for now because derived tables have nil columns tables = append(tables, schemaTables[rand.Intn(2)]) + tables[0].Alias = "tbl0" noOfTables := rand.Intn(len(schemaTables)) for i := 0; i < noOfTables; i++ { tables = append(tables, randomEl(schemaTables)) + tables[i+1].Alias = fmt.Sprintf("tbl%d", i+1) } return tables } diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 01ea5c08558..f361a2005f9 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -23,12 +23,14 @@ import ( type ( Col struct { - Name string - Typ string + Name string + Alias string + Typ string } TableT struct { - Name string - Cols []Col + Name string + Alias string + Cols []Col } ) @@ -252,10 +254,18 @@ func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { idx := rand.Intn(len(table.Cols)) randCol := table.Cols[idx] if randCol.Typ == typ /* better way to check if int type? */ { + newName := randCol.Name + if randCol.Alias != "" { + newName = randCol.Alias + } + newTableName := table.Name + if table.Alias != "" { + newTableName = table.Alias + } return &ColName{ Metadata: nil, - Name: NewIdentifierCI(randCol.Name), - Qualifier: TableName{Name: NewIdentifierCS(fmt.Sprintf("tbl%d", tblIdx))}, + Name: NewIdentifierCI(newName), + Qualifier: TableName{Name: NewIdentifierCS(newTableName)}, } } else { // delete randCol from table.columns From 6fb6f4fb366d2ddc72d88f93a5842beda72768eb Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 13 Jun 2023 01:16:14 -0700 Subject: [PATCH 10/29] added derived tables Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index ef8c565039b..dcc572bd39a 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -22,6 +22,7 @@ import ( "strings" "testing" "time" + "vitess.io/vitess/go/vt/sqlparser" "github.com/stretchr/testify/require" @@ -229,9 +230,9 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } isDerived := rand.Intn(10) < 1 && TestFailingQueries - aggregates, _ := createAggregations(tables, maxAggrs, randomCol, isDerived) + aggregates, aggrTypes := createAggregations(tables, maxAggrs, randomCol, isDerived) predicates := createPredicates(tables, randomCol, false) - grouping, _ := createGroupBy(tables, maxGroupBy, randomCol) + grouping, groupTypes := createGroupBy(tables, maxGroupBy, randomCol) sel := "select /*vt+ PLANNER=Gen4 */ " // select distinct (fails with group by bigint) @@ -272,14 +273,16 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += "(" + randomExpr + ") as crandom0, " } - //var newColumns []column // populate columns of this query to add to schemaTables - //for i := range aggregates { - // newColumns = append(newColumns, column{ - // name: aggregates[i], - // typ: aggrTypes[i], - // }) - //} + var newColumns []column + for i := range aggregates { + newName, newAlias, _ := strings.Cut(aggregates[i], " as ") + newColumns = append(newColumns, column{ + Name: newName, + Alias: newAlias, + Typ: aggrTypes[i], + }) + } sel += strings.Join(aggregates, ", ") + " from " var tbls []string @@ -310,12 +313,14 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { // populate columns of this query to add to schemaTables - //for i := range grouping { - // newColumns = append(newColumns, column{ - // name: grouping[i], - // typ: groupTypes[i], - // }) - //} + for i := range grouping { + newName, newAlias, _ := strings.Cut(grouping[i], " as ") + newColumns = append(newColumns, column{ + Name: newName, + Alias: newAlias, + Typ: groupTypes[i], + }) + } sel += " group by cgroup0" for i := 1; i < len(grouping); i++ { sel += fmt.Sprintf(", cgroup%d", i) @@ -343,10 +348,9 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { } // add generated query to schemaTables - // TODO: make columns not nil but prevent aggregation on said columns schemaTables = append(schemaTables, tableT{ Name: "(" + sel + ")", - Cols: nil, + Cols: newColumns, }) // derived tables (partially unsupported) From f093992c67f44e05b2f9d780be3d6ab4e974aa8a Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 13 Jun 2023 01:47:39 -0700 Subject: [PATCH 11/29] fixed random_expr_test formatting Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/vt/sqlparser/random_expr_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/vt/sqlparser/random_expr_test.go b/go/vt/sqlparser/random_expr_test.go index 0cb48484d27..641f099d3b3 100644 --- a/go/vt/sqlparser/random_expr_test.go +++ b/go/vt/sqlparser/random_expr_test.go @@ -2,9 +2,10 @@ package sqlparser import ( "fmt" - "golang.org/x/exp/maps" "testing" "time" + + "golang.org/x/exp/maps" ) func TestRandomExprWithTables(t *testing.T) { From f3f00c537c457c0c17daaea3c9fdad88bb28c859 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Wed, 14 Jun 2023 02:29:41 -0700 Subject: [PATCH 12/29] fixed infinite loop in predicate generation Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 90 +++++++++---------- go/vt/sqlparser/random_expr.go | 41 ++++++--- 2 files changed, 74 insertions(+), 57 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index dcc572bd39a..0cc2e8f5f5a 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -26,8 +26,6 @@ import ( "vitess.io/vitess/go/vt/sqlparser" "github.com/stretchr/testify/require" - "golang.org/x/exp/maps" - "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/utils" ) @@ -36,7 +34,7 @@ type tableT = sqlparser.TableT type column = sqlparser.Col // if true then known failing query types are still generated by randomQuery() -const TestFailingQueries = false +const TestFailingQueries = true func start(t *testing.T) (utils.MySQLCompare, func()) { mcmp, err := utils.NewMySQLCompare(t, vtParams, mysqlParams) @@ -55,24 +53,8 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { // mcmp.Exec("set sql_mode=''") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10);") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (10,'ACCOUNTING','NEW YORK');") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (20,'RESEARCH','DALLAS');") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (30,'SALES','CHICAGO');") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (40,'OPERATIONS','BOSTON');") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20), (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30), (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30), (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20), (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30), (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30), (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10), (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20), (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10), (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30), (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20), (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30), (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20), (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10)") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES ('10','ACCOUNTING','NEW YORK'), ('20','RESEARCH','DALLAS'), ('30','SALES','CHICAGO'), ('40','OPERATIONS','BOSTON')") return mcmp, func() { deleteAll() @@ -100,6 +82,21 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc and tbl1.loc = tbl2.loc where (tbl2.deptno) and tbl0.deptno = tbl1.deptno") + + // mismatched results (group by + right join) + // left instead of right works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno") + + // mismatched results (count + right join) + // left instead of right works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal") + + // mismatched results (sum + right join) + // left instead of right works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") + // vitess error: nil // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.ename), min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") @@ -114,22 +111,12 @@ func TestKnownFailures(t *testing.T) { // unknown aggregation random helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.comm), count(tbl1.loc), min(tbl1.deptno), min(tbl0.comm) from emp as tbl0, dept as tbl1 left join emp as tbl2 on tbl1.loc = tbl2.job and tbl1.deptno = tbl2.comm where tbl0.empno = tbl1.deptno") - // mismatched results (group by + right join) - // left instead of right works - helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno") - - // mismatched results (count + right join) - // left instead of right works - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal") - - // mismatched results (sum + right join) - // left instead of right works - helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") - - // mismatched results (group by + limit) + // unavoidable + // mismatched results (group by + limit no order by) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0 group by tbl0.sal limit 7") - // mismatched results (group by + select grouping + limit) + // unavoidable + // mismatched results (group by + select grouping + limit no order by) helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.sal, count(*) from emp as tbl0 group by tbl0.sal limit 7") // vttablet: rpc error: code = NotFound desc = Unknown column 'cgroup0' in 'field list' (errno 1054) (sqlstate 42S22) (CallerID: userData1) @@ -171,8 +158,8 @@ func TestRandom(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) - schema := map[string]tableT{ - "emp": {Name: "emp", Cols: []column{ + schemaTables := []tableT{ + {Name: "emp", Cols: []column{ {Name: "empno", Typ: "bigint"}, {Name: "ename", Typ: "varchar"}, {Name: "job", Typ: "varchar"}, @@ -182,7 +169,7 @@ func TestRandom(t *testing.T) { {Name: "comm", Typ: "bigint"}, {Name: "deptno", Typ: "bigint"}, }}, - "dept": {Name: "dept", Cols: []column{ + {Name: "dept", Cols: []column{ {Name: "deptno", Typ: "bigint"}, {Name: "dname", Typ: "varchar"}, {Name: "loc", Typ: "varchar"}, @@ -190,7 +177,6 @@ func TestRandom(t *testing.T) { } endBy := time.Now().Add(10 * time.Second) - schemaTables := maps.Values(schema) var queryCount int for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { @@ -233,6 +219,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { aggregates, aggrTypes := createAggregations(tables, maxAggrs, randomCol, isDerived) predicates := createPredicates(tables, randomCol, false) grouping, groupTypes := createGroupBy(tables, maxGroupBy, randomCol) + sel := "select /*vt+ PLANNER=Gen4 */ " // select distinct (fails with group by bigint) @@ -341,7 +328,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += strings.Join(orderBy, ", ") } - // limit (fails with select grouping columns) + // limit (fails with group by and no order by) if rand.Intn(2) < 1 && noOfOrderBy > 0 { limitNum := rand.Intn(20) sel += fmt.Sprintf(" limit %d", limitNum) @@ -370,9 +357,9 @@ func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (strin if tables[tblIdx].Cols != nil { break } - // fmt.Printf("group by tables:\n%v\n tblIdx: %d\n", tables, tblIdx) } col, typ := randomCol(tblIdx) + // if group by columns are not selected don't alias grouping = append(grouping, col+fmt.Sprintf(" as cgroup%d", i)) groupTypes = append(groupTypes, typ) } @@ -397,7 +384,6 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int if tables[tblIdx].Cols != nil { break } - // fmt.Printf("aggregation tables:\n%v\n tblIdx: %d\n", tables, tblIdx) } e, typ := randomCol(tblIdx) newAggregate := randomEl(aggregations)(e) @@ -414,7 +400,9 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int } if addAggr { aggregates = append(aggregates, newAggregate+fmt.Sprintf(" as caggr%d", i)) - if newAggregate == fmt.Sprintf("avg(%s)", e) && typ == "bigint" { + if newAggregate == fmt.Sprintf("count(%s", e) || newAggregate == "count(*)" { + aggrTypes = append(aggrTypes, "bigint") + } else if newAggregate == fmt.Sprintf("avg(%s)", e) && typ == "bigint" { aggrTypes = append(aggrTypes, "decimal") } else { aggrTypes = append(aggrTypes, typ) @@ -456,12 +444,20 @@ func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, strin noOfPredicates++ } - for noOfPredicates > 0 { - col1, t1 := randomCol(idx1) - col2, t2 := randomCol(idx2) - if t1 != t2 { + for i := 0; noOfPredicates > 0; i++ { + col1, typ1 := randomCol(idx1) + col2, typ2 := randomCol(idx2) + + if i > 50 { + predicates = append(predicates, fmt.Sprintf("%s = %s", col1, col1)) + noOfPredicates-- + break + } + + if typ1 != typ2 { continue } + predicates = append(predicates, fmt.Sprintf("%s = %s", col1, col2)) noOfPredicates-- } diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index f361a2005f9..94fa56ccfa6 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -21,6 +21,8 @@ import ( "math/rand" ) +// This file is used to generate random expressions to be used for testing + type ( Col struct { Name string @@ -34,7 +36,23 @@ type ( } ) -// This file is used to generate random expressions to be used for testing +func (c *Col) copy() *Col { + return &Col{ + Name: c.Name, + Alias: c.Alias, + Typ: c.Typ, + } +} + +func (t *TableT) copy() *TableT { + newCols := make([]Col, len(t.Cols)) + copy(newCols, t.Cols) + return &TableT{ + Name: t.Name, + Alias: t.Alias, + Cols: newCols, + } +} func NewGenerator(seed int64, maxDepth int, tables ...TableT) *Generator { g := Generator{ @@ -144,7 +162,7 @@ func (g *Generator) randomBool() bool { } func (g *Generator) intLiteral() Expr { - t := fmt.Sprintf("%d", g.r.Intn(1000)-g.r.Intn((1000))) + t := fmt.Sprintf("%d", g.r.Intn(1000)-g.r.Intn(1000)) return NewIntLiteral(t) } @@ -250,17 +268,19 @@ func (g *Generator) arithmetic() Expr { func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { tblIdx := rand.Intn(len(g.tables)) table := g.tables[tblIdx] - for len(table.Cols) > 0 { - idx := rand.Intn(len(table.Cols)) - randCol := table.Cols[idx] - if randCol.Typ == typ /* better way to check if int type? */ { + tableCopy := table.copy() + + for len(tableCopy.Cols) > 0 { + idx := rand.Intn(len(tableCopy.Cols)) + randCol := tableCopy.Cols[idx] + if randCol.Typ == typ { newName := randCol.Name if randCol.Alias != "" { newName = randCol.Alias } newTableName := table.Name - if table.Alias != "" { - newTableName = table.Alias + if tableCopy.Alias != "" { + newTableName = tableCopy.Alias } return &ColName{ Metadata: nil, @@ -269,8 +289,8 @@ func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { } } else { // delete randCol from table.columns - table.Cols[idx] = table.Cols[len(table.Cols)-1] - table.Cols = table.Cols[:len(table.Cols)-1] + tableCopy.Cols[idx] = tableCopy.Cols[len(tableCopy.Cols)-1] + tableCopy.Cols = tableCopy.Cols[:len(tableCopy.Cols)-1] } } @@ -278,6 +298,7 @@ func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { } func (g *Generator) intColumn() Expr { + // better way to check if int type? return g.typeColumn("bigint", g.intLiteral) } From 7eb939d91c8126bb4af705a4ffd419ef97162fae Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Thu, 15 Jun 2023 22:39:34 -0700 Subject: [PATCH 13/29] added column aliases Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 231 +++++++++--------- go/vt/sqlparser/precedence_test.go | 2 +- go/vt/sqlparser/random_expr.go | 108 ++++++-- go/vt/sqlparser/random_expr_test.go | 2 +- go/vt/sqlparser/rewriter_test.go | 2 +- go/vt/sqlparser/walker_test.go | 4 +- 6 files changed, 206 insertions(+), 143 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 0cc2e8f5f5a..7d8303c75a6 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -82,8 +82,11 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() - // mismatched results - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc and tbl1.loc = tbl2.loc where (tbl2.deptno) and tbl0.deptno = tbl1.deptno") + // mismatched results (left join + odd on) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0, count(*) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.loc = tbl1.dname") + + // mismatched results (left join + odd where) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc where (tbl2.deptno)") // mismatched results (group by + right join) // left instead of right works @@ -159,22 +162,24 @@ func TestRandom(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) schemaTables := []tableT{ - {Name: "emp", Cols: []column{ - {Name: "empno", Typ: "bigint"}, - {Name: "ename", Typ: "varchar"}, - {Name: "job", Typ: "varchar"}, - {Name: "mgr", Typ: "bigint"}, - {Name: "hiredate", Typ: "date"}, - {Name: "sal", Typ: "bigint"}, - {Name: "comm", Typ: "bigint"}, - {Name: "deptno", Typ: "bigint"}, - }}, - {Name: "dept", Cols: []column{ - {Name: "deptno", Typ: "bigint"}, - {Name: "dname", Typ: "varchar"}, - {Name: "loc", Typ: "varchar"}, - }}, + {Name: "emp"}, + {Name: "dept"}, } + schemaTables[0].AddColumns([]column{ + {Name: "empno", Typ: "bigint"}, + {Name: "ename", Typ: "varchar"}, + {Name: "job", Typ: "varchar"}, + {Name: "mgr", Typ: "bigint"}, + {Name: "hiredate", Typ: "date"}, + {Name: "sal", Typ: "bigint"}, + {Name: "comm", Typ: "bigint"}, + {Name: "deptno", Typ: "bigint"}, + }...) + schemaTables[1].AddColumns([]column{ + {Name: "deptno", Typ: "bigint"}, + {Name: "dname", Typ: "varchar"}, + {Name: "loc", Typ: "varchar"}, + }...) endBy := time.Now().Add(10 * time.Second) @@ -195,30 +200,19 @@ func TestRandom(t *testing.T) { fmt.Printf("Queries successfully executed: %d\n", queryCount) } -func getRandomExpr(tables []tableT) string { +func getRandomExpr(tables []tableT) (string, string) { seed := time.Now().UnixNano() g := sqlparser.NewGenerator(seed, 2, tables...) - randomExpr := g.Expression() - return sqlparser.String(randomExpr) + randomExpr, typ := g.Expression() + return sqlparser.String(randomExpr), typ } func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { tables := createTables(schemaTables) - randomCol := func(tblIdx int) (string, string) { - tbl := tables[tblIdx] - col := randomEl(tbl.Cols) - colAlias := fmt.Sprintf("%s.%s", tbl.Alias, col.Name) - if col.Alias != "" { - colAlias = col.Alias - } - return colAlias, col.Typ - } - - isDerived := rand.Intn(10) < 1 && TestFailingQueries - aggregates, aggrTypes := createAggregations(tables, maxAggrs, randomCol, isDerived) - predicates := createPredicates(tables, randomCol, false) - grouping, groupTypes := createGroupBy(tables, maxGroupBy, randomCol) + grouping, numGBs := createGroupBy(tables, maxGroupBy) + aggregates, numAggrs := createAggregations(tables, maxAggrs) + predicates := createPredicates(tables, false) sel := "select /*vt+ PLANNER=Gen4 */ " @@ -230,99 +224,99 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { // select the grouping columns isJoin := rand.Intn(2) < 1 - if len(grouping) > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - sel += strings.Join(grouping, ", ") + ", " + + if numGBs > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + for i := 0; i < numGBs; i++ { + sel += grouping[i].GetSelectName() + ", " + } } // generate the order by columns // we do it this way, so we don't have to do only `only_full_group_by` queries noOfOrderBy := 0 - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + if numGBs > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { // panic on rand function call if value is 0 - noOfOrderBy = rand.Intn(len(grouping)) + noOfOrderBy = rand.Intn(numGBs) } + var orderBy []string if noOfOrderBy > 0 { for noOfOrderBy > 0 { noOfOrderBy-- - if rand.Intn(2) < 1 || len(grouping) == 0 { - orderBy = append(orderBy, fmt.Sprintf("caggr%d", rand.Intn(len(aggregates)))) + if rand.Intn(2) < 1 { + orderBy = append(orderBy, aggregates[rand.Intn(numAggrs)].Alias) } else { - orderBy = append(orderBy, fmt.Sprintf("cgroup%d", rand.Intn(len(grouping)))) + orderBy = append(orderBy, grouping[rand.Intn(numGBs)].Alias) } } } + var newTable tableT // add random expression to select isRandomExpr := rand.Intn(2) < 1 - randomExpr := getRandomExpr(tables) + randomExpr, typ := getRandomExpr(tables) if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { sel += "(" + randomExpr + ") as crandom0, " + newTable.AddColumns(column{ + Name: "crandom0", + Typ: typ, + }) } - // populate columns of this query to add to schemaTables - var newColumns []column - for i := range aggregates { - newName, newAlias, _ := strings.Cut(aggregates[i], " as ") - newColumns = append(newColumns, column{ - Name: newName, - Alias: newAlias, - Typ: aggrTypes[i], - }) + // add aggregates to select + sel += aggregates[0].Name + " as " + aggregates[0].Alias + for i := 1; i < numAggrs; i++ { + sel += ", " + aggregates[i].Name + " as " + aggregates[i].Alias } - sel += strings.Join(aggregates, ", ") + " from " + sel += " from " var tbls []string - for _, s := range tables { - tbls = append(tbls, fmt.Sprintf("%s as %s", s.Name, s.Alias)) + for _, t := range tables { + tbls = append(tbls, t.GetSelectName()) } sel += strings.Join(tbls, ", ") // join if isJoin { tables = append(tables, randomEl(schemaTables)) - tables[len(tables)-1].Alias = fmt.Sprintf("tbl%d", len(tables)-1) - join := createPredicates(tables, randomCol, true) + tables[len(tables)-1].SetAlias(fmt.Sprintf("tbl%d", len(tables)-1)) + join := createPredicates(tables, true) - sel += " left join " + fmt.Sprintf("%s as %s", tables[len(tables)-1].Name, tables[len(tables)-1].Alias) + sel += " left join " + fmt.Sprintf("%s as %s", tables[len(tables)-1].Name, tables[len(tables)-1].GetAlias()) if len(join) > 0 { sel += " on " + strings.Join(join, " and ") } } + // where if len(predicates) > 0 { sel += " where " if rand.Intn(2) < 1 { - sel += "(" + getRandomExpr(tables) + ") and " + predRandomExpr, _ := getRandomExpr(tables) + sel += "(" + predRandomExpr + ") and " } sel += strings.Join(predicates, " and ") } - if len(grouping) > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - // populate columns of this query to add to schemaTables - for i := range grouping { - newName, newAlias, _ := strings.Cut(grouping[i], " as ") - newColumns = append(newColumns, column{ - Name: newName, - Alias: newAlias, - Typ: groupTypes[i], - }) - } - sel += " group by cgroup0" - for i := 1; i < len(grouping); i++ { - sel += fmt.Sprintf(", cgroup%d", i) + // group by + if numGBs > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + sel += " group by " + grouping[0].GetUnaliasedName() + for i := 1; i < numGBs; i++ { + sel += ", " + grouping[i].GetUnaliasedName() } if isRandomExpr { sel += ", " } } + // make sure to group by the random expression if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - if len(grouping) <= 0 { + if numGBs <= 0 { sel += " group by " } sel += "crandom0" } + // order by if noOfOrderBy > 0 { sel += " order by " sel += strings.Join(orderBy, ", ") @@ -334,23 +328,31 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += fmt.Sprintf(" limit %d", limitNum) } - // add generated query to schemaTables - schemaTables = append(schemaTables, tableT{ - Name: "(" + sel + ")", - Cols: newColumns, - }) + // add new table to schemaTables + newTable.Name = "(" + sel + ")" + + // workaround for derived tables only using column alias in select statement; make sure Name is empty + for i := 0; i < numGBs; i++ { + grouping[i].Name = grouping[i].Alias + } + for i := 0; i < numAggrs; i++ { + aggregates[i].Name = aggregates[i].Alias + } + newTable.AddColumns(grouping...) + newTable.AddColumns(aggregates...) + schemaTables = append(schemaTables, newTable) // derived tables (partially unsupported) - if isDerived { + if rand.Intn(10) < 1 && TestFailingQueries { sel = randomQuery(schemaTables, 3, 3) } return sel } -func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (string, string)) (grouping []string, groupTypes []string) { - noOfGBs := rand.Intn(maxGB) - for i := 0; i < noOfGBs; i++ { +func createGroupBy(tables []tableT, maxGB int) (grouping []column, numGBs int) { + numGBs = rand.Intn(maxGB) + for i := 0; i < numGBs; i++ { var tblIdx int for { tblIdx = rand.Intn(len(tables)) @@ -358,15 +360,14 @@ func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (strin break } } - col, typ := randomCol(tblIdx) - // if group by columns are not selected don't alias - grouping = append(grouping, col+fmt.Sprintf(" as cgroup%d", i)) - groupTypes = append(groupTypes, typ) + col := randomEl(tables[tblIdx].Cols) + col.Alias = fmt.Sprintf("cgroup%d", i) + grouping = append(grouping, col) } - return grouping, groupTypes + return grouping, numGBs } -func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int) (string, string), isDerived bool) (aggregates []string, aggrTypes []string) { +func createAggregations(tables []tableT, maxAggrs int) (aggregates []column, numAggrs int) { aggregations := []func(string) string{ func(_ string) string { return "count(*)" }, func(e string) string { return fmt.Sprintf("count(%s)", e) }, @@ -376,8 +377,8 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int func(e string) string { return fmt.Sprintf("max(%s)", e) }, } - noOfAggrs := rand.Intn(maxAggrs) + 1 - for i := 0; i < noOfAggrs; i++ { + numAggrs = rand.Intn(maxAggrs) + 1 + for i := 0; i < numAggrs; i++ { var tblIdx int for { tblIdx = rand.Intn(len(tables)) @@ -385,48 +386,36 @@ func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int break } } - e, typ := randomCol(tblIdx) - newAggregate := randomEl(aggregations)(e) - - // derived tables do not allow duplicate columns - addAggr := true - if isDerived { - for _, aggr := range aggregates { - if newAggregate == aggr { - addAggr = false - break - } - } - } - if addAggr { - aggregates = append(aggregates, newAggregate+fmt.Sprintf(" as caggr%d", i)) - if newAggregate == fmt.Sprintf("count(%s", e) || newAggregate == "count(*)" { - aggrTypes = append(aggrTypes, "bigint") - } else if newAggregate == fmt.Sprintf("avg(%s)", e) && typ == "bigint" { - aggrTypes = append(aggrTypes, "decimal") - } else { - aggrTypes = append(aggrTypes, typ) - } + col := randomEl(tables[tblIdx].Cols) + newAggregate := randomEl(aggregations)(col.GetUnaliasedName()) + + col.Alias = fmt.Sprintf("caggr%d", i) + if newAggregate == fmt.Sprintf("count(%s)", col.GetQueryName()) || newAggregate == "count(*)" { + col.Typ = "bigint" + } else if newAggregate == fmt.Sprintf("avg(%s)", col.GetQueryName()) && col.GetQueryName() == "bigint" { + col.Typ = "decimal" } + col.Name = newAggregate + aggregates = append(aggregates, col) } - return aggregates, aggrTypes + return aggregates, numAggrs } func createTables(schemaTables []tableT) []tableT { var tables []tableT // add at least one of original emp/dept tables for now because derived tables have nil columns tables = append(tables, schemaTables[rand.Intn(2)]) - tables[0].Alias = "tbl0" + tables[0].SetAlias("tbl0") noOfTables := rand.Intn(len(schemaTables)) for i := 0; i < noOfTables; i++ { tables = append(tables, randomEl(schemaTables)) - tables[i+1].Alias = fmt.Sprintf("tbl%d", i+1) + tables[i+1].SetAlias(fmt.Sprintf("tbl%d", i+1)) } return tables } -func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, string), isJoin bool) (predicates []string) { +func createPredicates(tables []tableT, isJoin bool) (predicates []string) { // if creating predicates for a join, // then make sure predicates are created for the last two tables (which are being joined) incr := 0 @@ -436,7 +425,7 @@ func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, strin for idx1 := range tables { for idx2 := range tables { // fmt.Printf("predicate tables:\n%v\n idx1: %d idx2: %d, incr: %d", tables, idx1, idx2, incr) - if idx1 >= idx2 || idx1 < incr || idx2 < incr || tables[idx1].Cols == nil || tables[idx2].Cols == nil { + if idx1 >= idx2 || idx1 < incr || idx2 < incr { continue } noOfPredicates := rand.Intn(2) @@ -445,20 +434,20 @@ func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, strin } for i := 0; noOfPredicates > 0; i++ { - col1, typ1 := randomCol(idx1) - col2, typ2 := randomCol(idx2) + col1 := randomEl(tables[idx1].Cols) + col2 := randomEl(tables[idx2].Cols) if i > 50 { - predicates = append(predicates, fmt.Sprintf("%s = %s", col1, col1)) + predicates = append(predicates, fmt.Sprintf("%s = %s", col1.GetUnaliasedName(), col1.GetUnaliasedName())) noOfPredicates-- break } - if typ1 != typ2 { + if col1.Typ != col2.Typ { continue } - predicates = append(predicates, fmt.Sprintf("%s = %s", col1, col2)) + predicates = append(predicates, fmt.Sprintf("%s = %s", col1.GetUnaliasedName(), col2.GetUnaliasedName())) noOfPredicates-- } } diff --git a/go/vt/sqlparser/precedence_test.go b/go/vt/sqlparser/precedence_test.go index cb8c1f23805..99ecea7fc01 100644 --- a/go/vt/sqlparser/precedence_test.go +++ b/go/vt/sqlparser/precedence_test.go @@ -224,7 +224,7 @@ func TestRandom(t *testing.T) { break } // Given a random expression - randomExpr := g.Expression() + randomExpr, _ := g.Expression() inputQ := "select " + String(randomExpr) + " from t" // When it's parsed and unparsed diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 94fa56ccfa6..80ed94f21ac 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -25,31 +25,96 @@ import ( type ( Col struct { - Name string - Alias string - Typ string + TableName string + Name string + Alias string + Typ string + // add isDerived flag? } TableT struct { Name string - Alias string + alias string Cols []Col } ) -func (c *Col) copy() *Col { - return &Col{ - Name: c.Name, - Alias: c.Alias, - Typ: c.Typ, +// GetSelectName returns the aliasing command if Alias is nonempty +func (c *Col) GetSelectName() string { + // workaround for derived tables only using column alias in select statement; make sure Name is empty + + sel := fmt.Sprintf("%s.%s", c.TableName, c.Name) + if c.Alias != "" { + sel += fmt.Sprintf(" as %s", c.Alias) + } + return sel +} + +// GetQueryName returns the Alias if it's nonempty +func (c *Col) GetQueryName() string { + if c.Alias != "" { + return c.Alias + } + return c.GetUnaliasedName() +} + +// GetUnaliasedName returns the name used in queries if the alias is empty (TableName.Name) +func (c *Col) GetUnaliasedName() string { + return fmt.Sprintf("%s.%s", c.TableName, c.Name) +} + +// GetSelectName returns the aliasing command if alias is nonempty +func (t *TableT) GetSelectName() string { + sel := fmt.Sprintf("%s", t.Name) + if t.alias != "" { + sel += fmt.Sprintf(" as %s", t.alias) + } + return sel +} + +// GetAlias returns the alias +func (t *TableT) GetAlias() string { + return t.alias +} + +// SetAlias sets the alias for t, as well as setting the TableName for all columns in Cols +func (t *TableT) SetAlias(newAlias string) { + t.alias = newAlias + for i := range t.Cols { + t.Cols[i].TableName = newAlias } } +// GetQueryName returns the alias if it's nonempty +func (t *TableT) GetQueryName() string { + if t.alias != "" { + return t.alias + } + return t.Name +} + +// SetColumns sets the columns of t, and automatically assigns TableName +// this makes it unnatural (but still possible as Cols is exportable) to modify TableName +func (t *TableT) SetColumns(col ...Col) { + t.Cols = make([]Col, len(col)) + t.AddColumns(col...) +} + +// AddColumns adds columns to t, and automatically assigns TableName +// this makes it unnatural (but still possible as Cols is exportable) to modify TableName +func (t *TableT) AddColumns(col ...Col) { + for i := range col { + col[i].TableName = t.GetQueryName() + t.Cols = append(t.Cols, col[i]) + } +} + +// copy returns a deep copy of t func (t *TableT) copy() *TableT { newCols := make([]Col, len(t.Cols)) copy(newCols, t.Cols) return &TableT{ Name: t.Name, - Alias: t.Alias, + alias: t.alias, Cols: newCols, } } @@ -100,9 +165,10 @@ func (g *Generator) atMaxDepth() bool { Note: It's important to update this method so that it produces all expressions that need precedence checking. It's currently missing function calls and string operators */ -func (g *Generator) Expression() Expr { +func (g *Generator) Expression() (Expr, string) { + typ := "tinyint" if g.randomBool() { - return g.booleanExpr() + return g.booleanExpr(), typ } options := []exprF{ func() Expr { return g.intExpr() }, @@ -110,7 +176,14 @@ func (g *Generator) Expression() Expr { func() Expr { return g.booleanExpr() }, } - return g.randomOf(options) + fn := g.randomOf(options) + if fn == g.intExpr() { + typ = "bigint" + } else if fn == g.stringExpr() { + typ = "varchar" + } + + return fn, typ } func (g *Generator) booleanExpr() Expr { @@ -234,12 +307,13 @@ func (g *Generator) caseExpr(valueF func() Expr) Expr { if exp == nil { cond = g.booleanExpr() } else { - cond = g.Expression() + cond, _ = g.Expression() } + val, _ := g.Expression() whens = append(whens, &When{ Cond: cond, - Val: g.Expression(), + Val: val, }) } @@ -279,8 +353,8 @@ func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { newName = randCol.Alias } newTableName := table.Name - if tableCopy.Alias != "" { - newTableName = tableCopy.Alias + if tableCopy.alias != "" { + newTableName = tableCopy.alias } return &ColName{ Metadata: nil, diff --git a/go/vt/sqlparser/random_expr_test.go b/go/vt/sqlparser/random_expr_test.go index 641f099d3b3..f2cb22a4b97 100644 --- a/go/vt/sqlparser/random_expr_test.go +++ b/go/vt/sqlparser/random_expr_test.go @@ -31,6 +31,6 @@ func TestRandomExprWithTables(t *testing.T) { seed := time.Now().UnixNano() g := NewGenerator(seed, 2, schemaTables...) - randomExpr := g.Expression() + randomExpr, _ := g.Expression() fmt.Println(String(randomExpr)) } diff --git a/go/vt/sqlparser/rewriter_test.go b/go/vt/sqlparser/rewriter_test.go index 9adae1b4a81..3a9ef42bf36 100644 --- a/go/vt/sqlparser/rewriter_test.go +++ b/go/vt/sqlparser/rewriter_test.go @@ -26,7 +26,7 @@ import ( func BenchmarkVisitLargeExpression(b *testing.B) { gen := NewGenerator(1, 5) - exp := gen.Expression() + exp, _ := gen.Expression() depth := 0 for i := 0; i < b.N; i++ { diff --git a/go/vt/sqlparser/walker_test.go b/go/vt/sqlparser/walker_test.go index 5359235afa5..acea63ef56b 100644 --- a/go/vt/sqlparser/walker_test.go +++ b/go/vt/sqlparser/walker_test.go @@ -26,7 +26,7 @@ import ( func BenchmarkWalkLargeExpression(b *testing.B) { for i := 0; i < 10; i++ { b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { - exp := NewGenerator(int64(i*100), 5).Expression() + exp, _ := NewGenerator(int64(i*100), 5).Expression() count := 0 for i := 0; i < b.N; i++ { err := Walk(func(node SQLNode) (kontinue bool, err error) { @@ -42,7 +42,7 @@ func BenchmarkWalkLargeExpression(b *testing.B) { func BenchmarkRewriteLargeExpression(b *testing.B) { for i := 1; i < 7; i++ { b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { - exp := NewGenerator(int64(i*100), i).Expression() + exp, _ := NewGenerator(int64(i*100), i).Expression() count := 0 for i := 0; i < b.N; i++ { _ = Rewrite(exp, func(_ *Cursor) bool { From 3ce4f2079cf3bb2f7675c96b4a1553d2088fbc11 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Fri, 16 Jun 2023 00:02:37 -0700 Subject: [PATCH 14/29] renamed TableT and Col methods Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 19 +++++++++--------- go/vt/sqlparser/random_expr.go | 20 +++++++++---------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 7d8303c75a6..71f0cc58197 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -100,9 +100,10 @@ func TestKnownFailures(t *testing.T) { // left instead of right works helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") + // only_full_group_by enabled (vitess produces the correct result assuming only_full_group_by is disabled) // vitess error: nil // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' - helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.ename), min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename, min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") // the type of this expression cannot be statically computed helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm") @@ -137,10 +138,6 @@ func TestKnownFailures(t *testing.T) { // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ (select count(*) from emp as tbl0) from emp as tbl0") - // unsupported - // unsupported: in scatter query: aggregation function - helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") - // unsupported // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0) as tbl0") @@ -152,6 +149,10 @@ func TestKnownFailures(t *testing.T) { // unsupported // EOF (errno 2013) (sqlstate HY000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") + + // unsupported + // unsupported: in scatter query: aggregation function + helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") } func TestRandom(t *testing.T) { @@ -227,7 +228,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { if numGBs > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { for i := 0; i < numGBs; i++ { - sel += grouping[i].GetSelectName() + ", " + sel += grouping[i].GetAliasedExpression() + ", " } } @@ -272,7 +273,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { sel += " from " var tbls []string for _, t := range tables { - tbls = append(tbls, t.GetSelectName()) + tbls = append(tbls, t.GetAliasedExpression()) } sel += strings.Join(tbls, ", ") @@ -390,9 +391,9 @@ func createAggregations(tables []tableT, maxAggrs int) (aggregates []column, num newAggregate := randomEl(aggregations)(col.GetUnaliasedName()) col.Alias = fmt.Sprintf("caggr%d", i) - if newAggregate == fmt.Sprintf("count(%s)", col.GetQueryName()) || newAggregate == "count(*)" { + if newAggregate == fmt.Sprintf("count(%s)", col.GetColumnName()) || newAggregate == "count(*)" { col.Typ = "bigint" - } else if newAggregate == fmt.Sprintf("avg(%s)", col.GetQueryName()) && col.GetQueryName() == "bigint" { + } else if newAggregate == fmt.Sprintf("avg(%s)", col.GetColumnName()) && col.GetColumnName() == "bigint" { col.Typ = "decimal" } col.Name = newAggregate diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 80ed94f21ac..30559bbe16b 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -32,14 +32,14 @@ type ( // add isDerived flag? } TableT struct { - Name string + Name string // select type alias string Cols []Col } ) -// GetSelectName returns the aliasing command if Alias is nonempty -func (c *Col) GetSelectName() string { +// GetAliasedExpression returns the aliasing command if Alias is nonempty +func (c *Col) GetAliasedExpression() string { // workaround for derived tables only using column alias in select statement; make sure Name is empty sel := fmt.Sprintf("%s.%s", c.TableName, c.Name) @@ -49,8 +49,8 @@ func (c *Col) GetSelectName() string { return sel } -// GetQueryName returns the Alias if it's nonempty -func (c *Col) GetQueryName() string { +// GetColumnName returns the Alias if it's nonempty +func (c *Col) GetColumnName() string { if c.Alias != "" { return c.Alias } @@ -62,8 +62,8 @@ func (c *Col) GetUnaliasedName() string { return fmt.Sprintf("%s.%s", c.TableName, c.Name) } -// GetSelectName returns the aliasing command if alias is nonempty -func (t *TableT) GetSelectName() string { +// GetAliasedExpression returns the aliasing command if alias is nonempty +func (t *TableT) GetAliasedExpression() string { sel := fmt.Sprintf("%s", t.Name) if t.alias != "" { sel += fmt.Sprintf(" as %s", t.alias) @@ -84,8 +84,8 @@ func (t *TableT) SetAlias(newAlias string) { } } -// GetQueryName returns the alias if it's nonempty -func (t *TableT) GetQueryName() string { +// GetColumnName returns the alias if it's nonempty +func (t *TableT) GetColumnName() string { if t.alias != "" { return t.alias } @@ -103,7 +103,7 @@ func (t *TableT) SetColumns(col ...Col) { // this makes it unnatural (but still possible as Cols is exportable) to modify TableName func (t *TableT) AddColumns(col ...Col) { for i := range col { - col[i].TableName = t.GetQueryName() + col[i].TableName = t.GetColumnName() t.Cols = append(t.Cols, col[i]) } } From a59380e74e7fcdf0cf10eb2df8d6489a77fbc58e Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Fri, 16 Jun 2023 03:44:10 -0700 Subject: [PATCH 15/29] reorder failing queries Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../endtoend/vtgate/queries/random/random_test.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 71f0cc58197..44565c3509e 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -51,7 +51,7 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { deleteAll() - // mcmp.Exec("set sql_mode=''") + mcmp.Exec("set sql_mode=''") mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20), (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30), (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30), (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20), (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30), (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30), (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10), (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20), (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10), (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30), (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20), (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30), (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20), (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10)") mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES ('10','ACCOUNTING','NEW YORK'), ('20','RESEARCH','DALLAS'), ('30','SALES','CHICAGO'), ('40','OPERATIONS','BOSTON')") @@ -100,20 +100,21 @@ func TestKnownFailures(t *testing.T) { // left instead of right works helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") - // only_full_group_by enabled (vitess produces the correct result assuming only_full_group_by is disabled) - // vitess error: nil - // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename, min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") - // the type of this expression cannot be statically computed helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm") // Cannot convert value to desired type helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno") + // sometimes fails if the following query is more complicated (?) + // only_full_group_by enabled (vitess produces the correct result assuming only_full_group_by is disabled) + // vitess error: nil + // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename, min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") + // only_full_group_by disabled // unknown aggregation random - helperTest(t, "select /*vt+ PLANNER=Gen4 */ (tbl0.comm), count(tbl1.loc), min(tbl1.deptno), min(tbl0.comm) from emp as tbl0, dept as tbl1 left join emp as tbl2 on tbl1.loc = tbl2.job and tbl1.deptno = tbl2.comm where tbl0.empno = tbl1.deptno") + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.comm, count(*) from emp as tbl0, emp as tbl1 where tbl0.empno = tbl1.deptno") // unavoidable // mismatched results (group by + limit no order by) From 34a46cd8e9fc74e59e99feb7aa9278e4044901d4 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Sun, 18 Jun 2023 01:29:47 -0700 Subject: [PATCH 16/29] refactor random query generation to use the ast Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 325 +++++++++--------- go/vt/sqlparser/ast_funcs.go | 100 ++++++ go/vt/sqlparser/random_expr.go | 86 ++--- go/vt/sqlparser/random_expr_test.go | 56 +-- 4 files changed, 314 insertions(+), 253 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 44565c3509e..0cfc0db0f25 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -19,13 +19,15 @@ package random import ( "fmt" "math/rand" - "strings" "testing" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/sqlparser" "github.com/stretchr/testify/require" + "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/utils" ) @@ -51,7 +53,7 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { deleteAll() - mcmp.Exec("set sql_mode=''") + // mcmp.Exec("set sql_mode=''") mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20), (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30), (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30), (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20), (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30), (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30), (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10), (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20), (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10), (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30), (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20), (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30), (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20), (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10)") mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES ('10','ACCOUNTING','NEW YORK'), ('20','RESEARCH','DALLAS'), ('30','SALES','CHICAGO'), ('40','OPERATIONS','BOSTON')") @@ -82,6 +84,9 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() + // coercion should not try to coerce this value: DATE("1980-12-17") + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct tbl1.hiredate as cgroup0, count(tbl1.mgr) as caggr0 from emp as tbl1 group by tbl1.hiredate, tbl1.ename") + // mismatched results (left join + odd on) helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0, count(*) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.loc = tbl1.dname") @@ -107,7 +112,7 @@ func TestKnownFailures(t *testing.T) { helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno") // sometimes fails if the following query is more complicated (?) - // only_full_group_by enabled (vitess produces the correct result assuming only_full_group_by is disabled) + // only_full_group_by enabled (vitess sometimes (?) produces the correct result assuming only_full_group_by is disabled) // vitess error: nil // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename, min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") @@ -164,8 +169,8 @@ func TestRandom(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) schemaTables := []tableT{ - {Name: "emp"}, - {Name: "dept"}, + {Name: sqlparser.NewTableName("emp")}, + {Name: sqlparser.NewTableName("dept")}, } schemaTables[0].AddColumns([]column{ {Name: "empno", Typ: "bigint"}, @@ -187,7 +192,7 @@ func TestRandom(t *testing.T) { var queryCount int for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { - query := randomQuery(schemaTables, 3, 3) + query := sqlparser.String(randomQuery(schemaTables, 3, 3)) _, vtErr := mcmp.ExecAllowAndCompareError(query) fmt.Println(query) // t.Failed() will become true once and subsequently print every query @@ -202,55 +207,34 @@ func TestRandom(t *testing.T) { fmt.Printf("Queries successfully executed: %d\n", queryCount) } -func getRandomExpr(tables []tableT) (string, string) { - seed := time.Now().UnixNano() - g := sqlparser.NewGenerator(seed, 2, tables...) - randomExpr, typ := g.Expression() - return sqlparser.String(randomExpr), typ -} - -func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { - tables := createTables(schemaTables) +// TODO: bunch of TestFailingQueries checks were deleted by refactor to use AST +func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Select { + sel := &sqlparser.Select{} + sel.SetComments(sqlparser.Comments{"/*vt+ PLANNER=Gen4 */"}) - grouping, numGBs := createGroupBy(tables, maxGroupBy) - aggregates, numAggrs := createAggregations(tables, maxAggrs) - predicates := createPredicates(tables, false) + // also creates the join + tables, isJoin := createTablesAndJoin(schemaTables, sel) - sel := "select /*vt+ PLANNER=Gen4 */ " + grouping := createGroupBy(tables, sel, maxGroupBy) + aggregates := createAggregations(tables, sel, maxAggrs) + sel.AddWhere(sqlparser.AndExpressions(createPredicates(tables, false)...)) // select distinct (fails with group by bigint) isDistinct := rand.Intn(2) < 1 if isDistinct { - sel += "distinct " + sel.MakeDistinct() } - // select the grouping columns - isJoin := rand.Intn(2) < 1 - - if numGBs > 0 && rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - for i := 0; i < numGBs; i++ { - sel += grouping[i].GetAliasedExpression() + ", " - } + // random predicate expression + if rand.Intn(2) < 1 { + predRandomExpr, _ := getRandomExpr(tables) + sel.AddWhere(predRandomExpr) } - // generate the order by columns - // we do it this way, so we don't have to do only `only_full_group_by` queries - noOfOrderBy := 0 - if numGBs > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - // panic on rand function call if value is 0 - noOfOrderBy = rand.Intn(numGBs) - } - - var orderBy []string - if noOfOrderBy > 0 { - for noOfOrderBy > 0 { - noOfOrderBy-- - if rand.Intn(2) < 1 { - orderBy = append(orderBy, aggregates[rand.Intn(numAggrs)].Alias) - } else { - orderBy = append(orderBy, grouping[rand.Intn(numGBs)].Alias) - } - } + // limit (fails with group by and no order by) + // TODO: numOrderBy needs to be > 0 + if rand.Intn(2) < 1 /* numOrderBy > 0 */ { + createLimit(sel) } var newTable tableT @@ -258,166 +242,136 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) string { isRandomExpr := rand.Intn(2) < 1 randomExpr, typ := getRandomExpr(tables) if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - sel += "(" + randomExpr + ") as crandom0, " + sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(randomExpr, "crandom0")) newTable.AddColumns(column{ Name: "crandom0", Typ: typ, }) } - // add aggregates to select - sel += aggregates[0].Name + " as " + aggregates[0].Alias - for i := 1; i < numAggrs; i++ { - sel += ", " + aggregates[i].Name + " as " + aggregates[i].Alias - } + // add new table to schemaTables + newTable.AddColumns(grouping...) + newTable.AddColumns(aggregates...) + newTable.Name = sqlparser.NewDerivedTable(false, sel) + schemaTables = append(schemaTables, newTable) - sel += " from " - var tbls []string - for _, t := range tables { - tbls = append(tbls, t.GetAliasedExpression()) + // derived tables (partially unsupported) + if rand.Intn(10) < 1 && TestFailingQueries { + sel = randomQuery(schemaTables, 3, 3) } - sel += strings.Join(tbls, ", ") - // join - if isJoin { - tables = append(tables, randomEl(schemaTables)) - tables[len(tables)-1].SetAlias(fmt.Sprintf("tbl%d", len(tables)-1)) - join := createPredicates(tables, true) + return sel +} - sel += " left join " + fmt.Sprintf("%s as %s", tables[len(tables)-1].Name, tables[len(tables)-1].GetAlias()) - if len(join) > 0 { - sel += " on " + strings.Join(join, " and ") - } - } +func createTablesAndJoin(schemaTables []tableT, sel *sqlparser.Select) ([]tableT, bool) { + var tables []tableT + // add at least one of original emp/dept tables for now because derived tables have nil columns + tables = append(tables, schemaTables[rand.Intn(2)]) - // where - if len(predicates) > 0 { - sel += " where " - if rand.Intn(2) < 1 { - predRandomExpr, _ := getRandomExpr(tables) - sel += "(" + predRandomExpr + ") and " - } - sel += strings.Join(predicates, " and ") - } + sel.From = append(sel.From, newAliasedTable(tables[0], "tbl0")) + tables[0].SetName("tbl0") - // group by - if numGBs > 0 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - sel += " group by " + grouping[0].GetUnaliasedName() - for i := 1; i < numGBs; i++ { - sel += ", " + grouping[i].GetUnaliasedName() - } - if isRandomExpr { - sel += ", " - } - } - // make sure to group by the random expression - if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - if numGBs <= 0 { - sel += " group by " - } - sel += "crandom0" + numTables := rand.Intn(len(schemaTables)) + for i := 0; i < numTables; i++ { + tables = append(tables, randomEl(schemaTables)) + sel.From = append(sel.From, newAliasedTable(tables[i+1], fmt.Sprintf("tbl%d", i+1))) + tables[i+1].SetName(fmt.Sprintf("tbl%d", i+1)) } - // order by - if noOfOrderBy > 0 { - sel += " order by " - sel += strings.Join(orderBy, ", ") - } + isJoin := rand.Intn(2) < 1 + if isJoin { + newTable := randomEl(schemaTables) + tables = append(tables, newTable) - // limit (fails with group by and no order by) - if rand.Intn(2) < 1 && noOfOrderBy > 0 { - limitNum := rand.Intn(20) - sel += fmt.Sprintf(" limit %d", limitNum) - } + // create the join before aliasing + newJoinTableExpr := createJoin(tables, sel) - // add new table to schemaTables - newTable.Name = "(" + sel + ")" + tables[numTables+1].SetName(fmt.Sprintf("tbl%d", numTables+1)) - // workaround for derived tables only using column alias in select statement; make sure Name is empty - for i := 0; i < numGBs; i++ { - grouping[i].Name = grouping[i].Alias - } - for i := 0; i < numAggrs; i++ { - aggregates[i].Name = aggregates[i].Alias + // create the condition after aliasing + newJoinTableExpr.Condition = sqlparser.NewJoinCondition(sqlparser.AndExpressions(createPredicates(tables, true)...), nil) + sel.From[numTables] = newJoinTableExpr } - newTable.AddColumns(grouping...) - newTable.AddColumns(aggregates...) - schemaTables = append(schemaTables, newTable) - // derived tables (partially unsupported) - if rand.Intn(10) < 1 && TestFailingQueries { - sel = randomQuery(schemaTables, 3, 3) + return tables, isJoin +} + +// creates a left join (without the condition) between the last table in sel and newTable +// tables should have one more table than sel +func createJoin(tables []tableT, sel *sqlparser.Select) *sqlparser.JoinTableExpr { + n := len(sel.From) + if len(tables) != n+1 { + log.Fatalf("sel has %d tables and tables has %d tables", len(sel.From), n) } - return sel + return sqlparser.NewJoinTableExpr(sel.From[n-1], sqlparser.LeftJoinType, newAliasedTable(tables[n], fmt.Sprintf("tbl%d", n)), nil) } -func createGroupBy(tables []tableT, maxGB int) (grouping []column, numGBs int) { - numGBs = rand.Intn(maxGB) +// adds grouping columns to sel.GroupBy and optionally to sel.SelectExprs and sel.OrderBy +// TODO: maybe change to return this stuff instead +func createGroupBy(tables []tableT, sel *sqlparser.Select, maxGB int) (grouping []column) { + numGBs := rand.Intn(maxGB) for i := 0; i < numGBs; i++ { - var tblIdx int - for { - tblIdx = rand.Intn(len(tables)) - if tables[tblIdx].Cols != nil { - break - } - } + tblIdx := rand.Intn(len(tables)) col := randomEl(tables[tblIdx].Cols) - col.Alias = fmt.Sprintf("cgroup%d", i) - grouping = append(grouping, col) + sel.GroupBy = append(sel.GroupBy, newColumn(col)) + + // add to order by (might have to change if there are no grouping columns and ordering is done on aggregation columns) + // (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) + if rand.Intn(2) < 1 { + sel.AddOrder(newOrderColumn(col)) + } + + // add to select + if rand.Intn(2) < 1 { + sel.SelectExprs = append(sel.SelectExprs, newAliasedColumn(col, fmt.Sprintf("cgroup%d", i))) + col.Name = fmt.Sprintf("cgroup%d", i) + grouping = append(grouping, col) + } } - return grouping, numGBs + + return grouping } -func createAggregations(tables []tableT, maxAggrs int) (aggregates []column, numAggrs int) { - aggregations := []func(string) string{ - func(_ string) string { return "count(*)" }, - func(e string) string { return fmt.Sprintf("count(%s)", e) }, - func(e string) string { return fmt.Sprintf("sum(%s)", e) }, - //func(e string) string { return fmt.Sprintf("avg(%s)", e) }, - func(e string) string { return fmt.Sprintf("min(%s)", e) }, - func(e string) string { return fmt.Sprintf("max(%s)", e) }, +// adds aggregation columns to sel.SelectExprs and optionally to sel.OrderBy +// TODO: maybe change to return this stuff instead +func createAggregations(tables []tableT, sel *sqlparser.Select, maxAggrs int) (aggregates []column) { + aggregations := []func(col column) sqlparser.Expr{ + func(_ column) sqlparser.Expr { return &sqlparser.CountStar{} }, + func(col column) sqlparser.Expr { return &sqlparser.Count{Args: sqlparser.Exprs{newColumn(col)}} }, + func(col column) sqlparser.Expr { return &sqlparser.Sum{Arg: newColumn(col)} }, + // func(col column) sqlparser.Expr { return &sqlparser.Avg{Arg: newAggregateExpr(col)} }, + func(col column) sqlparser.Expr { return &sqlparser.Min{Arg: newColumn(col)} }, + func(col column) sqlparser.Expr { return &sqlparser.Max{Arg: newColumn(col)} }, } - numAggrs = rand.Intn(maxAggrs) + 1 + numAggrs := rand.Intn(maxAggrs) + 1 for i := 0; i < numAggrs; i++ { - var tblIdx int - for { - tblIdx = rand.Intn(len(tables)) - if tables[tblIdx].Cols != nil { - break - } - } + tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) col := randomEl(tables[tblIdx].Cols) - newAggregate := randomEl(aggregations)(col.GetUnaliasedName()) + newAggregate := aggregations[aggrIdx](col) + sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(newAggregate, fmt.Sprintf("caggr%d", i))) + + // add to order by (might have to change if there are no grouping columns and ordering is done on aggregation columns) + // (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) + if rand.Intn(2) < 1 { + sel.AddOrder(sqlparser.NewOrder(newAggregate, sqlparser.AscOrder)) + } - col.Alias = fmt.Sprintf("caggr%d", i) - if newAggregate == fmt.Sprintf("count(%s)", col.GetColumnName()) || newAggregate == "count(*)" { + if aggrIdx <= 1 /* CountStar and Count */ { col.Typ = "bigint" - } else if newAggregate == fmt.Sprintf("avg(%s)", col.GetColumnName()) && col.GetColumnName() == "bigint" { + } else if _, ok := newAggregate.(*sqlparser.Avg); ok && col.GetColumnName() == "bigint" { col.Typ = "decimal" } - col.Name = newAggregate - aggregates = append(aggregates, col) - } - return aggregates, numAggrs -} - -func createTables(schemaTables []tableT) []tableT { - var tables []tableT - // add at least one of original emp/dept tables for now because derived tables have nil columns - tables = append(tables, schemaTables[rand.Intn(2)]) - tables[0].SetAlias("tbl0") - noOfTables := rand.Intn(len(schemaTables)) - for i := 0; i < noOfTables; i++ { - tables = append(tables, randomEl(schemaTables)) - tables[i+1].SetAlias(fmt.Sprintf("tbl%d", i+1)) + col.Name = fmt.Sprintf("caggr%d", i) + aggregates = append(aggregates, col) } - return tables + return aggregates } -func createPredicates(tables []tableT, isJoin bool) (predicates []string) { +// returns the predicate as an Expr +func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) { // if creating predicates for a join, // then make sure predicates are created for the last two tables (which are being joined) incr := 0 @@ -439,9 +393,10 @@ func createPredicates(tables []tableT, isJoin bool) (predicates []string) { col1 := randomEl(tables[idx1].Cols) col2 := randomEl(tables[idx2].Cols) + // prevent infinite loops if i > 50 { - predicates = append(predicates, fmt.Sprintf("%s = %s", col1.GetUnaliasedName(), col1.GetUnaliasedName())) - noOfPredicates-- + // cant do this because this minimizes + predicates = append(predicates, sqlparser.NewComparisonExpr(sqlparser.EqualOp, newColumn(col1), newColumn(col1), nil)) break } @@ -449,7 +404,8 @@ func createPredicates(tables []tableT, isJoin bool) (predicates []string) { continue } - predicates = append(predicates, fmt.Sprintf("%s = %s", col1.GetUnaliasedName(), col2.GetUnaliasedName())) + // cant do this because this minimizes + predicates = append(predicates, sqlparser.NewComparisonExpr(sqlparser.EqualOp, newColumn(col1), newColumn(col2), nil)) noOfPredicates-- } } @@ -457,6 +413,39 @@ func createPredicates(tables []tableT, isJoin bool) (predicates []string) { return predicates } +func createLimit(sel *sqlparser.Select) { + limitNum := rand.Intn(10) + if rand.Intn(2) < 1 { + offset := rand.Intn(10) + sel.Limit = sqlparser.NewLimit(offset, limitNum) + } else { + sel.Limit = sqlparser.NewLimitWithoutOffset(limitNum) + } + +} + +func getRandomExpr(tables []tableT) (sqlparser.Expr, string) { + seed := time.Now().UnixNano() + g := sqlparser.NewGenerator(seed, 2, tables...) + return g.Expression() +} + +func newAliasedTable(tbl tableT, alias string) *sqlparser.AliasedTableExpr { + return sqlparser.NewAliasedTableExpr(tbl.Name, alias) +} + +func newAliasedColumn(col column, alias string) *sqlparser.AliasedExpr { + return sqlparser.NewAliasedExpr(newColumn(col), alias) +} + +func newColumn(col column) *sqlparser.ColName { + return sqlparser.NewColNameWithQualifier(col.Name, sqlparser.NewTableName(col.TableName)) +} + +func newOrderColumn(col column) *sqlparser.Order { + return sqlparser.NewOrder(newColumn(col), sqlparser.AscOrder) +} + func randomEl[K any](in []K) K { return in[rand.Intn(len(in))] } diff --git a/go/vt/sqlparser/ast_funcs.go b/go/vt/sqlparser/ast_funcs.go index 29d84412d7f..5efa51da6c0 100644 --- a/go/vt/sqlparser/ast_funcs.go +++ b/go/vt/sqlparser/ast_funcs.go @@ -685,6 +685,106 @@ func NewColNameWithQualifier(identifier string, table TableName) *ColName { } } +// NewTableName makes a new TableName +func NewTableName(name string) TableName { + return TableName{ + Name: NewIdentifierCS(name), + } +} + +// NewTableNameWithQualifier makes a new TableName with a qualifier +func NewTableNameWithQualifier(name, qualifier string) TableName { + return TableName{ + Name: NewIdentifierCS(name), + Qualifier: NewIdentifierCS(qualifier), + } +} + +// NewAliasedTableExpr makes a new AliasedTableExpr with an alias +func NewAliasedTableExpr(simpleTableExpr SimpleTableExpr, alias string) *AliasedTableExpr { + return &AliasedTableExpr{ + Expr: simpleTableExpr, + As: NewIdentifierCS(alias), + } +} + +// NewJoinTableExpr makes a new JoinTableExpr +func NewJoinTableExpr(leftExpr TableExpr, join JoinType, rightExpr TableExpr, condition *JoinCondition) *JoinTableExpr { + return &JoinTableExpr{ + LeftExpr: leftExpr, + Join: join, + RightExpr: rightExpr, + Condition: condition, + } +} + +// NewJoinCondition makes a new JoinCondition +func NewJoinCondition(on Expr, using Columns) *JoinCondition { + return &JoinCondition{ + On: on, + Using: using, + } +} + +// NewAliasedExpr makes a new AliasedExpr +func NewAliasedExpr(expr Expr, alias string) *AliasedExpr { + return &AliasedExpr{ + Expr: expr, + As: NewIdentifierCI(alias), + } +} + +// NewOrder makes a new Order +func NewOrder(expr Expr, direction OrderDirection) *Order { + return &Order{ + Expr: expr, + Direction: direction, + } +} + +// NewComparisonExpr makes a new ComparisonExpr +func NewComparisonExpr(operator ComparisonExprOperator, left, right, escape Expr) *ComparisonExpr { + return &ComparisonExpr{ + Operator: operator, + Left: left, + Right: right, + Escape: escape, + } +} + +// NewLimit makes a new Limit +func NewLimit(offset, rowCount int) *Limit { + return &Limit{ + Offset: &Literal{ + Type: IntVal, + Val: fmt.Sprint(offset), + }, + Rowcount: &Literal{ + Type: IntVal, + Val: fmt.Sprint(rowCount), + }, + } +} + +// NewLimitWithoutOffset makes a new Limit without an offset +func NewLimitWithoutOffset(rowCount int) *Limit { + return &Limit{ + Offset: nil, + Rowcount: &Literal{ + Type: IntVal, + Val: fmt.Sprint(rowCount), + }, + } +} + +// NewDerivedTable makes a new DerivedTable +func NewDerivedTable(lateral bool, selectStatement SelectStatement) *DerivedTable { + return &DerivedTable{ + Lateral: lateral, + Select: selectStatement, + } +} + // NewSelect is used to create a select statement func NewSelect(comments Comments, exprs SelectExprs, selectOptions []string, into *SelectInto, from TableExprs, where *Where, groupBy GroupBy, having *Where, windows NamedWindows) *Select { var cache *bool diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 30559bbe16b..2df65392ac4 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Vitess Authors. +Copyright 2023 The Vitess Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,71 +27,30 @@ type ( Col struct { TableName string Name string - Alias string Typ string // add isDerived flag? } TableT struct { - Name string // select type - alias string - Cols []Col + // Name will be a TableName object if it is used, with Name: alias or name if no alias is provided + // Name will only be a DerivedTable for moving that data around + Name SimpleTableExpr + Cols []Col } ) -// GetAliasedExpression returns the aliasing command if Alias is nonempty -func (c *Col) GetAliasedExpression() string { - // workaround for derived tables only using column alias in select statement; make sure Name is empty - - sel := fmt.Sprintf("%s.%s", c.TableName, c.Name) - if c.Alias != "" { - sel += fmt.Sprintf(" as %s", c.Alias) - } - return sel -} - -// GetColumnName returns the Alias if it's nonempty +// GetColumnName returns TableName.Name func (c *Col) GetColumnName() string { - if c.Alias != "" { - return c.Alias - } - return c.GetUnaliasedName() -} - -// GetUnaliasedName returns the name used in queries if the alias is empty (TableName.Name) -func (c *Col) GetUnaliasedName() string { return fmt.Sprintf("%s.%s", c.TableName, c.Name) } -// GetAliasedExpression returns the aliasing command if alias is nonempty -func (t *TableT) GetAliasedExpression() string { - sel := fmt.Sprintf("%s", t.Name) - if t.alias != "" { - sel += fmt.Sprintf(" as %s", t.alias) - } - return sel -} - -// GetAlias returns the alias -func (t *TableT) GetAlias() string { - return t.alias -} - -// SetAlias sets the alias for t, as well as setting the TableName for all columns in Cols -func (t *TableT) SetAlias(newAlias string) { - t.alias = newAlias +// SetName sets the alias for t, as well as setting the TableName for all columns in Cols +func (t *TableT) SetName(newName string) { + t.Name = NewTableName(newName) for i := range t.Cols { - t.Cols[i].TableName = newAlias + t.Cols[i].TableName = newName } } -// GetColumnName returns the alias if it's nonempty -func (t *TableT) GetColumnName() string { - if t.alias != "" { - return t.alias - } - return t.Name -} - // SetColumns sets the columns of t, and automatically assigns TableName // this makes it unnatural (but still possible as Cols is exportable) to modify TableName func (t *TableT) SetColumns(col ...Col) { @@ -103,7 +62,11 @@ func (t *TableT) SetColumns(col ...Col) { // this makes it unnatural (but still possible as Cols is exportable) to modify TableName func (t *TableT) AddColumns(col ...Col) { for i := range col { - col[i].TableName = t.GetColumnName() + // only change TableName if + if tName, ok := t.Name.(TableName); ok { + col[i].TableName = tName.Name.String() + } + t.Cols = append(t.Cols, col[i]) } } @@ -113,9 +76,8 @@ func (t *TableT) copy() *TableT { newCols := make([]Col, len(t.Cols)) copy(newCols, t.Cols) return &TableT{ - Name: t.Name, - alias: t.alias, - Cols: newCols, + Name: t.Name, + Cols: newCols, } } @@ -348,18 +310,14 @@ func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { idx := rand.Intn(len(tableCopy.Cols)) randCol := tableCopy.Cols[idx] if randCol.Typ == typ { - newName := randCol.Name - if randCol.Alias != "" { - newName = randCol.Alias - } - newTableName := table.Name - if tableCopy.alias != "" { - newTableName = tableCopy.alias + newTableName := NewIdentifierCS("") + if tName, ok := table.Name.(TableName); ok { + newTableName = tName.Name } return &ColName{ Metadata: nil, - Name: NewIdentifierCI(newName), - Qualifier: TableName{Name: NewIdentifierCS(newTableName)}, + Name: NewIdentifierCI(randCol.Name), + Qualifier: TableName{Name: newTableName}, } } else { // delete randCol from table.columns diff --git a/go/vt/sqlparser/random_expr_test.go b/go/vt/sqlparser/random_expr_test.go index f2cb22a4b97..65fd4bed15c 100644 --- a/go/vt/sqlparser/random_expr_test.go +++ b/go/vt/sqlparser/random_expr_test.go @@ -1,36 +1,50 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package sqlparser import ( "fmt" "testing" "time" - - "golang.org/x/exp/maps" ) func TestRandomExprWithTables(t *testing.T) { - schema := map[string]TableT{ - "emp": {Name: "emp", Cols: []Col{ - {Name: "empno", Typ: "bigint"}, - {Name: "ename", Typ: "varchar"}, - {Name: "job", Typ: "varchar"}, - {Name: "mgr", Typ: "bigint"}, - {Name: "hiredate", Typ: "date"}, - {Name: "sal", Typ: "bigint"}, - {Name: "comm", Typ: "bigint"}, - {Name: "deptno", Typ: "bigint"}, - }}, - "dept": {Name: "dept", Cols: []Col{ - {Name: "deptno", Typ: "bigint"}, - {Name: "dname", Typ: "varchar"}, - {Name: "loc", Typ: "varchar"}, - }}, + schemaTables := []TableT{ + {Name: NewTableName("emp")}, + {Name: NewTableName("dept")}, } - - schemaTables := maps.Values(schema) + schemaTables[0].AddColumns([]Col{ + {Name: "empno", Typ: "bigint"}, + {Name: "ename", Typ: "varchar"}, + {Name: "job", Typ: "varchar"}, + {Name: "mgr", Typ: "bigint"}, + {Name: "hiredate", Typ: "date"}, + {Name: "sal", Typ: "bigint"}, + {Name: "comm", Typ: "bigint"}, + {Name: "deptno", Typ: "bigint"}, + }...) + schemaTables[1].AddColumns([]Col{ + {Name: "deptno", Typ: "bigint"}, + {Name: "dname", Typ: "varchar"}, + {Name: "loc", Typ: "varchar"}, + }...) seed := time.Now().UnixNano() - g := NewGenerator(seed, 2, schemaTables...) + g := NewGenerator(seed, 3, schemaTables...) randomExpr, _ := g.Expression() fmt.Println(String(randomExpr)) } From e925343fb69b7683159a9f8a1ef398cd9bbe8f31 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Mon, 19 Jun 2023 00:24:09 -0700 Subject: [PATCH 17/29] separated failures in must-fix and known failures Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../queries/aggregation/aggregation_test.go | 33 +++++------ .../vtgate/queries/random/random_test.go | 57 +++++++++---------- 2 files changed, 39 insertions(+), 51 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go index 072d029b97b..be94f0f2d1e 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go +++ b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go @@ -454,29 +454,13 @@ func TestBuggyQueries(t *testing.T) { mcmp.Exec("select /*vt+ PLANNER=gen4 */count(*) from t10 left join t10 as t11 on t10.a = t11.b where t11.a") // from random/random_test.go - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20);") - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10);") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (10,'ACCOUNTING','NEW YORK');") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (20,'RESEARCH','DALLAS');") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (30,'SALES','CHICAGO');") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES (40,'OPERATIONS','BOSTON');") + mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20), (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30), (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30), (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20), (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30), (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30), (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10), (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20), (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10), (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30), (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20), (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30), (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20), (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10)") + mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES ('10','ACCOUNTING','NEW YORK'), ('20','RESEARCH','DALLAS'), ('30','SALES','CHICAGO'), ('40','OPERATIONS','BOSTON')") mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(*) from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.deptno group by tbl1.empno order by tbl1.empno", `[[INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3", - `[[INT64(8)] [INT64(16)] [INT64(12)]]`) + //mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3", + // `[[INT64(8)] [INT64(16)] [INT64(12)]]`) mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from emp as tbl0 group by tbl0.empno order by tbl0.empno", `[[INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)]]`) mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*), tbl0.loc from dept as tbl0 group by tbl0.loc", @@ -507,6 +491,15 @@ func TestBuggyQueries(t *testing.T) { `[[VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("ACCOUNTING") INT64(40)] [VARCHAR("ACCOUNTING") INT64(20)] [VARCHAR("ACCOUNTING") INT64(30)] [VARCHAR("OPERATIONS") INT64(10)] [VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("OPERATIONS") INT64(20)] [VARCHAR("OPERATIONS") INT64(30)] [VARCHAR("RESEARCH") INT64(10)] [VARCHAR("RESEARCH") INT64(40)] [VARCHAR("RESEARCH") INT64(20)] [VARCHAR("RESEARCH") INT64(30)] [VARCHAR("SALES") INT64(10)] [VARCHAR("SALES") INT64(40)] [VARCHAR("SALES") INT64(20)] [VARCHAR("SALES") INT64(30)]]`) mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ max(tbl0.hiredate) from emp as tbl0", `[[DATE("1983-01-12")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0, count(*) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.loc = tbl1.dname", + `[[INT64(10) INT64(4)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc where (tbl2.deptno)", + `[[INT64(4)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm", + `[[NULL NULL]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno", + `[[NULL INT64(0)]]`) + } func TestMinMaxAcrossJoins(t *testing.T) { diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 0cfc0db0f25..cea9d917b34 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -77,21 +77,13 @@ func helperTest(t *testing.T, query string) { }) } -func TestKnownFailures(t *testing.T) { +func TestMustFix(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) - // logs more stuff - //clusterInstance.EnableGeneralLog() - - // coercion should not try to coerce this value: DATE("1980-12-17") - helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct tbl1.hiredate as cgroup0, count(tbl1.mgr) as caggr0 from emp as tbl1 group by tbl1.hiredate, tbl1.ename") - - // mismatched results (left join + odd on) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0, count(*) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.loc = tbl1.dname") - - // mismatched results (left join + odd where) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc where (tbl2.deptno)") + // mismatched results + // previously failing, then succeeding query, now failing again + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3") // mismatched results (group by + right join) // left instead of right works @@ -104,30 +96,27 @@ func TestKnownFailures(t *testing.T) { // mismatched results (sum + right join) // left instead of right works helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") +} + +func TestKnownFailures(t *testing.T) { + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) - // the type of this expression cannot be statically computed - helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm") + // logs more stuff + //clusterInstance.EnableGeneralLog() - // Cannot convert value to desired type - helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno") + // coercion should not try to coerce this value: DATE("1980-12-17") + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct tbl1.hiredate as cgroup0, count(tbl1.mgr) as caggr0 from emp as tbl1 group by tbl1.hiredate, tbl1.ename") - // sometimes fails if the following query is more complicated (?) // only_full_group_by enabled (vitess sometimes (?) produces the correct result assuming only_full_group_by is disabled) // vitess error: nil // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename, min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") - // only_full_group_by disabled - // unknown aggregation random - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.comm, count(*) from emp as tbl0, emp as tbl1 where tbl0.empno = tbl1.deptno") - - // unavoidable - // mismatched results (group by + limit no order by) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0 group by tbl0.sal limit 7") - - // unavoidable - // mismatched results (group by + select grouping + limit no order by) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.sal, count(*) from emp as tbl0 group by tbl0.sal limit 7") + // only_full_group_by enabled + // vitess error: nil + // mysql error: Expression #1 of ORDER BY clause is not in SELECT list, references column 'ks_random.tbl2.DNAME' which is not in SELECT list; this is incompatible with DISTINCT + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) as caggr0 from dept as tbl2 group by tbl2.dname order by tbl2.dname asc") // vttablet: rpc error: code = NotFound desc = Unknown column 'cgroup0' in 'field list' (errno 1054) (sqlstate 42S22) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.ename as cgroup0, max(tbl0.comm) as caggr0 from emp as tbl0, emp as tbl1 group by cgroup0") @@ -370,7 +359,7 @@ func createAggregations(tables []tableT, sel *sqlparser.Select, maxAggrs int) (a return aggregates } -// returns the predicate as an Expr +// returns the predicates as a sqlparser.Exprs (slice of sqlparser.Expr's) func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) { // if creating predicates for a join, // then make sure predicates are created for the last two tables (which are being joined) @@ -378,6 +367,7 @@ func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) if isJoin && len(tables) > 2 { incr += len(tables) - 2 } + for idx1 := range tables { for idx2 := range tables { // fmt.Printf("predicate tables:\n%v\n idx1: %d idx2: %d, incr: %d", tables, idx1, idx2, incr) @@ -396,7 +386,7 @@ func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) // prevent infinite loops if i > 50 { // cant do this because this minimizes - predicates = append(predicates, sqlparser.NewComparisonExpr(sqlparser.EqualOp, newColumn(col1), newColumn(col1), nil)) + predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col1), nil)) break } @@ -405,7 +395,7 @@ func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) } // cant do this because this minimizes - predicates = append(predicates, sqlparser.NewComparisonExpr(sqlparser.EqualOp, newColumn(col1), newColumn(col2), nil)) + predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) noOfPredicates-- } } @@ -446,6 +436,11 @@ func newOrderColumn(col column) *sqlparser.Order { return sqlparser.NewOrder(newColumn(col), sqlparser.AscOrder) } +func getRandomComparisonExprOperator() sqlparser.ComparisonExprOperator { + // =, <, >, <=, >=, !=, <=> + return randomEl([]sqlparser.ComparisonExprOperator{0, 1, 2, 3, 4, 5, 6}) +} + func randomEl[K any](in []K) K { return in[rand.Intn(len(in))] } From 2a30aea7fe0799ef1cc3acf502a4ab888678fec9 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Mon, 19 Jun 2023 01:16:31 -0700 Subject: [PATCH 18/29] added two must-fix queries Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/vtgate/queries/random/random_test.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index cea9d917b34..2febe2344db 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -81,20 +81,29 @@ func TestMustFix(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.dname) as caggr0, 'cattle' as crandom0 from dept as tbl0, emp as tbl1 where tbl0.deptno != tbl1.sal group by tbl1.comm") + + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 'octopus'") + // mismatched results // previously failing, then succeeding query, now failing again helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3") // mismatched results (group by + right join) // left instead of right works + // swapping tables and predicates and changing to left fails helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno") // mismatched results (count + right join) // left instead of right works + // swapping tables and predicates and changing to left fails helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal") // mismatched results (sum + right join) // left instead of right works + // swapping tables and predicates and changing to left fails helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") } @@ -386,7 +395,7 @@ func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) // prevent infinite loops if i > 50 { // cant do this because this minimizes - predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col1), nil)) + predicates = append(predicates, newColumn(col1)) break } From 5aabd0ecd71225fa6801bb887943870599695a23 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 20 Jun 2023 21:45:04 -0700 Subject: [PATCH 19/29] added having to random query generation Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/random_test.go | 175 ++++++++++++------ go/vt/sqlparser/ast_funcs.go | 4 + go/vt/sqlparser/random_expr.go | 59 +----- go/vt/sqlparser/schema.go | 79 ++++++++ 4 files changed, 205 insertions(+), 112 deletions(-) create mode 100644 go/vt/sqlparser/schema.go diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 2febe2344db..384977d2e00 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -32,6 +32,8 @@ import ( "vitess.io/vitess/go/test/endtoend/utils" ) +// this test uses the AST defined in the sqlparser package to randomly generate queries + type tableT = sqlparser.TableT type column = sqlparser.Col @@ -53,8 +55,10 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { deleteAll() + // disable only_full_group_by // mcmp.Exec("set sql_mode=''") + // insert data mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20), (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30), (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30), (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20), (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30), (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30), (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10), (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20), (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10), (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30), (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20), (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30), (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20), (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10)") mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES ('10','ACCOUNTING','NEW YORK'), ('20','RESEARCH','DALLAS'), ('30','SALES','CHICAGO'), ('40','OPERATIONS','BOSTON')") @@ -81,6 +85,12 @@ func TestMustFix(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) as caggr1 from dept as tbl0, emp as tbl1 group by tbl1.sal having max(tbl1.comm) != true") + + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct sum(tbl1.loc) as caggr0 from dept as tbl0, dept as tbl1 group by tbl1.deptno having max(tbl1.dname) <= 1") + // mismatched results helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.dname) as caggr0, 'cattle' as crandom0 from dept as tbl0, emp as tbl1 where tbl0.deptno != tbl1.sal group by tbl1.comm") @@ -114,12 +124,19 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() + // [BUG] unknown plan type for DISTINCT *planbuilder.filter + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct sum(tbl1.loc) as caggr0 from dept as tbl0, dept as tbl1 group by tbl1.deptno having count(*) <=> tbl1.deptno") + + // vitess error: + // mysql error: Unknown column 'tbl0.deptno' in 'having clause' + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) as caggr0 from dept as tbl0 having tbl0.deptno") + // coercion should not try to coerce this value: DATE("1980-12-17") helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct tbl1.hiredate as cgroup0, count(tbl1.mgr) as caggr0 from emp as tbl1 group by tbl1.hiredate, tbl1.ename") // only_full_group_by enabled (vitess sometimes (?) produces the correct result assuming only_full_group_by is disabled) // vitess error: nil - // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME' + // mysql error: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'ks_random.tbl0.ENAME'; this is incompatible with sql_mode=only_full_group_by helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename, min(tbl0.comm) from emp as tbl0 left join emp as tbl1 on tbl0.empno = tbl1.comm and tbl0.empno = tbl1.empno") // only_full_group_by enabled @@ -133,12 +150,13 @@ func TestKnownFailures(t *testing.T) { // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") + // [BUG] push projection does not yet support: *planbuilder.memorySort (errno 1815) (sqlstate HY000) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl1 join (select count(*) from emp as tbl0, dept as tbl1 group by tbl1.loc) as tbl2") + // EOF (errno 2013) (sqlstate HY000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl0, (select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0, emp as tbl1 limit 18) as tbl1") - // push projection does not yet support: *planbuilder.memorySort (errno 1815) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl1 join (select count(*) from emp as tbl0, dept as tbl1 group by tbl1.loc) as tbl2") - + // unsupported // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ (select count(*) from emp as tbl0) from emp as tbl0") @@ -155,7 +173,7 @@ func TestKnownFailures(t *testing.T) { helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") // unsupported - // unsupported: in scatter query: aggregation function + // unsupported: in scatter query: aggregation function 'avg(tbl0.deptno)' helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") } @@ -166,6 +184,7 @@ func TestRandom(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) + // specify the schema (that is defined in schema.sql) schemaTables := []tableT{ {Name: sqlparser.NewTableName("emp")}, {Name: sqlparser.NewTableName("dept")}, @@ -192,13 +211,13 @@ func TestRandom(t *testing.T) { for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { query := sqlparser.String(randomQuery(schemaTables, 3, 3)) _, vtErr := mcmp.ExecAllowAndCompareError(query) - fmt.Println(query) - // t.Failed() will become true once and subsequently print every query - // this instead assumes all queries are valid mysql queries + // this assumes all queries are valid mysql queries if vtErr != nil { + fmt.Println(query) fmt.Println(vtErr) + // restart the mysql and vitess connections in case something bad happened closer() - mcmp, _ = start(t) + mcmp, closer = start(t) } queryCount++ } @@ -210,29 +229,45 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel sel := &sqlparser.Select{} sel.SetComments(sqlparser.Comments{"/*vt+ PLANNER=Gen4 */"}) - // also creates the join - tables, isJoin := createTablesAndJoin(schemaTables, sel) - - grouping := createGroupBy(tables, sel, maxGroupBy) - aggregates := createAggregations(tables, sel, maxAggrs) - sel.AddWhere(sqlparser.AndExpressions(createPredicates(tables, false)...)) - // select distinct (fails with group by bigint) isDistinct := rand.Intn(2) < 1 if isDistinct { sel.MakeDistinct() } + // create both tables and join at the same time since both occupy the from clause + tables, isJoin := createTablesAndJoin(schemaTables, sel) + + groupExprs, groupSelectExprs, grouping := createGroupBy(tables, maxGroupBy) + sel.AddSelectExprs(groupSelectExprs) + sel.GroupBy = groupExprs + aggrExprs, aggregates := createAggregations(tables, maxAggrs) + sel.AddSelectExprs(aggrExprs) + + // can add both aggregate and grouping columns to order by + isOrdered := rand.Intn(2) < 1 + if isOrdered && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + addOrderBy(sel) + } + + // where + sel.AddWhere(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) + // random predicate expression if rand.Intn(2) < 1 { predRandomExpr, _ := getRandomExpr(tables) sel.AddWhere(predRandomExpr) } - // limit (fails with group by and no order by) - // TODO: numOrderBy needs to be > 0 - if rand.Intn(2) < 1 /* numOrderBy > 0 */ { - createLimit(sel) + // having + sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) + if rand.Intn(2) < 1 { + sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) + } + + // only add a limit if the grouping columns are ordered + if rand.Intn(2) < 1 && isOrdered { + sel.Limit = createLimit() } var newTable tableT @@ -247,9 +282,19 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel }) } - // add new table to schemaTables + // alias grouping and aggregate columns + for i := range grouping { + grouping[i].Name = fmt.Sprintf("cgroup%d", i) + } + for i := range aggregates { + aggregates[i].Name = fmt.Sprintf("cgroup%d", i) + } + + // add them to newTable newTable.AddColumns(grouping...) newTable.AddColumns(aggregates...) + + // add new table to schemaTables newTable.Name = sqlparser.NewDerivedTable(false, sel) schemaTables = append(schemaTables, newTable) @@ -284,10 +329,11 @@ func createTablesAndJoin(schemaTables []tableT, sel *sqlparser.Select) ([]tableT // create the join before aliasing newJoinTableExpr := createJoin(tables, sel) + // alias tables[numTables+1].SetName(fmt.Sprintf("tbl%d", numTables+1)) // create the condition after aliasing - newJoinTableExpr.Condition = sqlparser.NewJoinCondition(sqlparser.AndExpressions(createPredicates(tables, true)...), nil) + newJoinTableExpr.Condition = sqlparser.NewJoinCondition(sqlparser.AndExpressions(createWherePredicates(tables, true)...), nil) sel.From[numTables] = newJoinTableExpr } @@ -307,33 +353,26 @@ func createJoin(tables []tableT, sel *sqlparser.Select) *sqlparser.JoinTableExpr // adds grouping columns to sel.GroupBy and optionally to sel.SelectExprs and sel.OrderBy // TODO: maybe change to return this stuff instead -func createGroupBy(tables []tableT, sel *sqlparser.Select, maxGB int) (grouping []column) { +func createGroupBy(tables []tableT, maxGB int) (groupBy sqlparser.GroupBy, groupSelectExprs sqlparser.SelectExprs, grouping []column) { numGBs := rand.Intn(maxGB) for i := 0; i < numGBs; i++ { tblIdx := rand.Intn(len(tables)) col := randomEl(tables[tblIdx].Cols) - sel.GroupBy = append(sel.GroupBy, newColumn(col)) - - // add to order by (might have to change if there are no grouping columns and ordering is done on aggregation columns) - // (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) - if rand.Intn(2) < 1 { - sel.AddOrder(newOrderColumn(col)) - } + groupBy = append(groupBy, newColumn(col)) // add to select if rand.Intn(2) < 1 { - sel.SelectExprs = append(sel.SelectExprs, newAliasedColumn(col, fmt.Sprintf("cgroup%d", i))) - col.Name = fmt.Sprintf("cgroup%d", i) + groupSelectExprs = append(groupSelectExprs, newAliasedColumn(col, fmt.Sprintf("cgroup%d", i))) grouping = append(grouping, col) } } - return grouping + return groupBy, groupSelectExprs, grouping } // adds aggregation columns to sel.SelectExprs and optionally to sel.OrderBy // TODO: maybe change to return this stuff instead -func createAggregations(tables []tableT, sel *sqlparser.Select, maxAggrs int) (aggregates []column) { +func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.SelectExprs, aggregates []column) { aggregations := []func(col column) sqlparser.Expr{ func(_ column) sqlparser.Expr { return &sqlparser.CountStar{} }, func(col column) sqlparser.Expr { return &sqlparser.Count{Args: sqlparser.Exprs{newColumn(col)}} }, @@ -348,13 +387,7 @@ func createAggregations(tables []tableT, sel *sqlparser.Select, maxAggrs int) (a tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) col := randomEl(tables[tblIdx].Cols) newAggregate := aggregations[aggrIdx](col) - sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(newAggregate, fmt.Sprintf("caggr%d", i))) - - // add to order by (might have to change if there are no grouping columns and ordering is done on aggregation columns) - // (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) - if rand.Intn(2) < 1 { - sel.AddOrder(sqlparser.NewOrder(newAggregate, sqlparser.AscOrder)) - } + aggrExprs = append(aggrExprs, sqlparser.NewAliasedExpr(newAggregate, fmt.Sprintf("caggr%d", i))) if aggrIdx <= 1 /* CountStar and Count */ { col.Typ = "bigint" @@ -362,14 +395,29 @@ func createAggregations(tables []tableT, sel *sqlparser.Select, maxAggrs int) (a col.Typ = "decimal" } - col.Name = fmt.Sprintf("caggr%d", i) + col.Name = sqlparser.String(newAggregate) aggregates = append(aggregates, col) } - return aggregates + return aggrExprs, aggregates +} + +// orders on all non-aggregate SelectExprs and independently at random on all aggregate SelectExprs of sel +func addOrderBy(sel *sqlparser.Select) { + for _, selExpr := range sel.SelectExprs { + if aliasedExpr, ok := selExpr.(*sqlparser.AliasedExpr); ok { + // if the SelectExpr is non-aggregate (the AliasedExpr has Expr of type ColName) + // then add to the order by + if colName, ok1 := aliasedExpr.Expr.(*sqlparser.ColName); ok1 { + sel.AddOrder(sqlparser.NewOrder(colName, getRandomOrderDirection())) + } else if rand.Intn(2) < 1 { + sel.AddOrder(sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) + } + } + } } // returns the predicates as a sqlparser.Exprs (slice of sqlparser.Expr's) -func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) { +func createWherePredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) { // if creating predicates for a join, // then make sure predicates are created for the last two tables (which are being joined) incr := 0 @@ -394,8 +442,7 @@ func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) // prevent infinite loops if i > 50 { - // cant do this because this minimizes - predicates = append(predicates, newColumn(col1)) + predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) break } @@ -403,26 +450,45 @@ func createPredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) continue } - // cant do this because this minimizes predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) noOfPredicates-- } } } + + // make sure the join predicate is never empty + if len(predicates) == 0 && isJoin { + predRandomExpr, _ := getRandomExpr(tables) + predicates = append(predicates, predRandomExpr) + } + return predicates } -func createLimit(sel *sqlparser.Select) { +func createHavingPredicates(tables []tableT) (havingPredicates sqlparser.Exprs) { + aggrSelectExprs, _ := createAggregations(tables, 3) + for i := range aggrSelectExprs { + if aliasedExpr, ok := aggrSelectExprs[i].(*sqlparser.AliasedExpr); ok { + predRandomExpr, _ := getRandomExpr(tables) + havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), aliasedExpr.Expr, predRandomExpr, nil)) + } + } + return havingPredicates +} + +// creates sel.Limit +func createLimit() *sqlparser.Limit { limitNum := rand.Intn(10) if rand.Intn(2) < 1 { offset := rand.Intn(10) - sel.Limit = sqlparser.NewLimit(offset, limitNum) + return sqlparser.NewLimit(offset, limitNum) } else { - sel.Limit = sqlparser.NewLimitWithoutOffset(limitNum) + return sqlparser.NewLimitWithoutOffset(limitNum) } } +// returns a random expression and its type func getRandomExpr(tables []tableT) (sqlparser.Expr, string) { seed := time.Now().UnixNano() g := sqlparser.NewGenerator(seed, 2, tables...) @@ -441,15 +507,16 @@ func newColumn(col column) *sqlparser.ColName { return sqlparser.NewColNameWithQualifier(col.Name, sqlparser.NewTableName(col.TableName)) } -func newOrderColumn(col column) *sqlparser.Order { - return sqlparser.NewOrder(newColumn(col), sqlparser.AscOrder) -} - func getRandomComparisonExprOperator() sqlparser.ComparisonExprOperator { // =, <, >, <=, >=, !=, <=> return randomEl([]sqlparser.ComparisonExprOperator{0, 1, 2, 3, 4, 5, 6}) } +func getRandomOrderDirection() sqlparser.OrderDirection { + // asc, desc + return randomEl([]sqlparser.OrderDirection{0, 1}) +} + func randomEl[K any](in []K) K { return in[rand.Intn(len(in))] } diff --git a/go/vt/sqlparser/ast_funcs.go b/go/vt/sqlparser/ast_funcs.go index 5efa51da6c0..dc4a826ab7c 100644 --- a/go/vt/sqlparser/ast_funcs.go +++ b/go/vt/sqlparser/ast_funcs.go @@ -1052,6 +1052,10 @@ func compliantName(in string) string { return buf.String() } +func (node *Select) AddSelectExprs(selectExprs SelectExprs) { + node.SelectExprs = selectExprs +} + // AddOrder adds an order by element func (node *Select) AddOrder(order *Order) { node.OrderBy = append(node.OrderBy, order) diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 2df65392ac4..25242d6ac14 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -23,64 +23,6 @@ import ( // This file is used to generate random expressions to be used for testing -type ( - Col struct { - TableName string - Name string - Typ string - // add isDerived flag? - } - TableT struct { - // Name will be a TableName object if it is used, with Name: alias or name if no alias is provided - // Name will only be a DerivedTable for moving that data around - Name SimpleTableExpr - Cols []Col - } -) - -// GetColumnName returns TableName.Name -func (c *Col) GetColumnName() string { - return fmt.Sprintf("%s.%s", c.TableName, c.Name) -} - -// SetName sets the alias for t, as well as setting the TableName for all columns in Cols -func (t *TableT) SetName(newName string) { - t.Name = NewTableName(newName) - for i := range t.Cols { - t.Cols[i].TableName = newName - } -} - -// SetColumns sets the columns of t, and automatically assigns TableName -// this makes it unnatural (but still possible as Cols is exportable) to modify TableName -func (t *TableT) SetColumns(col ...Col) { - t.Cols = make([]Col, len(col)) - t.AddColumns(col...) -} - -// AddColumns adds columns to t, and automatically assigns TableName -// this makes it unnatural (but still possible as Cols is exportable) to modify TableName -func (t *TableT) AddColumns(col ...Col) { - for i := range col { - // only change TableName if - if tName, ok := t.Name.(TableName); ok { - col[i].TableName = tName.Name.String() - } - - t.Cols = append(t.Cols, col[i]) - } -} - -// copy returns a deep copy of t -func (t *TableT) copy() *TableT { - newCols := make([]Col, len(t.Cols)) - copy(newCols, t.Cols) - return &TableT{ - Name: t.Name, - Cols: newCols, - } -} - func NewGenerator(seed int64, maxDepth int, tables ...TableT) *Generator { g := Generator{ seed: seed, @@ -119,6 +61,7 @@ func (g *Generator) atMaxDepth() bool { - true/false - AND/OR/NOT - string literals, numeric literals (-/+ 1000) + - columns of types bigint and varchar - =, >, <, >=, <=, <=>, != - &, |, ^, +, -, *, /, div, %, <<, >> - IN, BETWEEN and CASE diff --git a/go/vt/sqlparser/schema.go b/go/vt/sqlparser/schema.go new file mode 100644 index 00000000000..43a955da012 --- /dev/null +++ b/go/vt/sqlparser/schema.go @@ -0,0 +1,79 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sqlparser + +import "fmt" + +// this file defines two structs (Col and TableT) which represent the schema of the database used +// currently used in random expression generation and random query generation in endtoend testing + +type ( + Col struct { + TableName string + Name string + Typ string + } + TableT struct { + // Name will be a TableName object if it is used, with Name: alias or name if no alias is provided + // Name will only be a DerivedTable for moving its data around + Name SimpleTableExpr + Cols []Col + } +) + +// GetColumnName returns TableName.Name +func (c *Col) GetColumnName() string { + return fmt.Sprintf("%s.%s", c.TableName, c.Name) +} + +// SetName sets the alias for t, as well as setting the TableName for all columns in Cols +func (t *TableT) SetName(newName string) { + t.Name = NewTableName(newName) + for i := range t.Cols { + t.Cols[i].TableName = newName + } +} + +// SetColumns sets the columns of t, and automatically assigns TableName +// this makes it unnatural (but still possible as Cols is exportable) to modify TableName +func (t *TableT) SetColumns(col ...Col) { + t.Cols = make([]Col, len(col)) + t.AddColumns(col...) +} + +// AddColumns adds columns to t, and automatically assigns TableName +// this makes it unnatural (but still possible as Cols is exportable) to modify TableName +func (t *TableT) AddColumns(col ...Col) { + for i := range col { + // only change the Col's TableName if t is of type TableName + if tName, ok := t.Name.(TableName); ok { + col[i].TableName = tName.Name.String() + } + + t.Cols = append(t.Cols, col[i]) + } +} + +// copy returns a deep copy of t +func (t *TableT) copy() *TableT { + newCols := make([]Col, len(t.Cols)) + copy(newCols, t.Cols) + return &TableT{ + Name: t.Name, + Cols: newCols, + } +} From 9de596c444d2beeb960d79da5ecc741fca499fda Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 20 Jun 2023 21:54:05 -0700 Subject: [PATCH 20/29] removed one passing must-fix query Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/vtgate/queries/random/random_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 384977d2e00..5a01ed89c8d 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -110,11 +110,6 @@ func TestMustFix(t *testing.T) { // left instead of right works // swapping tables and predicates and changing to left fails helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal") - - // mismatched results (sum + right join) - // left instead of right works - // swapping tables and predicates and changing to left fails - helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl0.mgr) from emp as tbl0 right join emp as tbl1 on tbl0.mgr = tbl1.empno") } func TestKnownFailures(t *testing.T) { From 8b80cd93a610d607a530e09ff4fde6df10868d97 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Thu, 22 Jun 2023 03:20:55 -0700 Subject: [PATCH 21/29] created interface for random expression generation with a schema Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/query_gen_test.go | 437 ++++++++++++++++++ .../vtgate/queries/random/random_expr_test.go | 53 +++ .../vtgate/queries/random/random_test.go | 339 +------------- go/vt/sqlparser/ast_funcs.go | 2 +- go/vt/sqlparser/random_expr.go | 84 ++-- go/vt/sqlparser/random_expr_test.go | 50 -- go/vt/sqlparser/schema.go | 79 ---- 7 files changed, 544 insertions(+), 500 deletions(-) create mode 100644 go/test/endtoend/vtgate/queries/random/query_gen_test.go create mode 100644 go/test/endtoend/vtgate/queries/random/random_expr_test.go delete mode 100644 go/vt/sqlparser/random_expr_test.go delete mode 100644 go/vt/sqlparser/schema.go diff --git a/go/test/endtoend/vtgate/queries/random/query_gen_test.go b/go/test/endtoend/vtgate/queries/random/query_gen_test.go new file mode 100644 index 00000000000..bfb46876232 --- /dev/null +++ b/go/test/endtoend/vtgate/queries/random/query_gen_test.go @@ -0,0 +1,437 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package random + +import ( + "fmt" + "math/rand" + "testing" + "time" + + "golang.org/x/exp/slices" + + "vitess.io/vitess/go/slices2" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/sqlparser" +) + +// this file contains the structs and functions to generate random queries + +type ( + column struct { + tableName string + name string + typ string + } + tableT struct { + // name will be a tableName object if it is used, with name: alias or name if no alias is provided + // name will only be a DerivedTable for moving its data around + name sqlparser.SimpleTableExpr + cols []column + } +) + +var _ sqlparser.ExprGenerator = (*tableT)(nil) + +func (t *tableT) typeExpr(typ string) sqlparser.Expr { + tableCopy := t.clone() + + for len(tableCopy.cols) > 0 { + idx := rand.Intn(len(tableCopy.cols)) + randCol := tableCopy.cols[idx] + if randCol.typ == typ { + newTableName := "" + if tName, ok := tableCopy.name.(sqlparser.TableName); ok { + newTableName = sqlparser.String(tName.Name) + } + return sqlparser.NewColNameWithQualifier(randCol.name, sqlparser.NewTableName(newTableName)) + } else { + // delete randCol from table.columns + tableCopy.cols[idx] = tableCopy.cols[len(tableCopy.cols)-1] + tableCopy.cols = tableCopy.cols[:len(tableCopy.cols)-1] + } + } + + return nil +} + +func (t *tableT) IntExpr() sqlparser.Expr { + // better way to check if int type? + return t.typeExpr("bigint") +} + +func (t *tableT) StringExpr() sqlparser.Expr { + return t.typeExpr("varchar") +} + +// setName sets the alias for t, as well as setting the tableName for all columns in cols +func (t *tableT) setName(newName string) { + t.name = sqlparser.NewTableName(newName) + for i := range t.cols { + t.cols[i].tableName = newName + } +} + +// setColumns sets the columns of t, and automatically assigns tableName +// this makes it unnatural (but still possible as cols is exportable) to modify tableName +func (t *tableT) setColumns(col ...column) { + t.cols = nil + t.addColumns(col...) +} + +// addColumns adds columns to t, and automatically assigns tableName +// this makes it unnatural (but still possible as cols is exportable) to modify tableName +func (t *tableT) addColumns(col ...column) { + for i := range col { + // only change the Col's tableName if t is of type tableName + if tName, ok := t.name.(sqlparser.TableName); ok { + col[i].tableName = sqlparser.String(tName.Name) + } + + t.cols = append(t.cols, col[i]) + } +} + +// clone returns a deep copy of t +func (t *tableT) clone() *tableT { + return &tableT{ + name: t.name, + cols: slices.Clone(t.cols), + } +} + +// getColumnName returns tableName.name +func (c *column) getColumnName() string { + return fmt.Sprintf("%s.%s", c.tableName, c.name) +} + +func TestRandomQuery(t *testing.T) { + schemaTables := []tableT{ + {name: sqlparser.NewTableName("emp")}, + {name: sqlparser.NewTableName("dept")}, + } + schemaTables[0].addColumns([]column{ + {name: "empno", typ: "bigint"}, + {name: "ename", typ: "varchar"}, + {name: "job", typ: "varchar"}, + {name: "mgr", typ: "bigint"}, + {name: "hiredate", typ: "date"}, + {name: "sal", typ: "bigint"}, + {name: "comm", typ: "bigint"}, + {name: "deptno", typ: "bigint"}, + }...) + schemaTables[1].addColumns([]column{ + {name: "deptno", typ: "bigint"}, + {name: "dname", typ: "varchar"}, + {name: "loc", typ: "varchar"}, + }...) + + fmt.Println(sqlparser.String(randomQuery(schemaTables, 3, 3))) +} + +// TODO: bunch of TestFailingQueries checks were deleted by refactor to use AST +func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Select { + sel := &sqlparser.Select{} + sel.SetComments(sqlparser.Comments{"/*vt+ PLANNER=Gen4 */"}) + + // select distinct (fails with group by bigint) + isDistinct := rand.Intn(2) < 1 + if isDistinct { + sel.MakeDistinct() + } + + // create both tables and join at the same time since both occupy the from clause + tables, isJoin := createTablesAndJoin(schemaTables, sel) + + groupExprs, groupSelectExprs, grouping := createGroupBy(tables, maxGroupBy) + sel.AddSelectExprs(groupSelectExprs) + sel.GroupBy = groupExprs + aggrExprs, aggregates := createAggregations(tables, maxAggrs) + sel.AddSelectExprs(aggrExprs) + + // can add both aggregate and grouping columns to order by + isOrdered := rand.Intn(2) < 1 + if isOrdered && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + addOrderBy(sel) + } + + // where + sel.AddWhere(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) + + // random predicate expression + if rand.Intn(2) < 1 { + predRandomExpr, _ := getRandomExpr(tables) + sel.AddWhere(predRandomExpr) + } + + // having + sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) + if rand.Intn(2) < 1 { + // TODO: having can only contain aggregate or grouping columns in mysql, works fine in vitess + sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) + } + + // only add a limit if the grouping columns are ordered + if rand.Intn(2) < 1 && isOrdered { + sel.Limit = createLimit() + } + + var newTable tableT + // add random expression to select + isRandomExpr := rand.Intn(2) < 1 + randomExpr, typ := getRandomExpr(tables) + if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(randomExpr, "crandom0")) + newTable.addColumns(column{ + name: "crandom0", + typ: typ, + }) + } + + // add them to newTable + newTable.addColumns(grouping...) + newTable.addColumns(aggregates...) + + // add new table to schemaTables + newTable.name = sqlparser.NewDerivedTable(false, sel) + schemaTables = append(schemaTables, newTable) + + // derived tables (partially unsupported) + if rand.Intn(10) < 1 && TestFailingQueries { + sel = randomQuery(schemaTables, 3, 3) + } + + return sel +} + +func createTablesAndJoin(schemaTables []tableT, sel *sqlparser.Select) ([]tableT, bool) { + var tables []tableT + // add at least one of original emp/dept tables for now because derived tables have nil columns + tables = append(tables, schemaTables[rand.Intn(2)]) + + sel.From = append(sel.From, newAliasedTable(tables[0], "tbl0")) + tables[0].setName("tbl0") + + numTables := rand.Intn(len(schemaTables)) + for i := 0; i < numTables; i++ { + tables = append(tables, randomEl(schemaTables)) + sel.From = append(sel.From, newAliasedTable(tables[i+1], fmt.Sprintf("tbl%d", i+1))) + tables[i+1].setName(fmt.Sprintf("tbl%d", i+1)) + } + + isJoin := rand.Intn(2) < 1 + if isJoin { + newTable := randomEl(schemaTables) + tables = append(tables, newTable) + + // create the join before aliasing + newJoinTableExpr := createJoin(tables, sel) + + // alias + tables[numTables+1].setName(fmt.Sprintf("tbl%d", numTables+1)) + + // create the condition after aliasing + newJoinTableExpr.Condition = sqlparser.NewJoinCondition(sqlparser.AndExpressions(createWherePredicates(tables, true)...), nil) + sel.From[numTables] = newJoinTableExpr + } + + return tables, isJoin +} + +// creates a left join (without the condition) between the last table in sel and newTable +// tables should have one more table than sel +func createJoin(tables []tableT, sel *sqlparser.Select) *sqlparser.JoinTableExpr { + n := len(sel.From) + if len(tables) != n+1 { + log.Fatalf("sel has %d tables and tables has %d tables", len(sel.From), n) + } + + return sqlparser.NewJoinTableExpr(sel.From[n-1], sqlparser.LeftJoinType, newAliasedTable(tables[n], fmt.Sprintf("tbl%d", n)), nil) +} + +// returns the grouping columns as three types: sqlparser.GroupBy, sqlparser.SelectExprs, []column +func createGroupBy(tables []tableT, maxGB int) (groupBy sqlparser.GroupBy, groupSelectExprs sqlparser.SelectExprs, grouping []column) { + numGBs := rand.Intn(maxGB) + for i := 0; i < numGBs; i++ { + tblIdx := rand.Intn(len(tables)) + col := randomEl(tables[tblIdx].cols) + groupBy = append(groupBy, newColumn(col)) + + // add to select + if rand.Intn(2) < 1 { + groupSelectExprs = append(groupSelectExprs, newAliasedColumn(col, fmt.Sprintf("cgroup%d", i))) + col.name = fmt.Sprintf("cgroup%d", i) + grouping = append(grouping, col) + } + } + + return groupBy, groupSelectExprs, grouping +} + +// returns the aggregation columns as three types: sqlparser.SelectExprs, []column +func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.SelectExprs, aggregates []column) { + aggregations := []func(col column) sqlparser.Expr{ + func(_ column) sqlparser.Expr { return &sqlparser.CountStar{} }, + func(col column) sqlparser.Expr { return &sqlparser.Count{Args: sqlparser.Exprs{newColumn(col)}} }, + func(col column) sqlparser.Expr { return &sqlparser.Sum{Arg: newColumn(col)} }, + // func(col column) sqlparser.Expr { return &sqlparser.Avg{Arg: newAggregateExpr(col)} }, + func(col column) sqlparser.Expr { return &sqlparser.Min{Arg: newColumn(col)} }, + func(col column) sqlparser.Expr { return &sqlparser.Max{Arg: newColumn(col)} }, + } + + numAggrs := rand.Intn(maxAggrs) + 1 + for i := 0; i < numAggrs; i++ { + tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) + col := randomEl(tables[tblIdx].cols) + newAggregate := aggregations[aggrIdx](col) + aggrExprs = append(aggrExprs, sqlparser.NewAliasedExpr(newAggregate, fmt.Sprintf("caggr%d", i))) + + if aggrIdx <= 1 /* CountStar and Count */ { + col.typ = "bigint" + } else if _, ok := newAggregate.(*sqlparser.Avg); ok && col.getColumnName() == "bigint" { + col.typ = "decimal" + } + + col.name = sqlparser.String(newAggregate) + col.name = fmt.Sprintf("caggr%d", i) + aggregates = append(aggregates, col) + } + return aggrExprs, aggregates +} + +// orders on all non-aggregate SelectExprs and independently at random on all aggregate SelectExprs of sel +func addOrderBy(sel *sqlparser.Select) { + for _, selExpr := range sel.SelectExprs { + if aliasedExpr, ok := selExpr.(*sqlparser.AliasedExpr); ok { + // if the SelectExpr is non-aggregate (the AliasedExpr has Expr of type ColName) + // then add to the order by + if colName, ok1 := aliasedExpr.Expr.(*sqlparser.ColName); ok1 { + sel.AddOrder(sqlparser.NewOrder(colName, getRandomOrderDirection())) + } else if rand.Intn(2) < 1 { + sel.AddOrder(sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) + } + } + } +} + +// compares two random columns (usually of the same type) +// returns a random expression if there are no other predicates and isJoin is true +// returns the predicates as a sqlparser.Exprs (slice of sqlparser.Expr's) +func createWherePredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) { + // if creating predicates for a join, + // then make sure predicates are created for the last two tables (which are being joined) + incr := 0 + if isJoin && len(tables) > 2 { + incr += len(tables) - 2 + } + + for idx1 := range tables { + for idx2 := range tables { + // fmt.Printf("predicate tables:\n%v\n idx1: %d idx2: %d, incr: %d", tables, idx1, idx2, incr) + if idx1 >= idx2 || idx1 < incr || idx2 < incr { + continue + } + noOfPredicates := rand.Intn(2) + if isJoin { + noOfPredicates++ + } + + for i := 0; noOfPredicates > 0; i++ { + col1 := randomEl(tables[idx1].cols) + col2 := randomEl(tables[idx2].cols) + + // prevent infinite loops + if i > 50 { + predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) + break + } + + if col1.typ != col2.typ { + continue + } + + predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) + noOfPredicates-- + } + } + } + + // make sure the join predicate is never empty + if len(predicates) == 0 && isJoin { + predRandomExpr, _ := getRandomExpr(tables) + predicates = append(predicates, predRandomExpr) + } + + return predicates +} + +// creates predicates for the having clause comparing a column to a random expression +func createHavingPredicates(tables []tableT) (havingPredicates sqlparser.Exprs) { + aggrSelectExprs, _ := createAggregations(tables, 2) + for i := range aggrSelectExprs { + if aliasedExpr, ok := aggrSelectExprs[i].(*sqlparser.AliasedExpr); ok { + predRandomExpr, _ := getRandomExpr(tables) + havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), aliasedExpr.Expr, predRandomExpr, nil)) + } + } + return havingPredicates +} + +// creates sel.Limit +func createLimit() *sqlparser.Limit { + limitNum := rand.Intn(10) + if rand.Intn(2) < 1 { + offset := rand.Intn(10) + return sqlparser.NewLimit(offset, limitNum) + } else { + return sqlparser.NewLimitWithoutOffset(limitNum) + } + +} + +// returns a random expression and its type +func getRandomExpr(tables []tableT) (sqlparser.Expr, string) { + seed := time.Now().UnixNano() + g := sqlparser.NewGenerator(seed, 2, slices2.Map(tables, func(t tableT) sqlparser.ExprGenerator { return &t })...) + return g.Expression() +} + +func newAliasedTable(tbl tableT, alias string) *sqlparser.AliasedTableExpr { + return sqlparser.NewAliasedTableExpr(tbl.name, alias) +} + +func newAliasedColumn(col column, alias string) *sqlparser.AliasedExpr { + return sqlparser.NewAliasedExpr(newColumn(col), alias) +} + +func newColumn(col column) *sqlparser.ColName { + return sqlparser.NewColNameWithQualifier(col.name, sqlparser.NewTableName(col.tableName)) +} + +func getRandomComparisonExprOperator() sqlparser.ComparisonExprOperator { + // =, <, >, <=, >=, !=, <=> + return randomEl([]sqlparser.ComparisonExprOperator{0, 1, 2, 3, 4, 5, 6}) +} + +func getRandomOrderDirection() sqlparser.OrderDirection { + // asc, desc + return randomEl([]sqlparser.OrderDirection{0, 1}) +} + +func randomEl[K any](in []K) K { + return in[rand.Intn(len(in))] +} diff --git a/go/test/endtoend/vtgate/queries/random/random_expr_test.go b/go/test/endtoend/vtgate/queries/random/random_expr_test.go new file mode 100644 index 00000000000..dd333bb4479 --- /dev/null +++ b/go/test/endtoend/vtgate/queries/random/random_expr_test.go @@ -0,0 +1,53 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package random + +import ( + "fmt" + "testing" + "time" + + "vitess.io/vitess/go/slices2" + "vitess.io/vitess/go/vt/sqlparser" +) + +func TestRandomExprWithTables(t *testing.T) { + schemaTables := []tableT{ + {name: sqlparser.NewTableName("emp")}, + {name: sqlparser.NewTableName("dept")}, + } + schemaTables[0].addColumns([]column{ + {name: "empno", typ: "bigint"}, + {name: "ename", typ: "varchar"}, + {name: "job", typ: "varchar"}, + {name: "mgr", typ: "bigint"}, + {name: "hiredate", typ: "date"}, + {name: "sal", typ: "bigint"}, + {name: "comm", typ: "bigint"}, + {name: "deptno", typ: "bigint"}, + }...) + schemaTables[1].addColumns([]column{ + {name: "deptno", typ: "bigint"}, + {name: "dname", typ: "varchar"}, + {name: "loc", typ: "varchar"}, + }...) + + seed := time.Now().UnixNano() + g := sqlparser.NewGenerator(seed, 3, slices2.Map(schemaTables, func(t tableT) sqlparser.ExprGenerator { return &t })...) + randomExpr, _ := g.Expression() + fmt.Println(sqlparser.String(randomExpr)) +} diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 5a01ed89c8d..18c527b844d 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -18,12 +18,10 @@ package random import ( "fmt" - "math/rand" "testing" "time" - "vitess.io/vitess/go/vt/log" - + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/sqlparser" "github.com/stretchr/testify/require" @@ -34,9 +32,6 @@ import ( // this test uses the AST defined in the sqlparser package to randomly generate queries -type tableT = sqlparser.TableT -type column = sqlparser.Col - // if true then known failing query types are still generated by randomQuery() const TestFailingQueries = true @@ -181,23 +176,23 @@ func TestRandom(t *testing.T) { // specify the schema (that is defined in schema.sql) schemaTables := []tableT{ - {Name: sqlparser.NewTableName("emp")}, - {Name: sqlparser.NewTableName("dept")}, - } - schemaTables[0].AddColumns([]column{ - {Name: "empno", Typ: "bigint"}, - {Name: "ename", Typ: "varchar"}, - {Name: "job", Typ: "varchar"}, - {Name: "mgr", Typ: "bigint"}, - {Name: "hiredate", Typ: "date"}, - {Name: "sal", Typ: "bigint"}, - {Name: "comm", Typ: "bigint"}, - {Name: "deptno", Typ: "bigint"}, + {name: sqlparser.NewTableName("emp")}, + {name: sqlparser.NewTableName("dept")}, + } + schemaTables[0].addColumns([]column{ + {name: "empno", typ: "bigint"}, + {name: "ename", typ: "varchar"}, + {name: "job", typ: "varchar"}, + {name: "mgr", typ: "bigint"}, + {name: "hiredate", typ: "date"}, + {name: "sal", typ: "bigint"}, + {name: "comm", typ: "bigint"}, + {name: "deptno", typ: "bigint"}, }...) - schemaTables[1].AddColumns([]column{ - {Name: "deptno", Typ: "bigint"}, - {Name: "dname", Typ: "varchar"}, - {Name: "loc", Typ: "varchar"}, + schemaTables[1].addColumns([]column{ + {name: "deptno", typ: "bigint"}, + {name: "dname", typ: "varchar"}, + {name: "loc", typ: "varchar"}, }...) endBy := time.Now().Add(10 * time.Second) @@ -206,6 +201,9 @@ func TestRandom(t *testing.T) { for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { query := sqlparser.String(randomQuery(schemaTables, 3, 3)) _, vtErr := mcmp.ExecAllowAndCompareError(query) + if sqlError, ok := vtErr.(*mysql.SQLError); ok && sqlError.Message == "EOF" { + break + } // this assumes all queries are valid mysql queries if vtErr != nil { fmt.Println(query) @@ -218,300 +216,3 @@ func TestRandom(t *testing.T) { } fmt.Printf("Queries successfully executed: %d\n", queryCount) } - -// TODO: bunch of TestFailingQueries checks were deleted by refactor to use AST -func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Select { - sel := &sqlparser.Select{} - sel.SetComments(sqlparser.Comments{"/*vt+ PLANNER=Gen4 */"}) - - // select distinct (fails with group by bigint) - isDistinct := rand.Intn(2) < 1 - if isDistinct { - sel.MakeDistinct() - } - - // create both tables and join at the same time since both occupy the from clause - tables, isJoin := createTablesAndJoin(schemaTables, sel) - - groupExprs, groupSelectExprs, grouping := createGroupBy(tables, maxGroupBy) - sel.AddSelectExprs(groupSelectExprs) - sel.GroupBy = groupExprs - aggrExprs, aggregates := createAggregations(tables, maxAggrs) - sel.AddSelectExprs(aggrExprs) - - // can add both aggregate and grouping columns to order by - isOrdered := rand.Intn(2) < 1 - if isOrdered && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - addOrderBy(sel) - } - - // where - sel.AddWhere(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) - - // random predicate expression - if rand.Intn(2) < 1 { - predRandomExpr, _ := getRandomExpr(tables) - sel.AddWhere(predRandomExpr) - } - - // having - sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) - if rand.Intn(2) < 1 { - sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) - } - - // only add a limit if the grouping columns are ordered - if rand.Intn(2) < 1 && isOrdered { - sel.Limit = createLimit() - } - - var newTable tableT - // add random expression to select - isRandomExpr := rand.Intn(2) < 1 - randomExpr, typ := getRandomExpr(tables) - if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(randomExpr, "crandom0")) - newTable.AddColumns(column{ - Name: "crandom0", - Typ: typ, - }) - } - - // alias grouping and aggregate columns - for i := range grouping { - grouping[i].Name = fmt.Sprintf("cgroup%d", i) - } - for i := range aggregates { - aggregates[i].Name = fmt.Sprintf("cgroup%d", i) - } - - // add them to newTable - newTable.AddColumns(grouping...) - newTable.AddColumns(aggregates...) - - // add new table to schemaTables - newTable.Name = sqlparser.NewDerivedTable(false, sel) - schemaTables = append(schemaTables, newTable) - - // derived tables (partially unsupported) - if rand.Intn(10) < 1 && TestFailingQueries { - sel = randomQuery(schemaTables, 3, 3) - } - - return sel -} - -func createTablesAndJoin(schemaTables []tableT, sel *sqlparser.Select) ([]tableT, bool) { - var tables []tableT - // add at least one of original emp/dept tables for now because derived tables have nil columns - tables = append(tables, schemaTables[rand.Intn(2)]) - - sel.From = append(sel.From, newAliasedTable(tables[0], "tbl0")) - tables[0].SetName("tbl0") - - numTables := rand.Intn(len(schemaTables)) - for i := 0; i < numTables; i++ { - tables = append(tables, randomEl(schemaTables)) - sel.From = append(sel.From, newAliasedTable(tables[i+1], fmt.Sprintf("tbl%d", i+1))) - tables[i+1].SetName(fmt.Sprintf("tbl%d", i+1)) - } - - isJoin := rand.Intn(2) < 1 - if isJoin { - newTable := randomEl(schemaTables) - tables = append(tables, newTable) - - // create the join before aliasing - newJoinTableExpr := createJoin(tables, sel) - - // alias - tables[numTables+1].SetName(fmt.Sprintf("tbl%d", numTables+1)) - - // create the condition after aliasing - newJoinTableExpr.Condition = sqlparser.NewJoinCondition(sqlparser.AndExpressions(createWherePredicates(tables, true)...), nil) - sel.From[numTables] = newJoinTableExpr - } - - return tables, isJoin -} - -// creates a left join (without the condition) between the last table in sel and newTable -// tables should have one more table than sel -func createJoin(tables []tableT, sel *sqlparser.Select) *sqlparser.JoinTableExpr { - n := len(sel.From) - if len(tables) != n+1 { - log.Fatalf("sel has %d tables and tables has %d tables", len(sel.From), n) - } - - return sqlparser.NewJoinTableExpr(sel.From[n-1], sqlparser.LeftJoinType, newAliasedTable(tables[n], fmt.Sprintf("tbl%d", n)), nil) -} - -// adds grouping columns to sel.GroupBy and optionally to sel.SelectExprs and sel.OrderBy -// TODO: maybe change to return this stuff instead -func createGroupBy(tables []tableT, maxGB int) (groupBy sqlparser.GroupBy, groupSelectExprs sqlparser.SelectExprs, grouping []column) { - numGBs := rand.Intn(maxGB) - for i := 0; i < numGBs; i++ { - tblIdx := rand.Intn(len(tables)) - col := randomEl(tables[tblIdx].Cols) - groupBy = append(groupBy, newColumn(col)) - - // add to select - if rand.Intn(2) < 1 { - groupSelectExprs = append(groupSelectExprs, newAliasedColumn(col, fmt.Sprintf("cgroup%d", i))) - grouping = append(grouping, col) - } - } - - return groupBy, groupSelectExprs, grouping -} - -// adds aggregation columns to sel.SelectExprs and optionally to sel.OrderBy -// TODO: maybe change to return this stuff instead -func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.SelectExprs, aggregates []column) { - aggregations := []func(col column) sqlparser.Expr{ - func(_ column) sqlparser.Expr { return &sqlparser.CountStar{} }, - func(col column) sqlparser.Expr { return &sqlparser.Count{Args: sqlparser.Exprs{newColumn(col)}} }, - func(col column) sqlparser.Expr { return &sqlparser.Sum{Arg: newColumn(col)} }, - // func(col column) sqlparser.Expr { return &sqlparser.Avg{Arg: newAggregateExpr(col)} }, - func(col column) sqlparser.Expr { return &sqlparser.Min{Arg: newColumn(col)} }, - func(col column) sqlparser.Expr { return &sqlparser.Max{Arg: newColumn(col)} }, - } - - numAggrs := rand.Intn(maxAggrs) + 1 - for i := 0; i < numAggrs; i++ { - tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) - col := randomEl(tables[tblIdx].Cols) - newAggregate := aggregations[aggrIdx](col) - aggrExprs = append(aggrExprs, sqlparser.NewAliasedExpr(newAggregate, fmt.Sprintf("caggr%d", i))) - - if aggrIdx <= 1 /* CountStar and Count */ { - col.Typ = "bigint" - } else if _, ok := newAggregate.(*sqlparser.Avg); ok && col.GetColumnName() == "bigint" { - col.Typ = "decimal" - } - - col.Name = sqlparser.String(newAggregate) - aggregates = append(aggregates, col) - } - return aggrExprs, aggregates -} - -// orders on all non-aggregate SelectExprs and independently at random on all aggregate SelectExprs of sel -func addOrderBy(sel *sqlparser.Select) { - for _, selExpr := range sel.SelectExprs { - if aliasedExpr, ok := selExpr.(*sqlparser.AliasedExpr); ok { - // if the SelectExpr is non-aggregate (the AliasedExpr has Expr of type ColName) - // then add to the order by - if colName, ok1 := aliasedExpr.Expr.(*sqlparser.ColName); ok1 { - sel.AddOrder(sqlparser.NewOrder(colName, getRandomOrderDirection())) - } else if rand.Intn(2) < 1 { - sel.AddOrder(sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) - } - } - } -} - -// returns the predicates as a sqlparser.Exprs (slice of sqlparser.Expr's) -func createWherePredicates(tables []tableT, isJoin bool) (predicates sqlparser.Exprs) { - // if creating predicates for a join, - // then make sure predicates are created for the last two tables (which are being joined) - incr := 0 - if isJoin && len(tables) > 2 { - incr += len(tables) - 2 - } - - for idx1 := range tables { - for idx2 := range tables { - // fmt.Printf("predicate tables:\n%v\n idx1: %d idx2: %d, incr: %d", tables, idx1, idx2, incr) - if idx1 >= idx2 || idx1 < incr || idx2 < incr { - continue - } - noOfPredicates := rand.Intn(2) - if isJoin { - noOfPredicates++ - } - - for i := 0; noOfPredicates > 0; i++ { - col1 := randomEl(tables[idx1].Cols) - col2 := randomEl(tables[idx2].Cols) - - // prevent infinite loops - if i > 50 { - predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) - break - } - - if col1.Typ != col2.Typ { - continue - } - - predicates = append(predicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), newColumn(col1), newColumn(col2), nil)) - noOfPredicates-- - } - } - } - - // make sure the join predicate is never empty - if len(predicates) == 0 && isJoin { - predRandomExpr, _ := getRandomExpr(tables) - predicates = append(predicates, predRandomExpr) - } - - return predicates -} - -func createHavingPredicates(tables []tableT) (havingPredicates sqlparser.Exprs) { - aggrSelectExprs, _ := createAggregations(tables, 3) - for i := range aggrSelectExprs { - if aliasedExpr, ok := aggrSelectExprs[i].(*sqlparser.AliasedExpr); ok { - predRandomExpr, _ := getRandomExpr(tables) - havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), aliasedExpr.Expr, predRandomExpr, nil)) - } - } - return havingPredicates -} - -// creates sel.Limit -func createLimit() *sqlparser.Limit { - limitNum := rand.Intn(10) - if rand.Intn(2) < 1 { - offset := rand.Intn(10) - return sqlparser.NewLimit(offset, limitNum) - } else { - return sqlparser.NewLimitWithoutOffset(limitNum) - } - -} - -// returns a random expression and its type -func getRandomExpr(tables []tableT) (sqlparser.Expr, string) { - seed := time.Now().UnixNano() - g := sqlparser.NewGenerator(seed, 2, tables...) - return g.Expression() -} - -func newAliasedTable(tbl tableT, alias string) *sqlparser.AliasedTableExpr { - return sqlparser.NewAliasedTableExpr(tbl.Name, alias) -} - -func newAliasedColumn(col column, alias string) *sqlparser.AliasedExpr { - return sqlparser.NewAliasedExpr(newColumn(col), alias) -} - -func newColumn(col column) *sqlparser.ColName { - return sqlparser.NewColNameWithQualifier(col.Name, sqlparser.NewTableName(col.TableName)) -} - -func getRandomComparisonExprOperator() sqlparser.ComparisonExprOperator { - // =, <, >, <=, >=, !=, <=> - return randomEl([]sqlparser.ComparisonExprOperator{0, 1, 2, 3, 4, 5, 6}) -} - -func getRandomOrderDirection() sqlparser.OrderDirection { - // asc, desc - return randomEl([]sqlparser.OrderDirection{0, 1}) -} - -func randomEl[K any](in []K) K { - return in[rand.Intn(len(in))] -} diff --git a/go/vt/sqlparser/ast_funcs.go b/go/vt/sqlparser/ast_funcs.go index dc4a826ab7c..6540105383b 100644 --- a/go/vt/sqlparser/ast_funcs.go +++ b/go/vt/sqlparser/ast_funcs.go @@ -1053,7 +1053,7 @@ func compliantName(in string) string { } func (node *Select) AddSelectExprs(selectExprs SelectExprs) { - node.SelectExprs = selectExprs + node.SelectExprs = append(node.SelectExprs, selectExprs...) } // AddOrder adds an order by element diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 25242d6ac14..72a7c45c5da 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -23,22 +23,29 @@ import ( // This file is used to generate random expressions to be used for testing -func NewGenerator(seed int64, maxDepth int, tables ...TableT) *Generator { +type ( + ExprGenerator interface { + IntExpr() Expr + StringExpr() Expr + } +) + +func NewGenerator(seed int64, maxDepth int, exprGenerators ...ExprGenerator) *Generator { g := Generator{ - seed: seed, - r: rand.New(rand.NewSource(seed)), - maxDepth: maxDepth, - tables: tables, + seed: seed, + r: rand.New(rand.NewSource(seed)), + maxDepth: maxDepth, + exprGenerator: exprGenerators, } return &g } type Generator struct { - seed int64 - r *rand.Rand - depth int - maxDepth int - tables []TableT + seed int64 + r *rand.Rand + depth int + maxDepth int + exprGenerator []ExprGenerator } // enter should be called whenever we are producing an intermediate node. it should be followed by a `defer g.exit()` @@ -124,8 +131,14 @@ func (g *Generator) intExpr() Expr { func() Expr { return g.caseExpr(g.intExpr) }, } - if g.tables != nil { - options = append(options, func() Expr { return g.intColumn() }) + for _, generator := range g.exprGenerator { + options = append(options, func() Expr { + expr := generator.IntExpr() + if expr == nil { + return g.intLiteral() + } + return expr + }) } return g.randomOf(options) @@ -161,8 +174,14 @@ func (g *Generator) stringExpr() Expr { func() Expr { return g.caseExpr(g.stringExpr) }, } - if g.tables != nil { - options = append(options, func() Expr { return g.intColumn() }) + for _, generator := range g.exprGenerator { + options = append(options, func() Expr { + expr := generator.StringExpr() + if expr == nil { + return g.stringLiteral() + } + return expr + }) } return g.randomOf(options) @@ -244,43 +263,6 @@ func (g *Generator) arithmetic() Expr { } } -func (g *Generator) typeColumn(typ string, typeLiteral func() Expr) Expr { - tblIdx := rand.Intn(len(g.tables)) - table := g.tables[tblIdx] - tableCopy := table.copy() - - for len(tableCopy.Cols) > 0 { - idx := rand.Intn(len(tableCopy.Cols)) - randCol := tableCopy.Cols[idx] - if randCol.Typ == typ { - newTableName := NewIdentifierCS("") - if tName, ok := table.Name.(TableName); ok { - newTableName = tName.Name - } - return &ColName{ - Metadata: nil, - Name: NewIdentifierCI(randCol.Name), - Qualifier: TableName{Name: newTableName}, - } - } else { - // delete randCol from table.columns - tableCopy.Cols[idx] = tableCopy.Cols[len(tableCopy.Cols)-1] - tableCopy.Cols = tableCopy.Cols[:len(tableCopy.Cols)-1] - } - } - - return typeLiteral() -} - -func (g *Generator) intColumn() Expr { - // better way to check if int type? - return g.typeColumn("bigint", g.intLiteral) -} - -func (g *Generator) stringColumn() Expr { - return g.typeColumn("varchar", g.stringLiteral) -} - type exprF func() Expr func (g *Generator) randomOf(options []exprF) Expr { diff --git a/go/vt/sqlparser/random_expr_test.go b/go/vt/sqlparser/random_expr_test.go deleted file mode 100644 index 65fd4bed15c..00000000000 --- a/go/vt/sqlparser/random_expr_test.go +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright 2023 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sqlparser - -import ( - "fmt" - "testing" - "time" -) - -func TestRandomExprWithTables(t *testing.T) { - schemaTables := []TableT{ - {Name: NewTableName("emp")}, - {Name: NewTableName("dept")}, - } - schemaTables[0].AddColumns([]Col{ - {Name: "empno", Typ: "bigint"}, - {Name: "ename", Typ: "varchar"}, - {Name: "job", Typ: "varchar"}, - {Name: "mgr", Typ: "bigint"}, - {Name: "hiredate", Typ: "date"}, - {Name: "sal", Typ: "bigint"}, - {Name: "comm", Typ: "bigint"}, - {Name: "deptno", Typ: "bigint"}, - }...) - schemaTables[1].AddColumns([]Col{ - {Name: "deptno", Typ: "bigint"}, - {Name: "dname", Typ: "varchar"}, - {Name: "loc", Typ: "varchar"}, - }...) - - seed := time.Now().UnixNano() - g := NewGenerator(seed, 3, schemaTables...) - randomExpr, _ := g.Expression() - fmt.Println(String(randomExpr)) -} diff --git a/go/vt/sqlparser/schema.go b/go/vt/sqlparser/schema.go deleted file mode 100644 index 43a955da012..00000000000 --- a/go/vt/sqlparser/schema.go +++ /dev/null @@ -1,79 +0,0 @@ -/* -Copyright 2023 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sqlparser - -import "fmt" - -// this file defines two structs (Col and TableT) which represent the schema of the database used -// currently used in random expression generation and random query generation in endtoend testing - -type ( - Col struct { - TableName string - Name string - Typ string - } - TableT struct { - // Name will be a TableName object if it is used, with Name: alias or name if no alias is provided - // Name will only be a DerivedTable for moving its data around - Name SimpleTableExpr - Cols []Col - } -) - -// GetColumnName returns TableName.Name -func (c *Col) GetColumnName() string { - return fmt.Sprintf("%s.%s", c.TableName, c.Name) -} - -// SetName sets the alias for t, as well as setting the TableName for all columns in Cols -func (t *TableT) SetName(newName string) { - t.Name = NewTableName(newName) - for i := range t.Cols { - t.Cols[i].TableName = newName - } -} - -// SetColumns sets the columns of t, and automatically assigns TableName -// this makes it unnatural (but still possible as Cols is exportable) to modify TableName -func (t *TableT) SetColumns(col ...Col) { - t.Cols = make([]Col, len(col)) - t.AddColumns(col...) -} - -// AddColumns adds columns to t, and automatically assigns TableName -// this makes it unnatural (but still possible as Cols is exportable) to modify TableName -func (t *TableT) AddColumns(col ...Col) { - for i := range col { - // only change the Col's TableName if t is of type TableName - if tName, ok := t.Name.(TableName); ok { - col[i].TableName = tName.Name.String() - } - - t.Cols = append(t.Cols, col[i]) - } -} - -// copy returns a deep copy of t -func (t *TableT) copy() *TableT { - newCols := make([]Col, len(t.Cols)) - copy(newCols, t.Cols) - return &TableT{ - Name: t.Name, - Cols: newCols, - } -} From 4e07f06e13fde5d2ce0cfbe3345cf7b7c99895fc Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Thu, 22 Jun 2023 23:55:50 -0700 Subject: [PATCH 22/29] added toggle to fail on EOF and mismatched results errors Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/utils/cmp.go | 4 ++- go/test/endtoend/utils/mysql.go | 26 ++++++++++++------- .../vtgate/queries/random/query_gen_test.go | 4 +-- .../vtgate/queries/random/random_test.go | 22 +++++++++++++--- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/go/test/endtoend/utils/cmp.go b/go/test/endtoend/utils/cmp.go index 5d03e305154..bf66fc6ef2b 100644 --- a/go/test/endtoend/utils/cmp.go +++ b/go/test/endtoend/utils/cmp.go @@ -233,6 +233,8 @@ func (mcmp *MySQLCompare) ExecWithColumnCompare(query string) *sqltypes.Result { // - MySQL and Vitess did not find an error, but their results are matching // // The result set and error produced by Vitess are returned to the caller. +// If the Vitess and MySQL error are both nil, but the results do not match, +// the mismatched results are instead returned as an error, as well as the Vitess result set func (mcmp *MySQLCompare) ExecAllowAndCompareError(query string) (*sqltypes.Result, error) { mcmp.t.Helper() vtQr, vtErr := mcmp.VtConn.ExecuteFetch(query, 1000, true) @@ -242,7 +244,7 @@ func (mcmp *MySQLCompare) ExecAllowAndCompareError(query string) (*sqltypes.Resu // Since we allow errors, we don't want to compare results if one of the client failed. // Vitess and MySQL should always be agreeing whether the query returns an error or not. if vtErr == nil && mysqlErr == nil { - compareVitessAndMySQLResults(mcmp.t, query, mcmp.VtConn, vtQr, mysqlQr, false) + vtErr = compareVitessAndMySQLResults(mcmp.t, query, mcmp.VtConn, vtQr, mysqlQr, false) } return vtQr, vtErr } diff --git a/go/test/endtoend/utils/mysql.go b/go/test/endtoend/utils/mysql.go index 6249d639a4d..a289a07c63a 100644 --- a/go/test/endtoend/utils/mysql.go +++ b/go/test/endtoend/utils/mysql.go @@ -18,6 +18,7 @@ package utils import ( "context" + "errors" "fmt" "os" "path" @@ -154,24 +155,27 @@ func prepareMySQLWithSchema(params mysql.ConnParams, sql string) error { return nil } -func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn, vtQr, mysqlQr *sqltypes.Result, compareColumns bool) { +func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn, vtQr, mysqlQr *sqltypes.Result, compareColumns bool) error { if vtQr == nil && mysqlQr == nil { - return + return nil } if vtQr == nil { t.Error("Vitess result is 'nil' while MySQL's is not.") - return + return errors.New("Vitess result is 'nil' while MySQL's is not.\n") } if mysqlQr == nil { t.Error("MySQL result is 'nil' while Vitess' is not.") - return + return errors.New("MySQL result is 'nil' while Vitess' is not.\n") } + + var errStr string if compareColumns { vtColCount := len(vtQr.Fields) myColCount := len(mysqlQr.Fields) if vtColCount > 0 && myColCount > 0 { if vtColCount != myColCount { t.Errorf("column count does not match: %d vs %d", vtColCount, myColCount) + errStr += fmt.Sprintf("column count does not match: %d vs %d\n", vtColCount, myColCount) } var vtCols []string @@ -180,13 +184,16 @@ func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn vtCols = append(vtCols, vtField.Name) myCols = append(myCols, mysqlQr.Fields[i].Name) } - assert.Equal(t, myCols, vtCols, "column names do not match - the expected values are what mysql produced") + if !assert.Equal(t, myCols, vtCols, "column names do not match - the expected values are what mysql produced") { + errStr += "column names do not match - the expected values are what mysql produced\n" + errStr += fmt.Sprintf("Not equal: \nexpected: %v\nactual: %v\n", myCols, vtCols) + } } } stmt, err := sqlparser.Parse(query) if err != nil { t.Error(err) - return + return err } orderBy := false if selStmt, isSelStmt := stmt.(sqlparser.SelectStatement); isSelStmt { @@ -194,12 +201,12 @@ func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn } if orderBy && sqltypes.ResultsEqual([]sqltypes.Result{*vtQr}, []sqltypes.Result{*mysqlQr}) { - return + return nil } else if sqltypes.ResultsEqualUnordered([]sqltypes.Result{*vtQr}, []sqltypes.Result{*mysqlQr}) { - return + return nil } - errStr := "Query (" + query + ") results mismatched.\nVitess Results:\n" + errStr += "Query (" + query + ") results mismatched.\nVitess Results:\n" for _, row := range vtQr.Rows { errStr += fmt.Sprintf("%s\n", row) } @@ -212,6 +219,7 @@ func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn errStr += fmt.Sprintf("query plan: \n%s\n", qr.Rows[0][0].ToString()) } t.Error(errStr) + return errors.New(errStr) } func compareVitessAndMySQLErrors(t *testing.T, vtErr, mysqlErr error) { diff --git a/go/test/endtoend/vtgate/queries/random/query_gen_test.go b/go/test/endtoend/vtgate/queries/random/query_gen_test.go index bfb46876232..986c4b29da6 100644 --- a/go/test/endtoend/vtgate/queries/random/query_gen_test.go +++ b/go/test/endtoend/vtgate/queries/random/query_gen_test.go @@ -180,13 +180,13 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel // having sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) - if rand.Intn(2) < 1 { + if rand.Intn(2) < 1 && TestFailingQueries { // TODO: having can only contain aggregate or grouping columns in mysql, works fine in vitess sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) } // only add a limit if the grouping columns are ordered - if rand.Intn(2) < 1 && isOrdered { + if rand.Intn(2) < 1 && (isOrdered || len(grouping) == 0) { sel.Limit = createLimit() } diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 18c527b844d..b086a91326c 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -18,6 +18,7 @@ package random import ( "fmt" + "strings" "testing" "time" @@ -35,6 +36,9 @@ import ( // if true then known failing query types are still generated by randomQuery() const TestFailingQueries = true +// if true then execution will always stop on "must fix" error: a mismatched results or EOF +const StopOnMustFixError = true + func start(t *testing.T) (utils.MySQLCompare, func()) { mcmp, err := utils.NewMySQLCompare(t, vtParams, mysqlParams) require.NoError(t, err) @@ -144,7 +148,7 @@ func TestKnownFailures(t *testing.T) { helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl1 join (select count(*) from emp as tbl0, dept as tbl1 group by tbl1.loc) as tbl2") // EOF (errno 2013) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl0, (select /*vt+ PLANNER=Gen4 */ count(*) from emp as tbl0, emp as tbl1 limit 18) as tbl1") + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) as caggr0 from dept as tbl0, (select count(*) as caggr0 from emp as tbl0, emp as tbl1 limit 18) as tbl1") // unsupported // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) @@ -201,13 +205,23 @@ func TestRandom(t *testing.T) { for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { query := sqlparser.String(randomQuery(schemaTables, 3, 3)) _, vtErr := mcmp.ExecAllowAndCompareError(query) - if sqlError, ok := vtErr.(*mysql.SQLError); ok && sqlError.Message == "EOF" { - break - } + // this assumes all queries are valid mysql queries if vtErr != nil { fmt.Println(query) fmt.Println(vtErr) + + if StopOnMustFixError { + // EOF + if sqlError, ok := vtErr.(*mysql.SQLError); ok && strings.Contains(sqlError.Message, "EOF") { + break + } + // mismatched results + if strings.Contains(vtErr.Error(), "results mismatched") && false { + break + } + } + // restart the mysql and vitess connections in case something bad happened closer() mcmp, closer = start(t) From 9ef5861ece187cd4f5ea7023ab2fbfd095c611c1 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Fri, 23 Jun 2023 01:42:49 -0700 Subject: [PATCH 23/29] deleted aggregation/fuzz_test.go and minor fixes/changes to random Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/aggregation/fuzz_test.go | 213 ------------------ .../vtgate/queries/random/query_gen_test.go | 1 - .../vtgate/queries/random/random_test.go | 22 +- 3 files changed, 11 insertions(+), 225 deletions(-) delete mode 100644 go/test/endtoend/vtgate/queries/aggregation/fuzz_test.go diff --git a/go/test/endtoend/vtgate/queries/aggregation/fuzz_test.go b/go/test/endtoend/vtgate/queries/aggregation/fuzz_test.go deleted file mode 100644 index ba6fca839c4..00000000000 --- a/go/test/endtoend/vtgate/queries/aggregation/fuzz_test.go +++ /dev/null @@ -1,213 +0,0 @@ -/* -Copyright 2023 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package aggregation - -import ( - "fmt" - "math/rand" - "strings" - "testing" - "time" - - "golang.org/x/exp/maps" - - "vitess.io/vitess/go/vt/log" -) - -type ( - column struct { - name string - typ string - } - tableT struct { - name string - columns []column - } -) - -func TestFuzzAggregations(t *testing.T) { - t.Skip("dont run on CI for now") - // This test randomizes values and queries, and checks that mysql returns the same values that Vitess does - mcmp, closer := start(t) - defer closer() - - noOfRows := rand.Intn(20) - var values []string - for i := 0; i < noOfRows; i++ { - values = append(values, fmt.Sprintf("(%d, 'name%d', 'value%d', %d)", i, i, i, i)) - } - t1Insert := fmt.Sprintf("insert into t1 (t1_id, name, value, shardKey) values %s;", strings.Join(values, ",")) - values = nil - noOfRows = rand.Intn(20) - for i := 0; i < noOfRows; i++ { - values = append(values, fmt.Sprintf("(%d, %d)", i, i)) - } - t2Insert := fmt.Sprintf("insert into t2 (id, shardKey) values %s;", strings.Join(values, ",")) - - mcmp.Exec(t1Insert) - mcmp.Exec(t2Insert) - - t.Cleanup(func() { - if t.Failed() { - fmt.Println(t1Insert) - fmt.Println(t2Insert) - } - }) - - schema := map[string]tableT{ - "t1": {name: "t1", columns: []column{ - {name: "t1_id", typ: "bigint"}, - {name: "name", typ: "varchar"}, - {name: "value", typ: "varchar"}, - {name: "shardKey", typ: "bigint"}, - }}, - "t2": {name: "t2", columns: []column{ - {name: "id", typ: "bigint"}, - {name: "shardKey", typ: "bigint"}, - }}, - } - - endBy := time.Now().Add(1 * time.Second) - schemaTables := maps.Values(schema) - - var queryCount int - for time.Now().Before(endBy) || t.Failed() { - tables := createTables(schemaTables) - query := randomQuery(tables, 3, 3) - mcmp.Exec(query) - if t.Failed() { - fmt.Println(query) - } - queryCount++ - } - log.Info("Queries successfully executed: %d", queryCount) -} - -func randomQuery(tables []tableT, maxAggrs, maxGroupBy int) string { - randomCol := func(tblIdx int) (string, string) { - tbl := tables[tblIdx] - col := randomEl(tbl.columns) - return fmt.Sprintf("tbl%d.%s", tblIdx, col.name), col.typ - } - predicates := createPredicates(tables, randomCol) - aggregates := createAggregations(tables, maxAggrs, randomCol) - grouping := createGroupBy(tables, maxGroupBy, randomCol) - sel := "select /*vt+ PLANNER=Gen4 */ " + strings.Join(aggregates, ", ") + " from " - - var tbls []string - for i, s := range tables { - tbls = append(tbls, fmt.Sprintf("%s as tbl%d", s.name, i)) - } - sel += strings.Join(tbls, ", ") - - if len(predicates) > 0 { - sel += " where " - sel += strings.Join(predicates, " and ") - } - if len(grouping) > 0 { - sel += " group by " - sel += strings.Join(grouping, ", ") - } - // we do it this way so we don't have to do only `only_full_group_by` queries - var noOfOrderBy int - if len(grouping) > 0 { - // panic on rand function call if value is 0 - noOfOrderBy = rand.Intn(len(grouping)) - } - if noOfOrderBy > 0 { - noOfOrderBy = 0 // TODO turning on ORDER BY here causes lots of failures to happen - } - if noOfOrderBy > 0 { - var orderBy []string - for noOfOrderBy > 0 { - noOfOrderBy-- - if rand.Intn(2) == 0 || len(grouping) == 0 { - orderBy = append(orderBy, randomEl(aggregates)) - } else { - orderBy = append(orderBy, randomEl(grouping)) - } - } - sel += " order by " - sel += strings.Join(orderBy, ", ") - } - return sel -} - -func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (string, string)) (grouping []string) { - noOfGBs := rand.Intn(maxGB) - for i := 0; i < noOfGBs; i++ { - tblIdx := rand.Intn(len(tables)) - col, _ := randomCol(tblIdx) - grouping = append(grouping, col) - } - return -} - -func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int) (string, string)) (aggregates []string) { - aggregations := []func(string) string{ - func(_ string) string { return "count(*)" }, - func(e string) string { return fmt.Sprintf("count(%s)", e) }, - //func(e string) string { return fmt.Sprintf("sum(%s)", e) }, - //func(e string) string { return fmt.Sprintf("avg(%s)", e) }, - //func(e string) string { return fmt.Sprintf("min(%s)", e) }, - //func(e string) string { return fmt.Sprintf("max(%s)", e) }, - } - - noOfAggrs := rand.Intn(maxAggrs) + 1 - for i := 0; i < noOfAggrs; i++ { - tblIdx := rand.Intn(len(tables)) - e, _ := randomCol(tblIdx) - aggregates = append(aggregates, randomEl(aggregations)(e)) - } - return aggregates -} - -func createTables(schemaTables []tableT) []tableT { - noOfTables := rand.Intn(2) + 1 - var tables []tableT - - for i := 0; i < noOfTables; i++ { - tables = append(tables, randomEl(schemaTables)) - } - return tables -} - -func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, string)) (predicates []string) { - for idx1 := range tables { - for idx2 := range tables { - if idx1 == idx2 { - continue - } - noOfPredicates := rand.Intn(2) - - for noOfPredicates > 0 { - col1, t1 := randomCol(idx1) - col2, t2 := randomCol(idx2) - if t1 != t2 { - continue - } - predicates = append(predicates, fmt.Sprintf("%s = %s", col1, col2)) - noOfPredicates-- - } - } - } - return predicates -} - -func randomEl[K any](in []K) K { - return in[rand.Intn(len(in))] -} diff --git a/go/test/endtoend/vtgate/queries/random/query_gen_test.go b/go/test/endtoend/vtgate/queries/random/query_gen_test.go index 986c4b29da6..c7374794abb 100644 --- a/go/test/endtoend/vtgate/queries/random/query_gen_test.go +++ b/go/test/endtoend/vtgate/queries/random/query_gen_test.go @@ -143,7 +143,6 @@ func TestRandomQuery(t *testing.T) { fmt.Println(sqlparser.String(randomQuery(schemaTables, 3, 3))) } -// TODO: bunch of TestFailingQueries checks were deleted by refactor to use AST func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Select { sel := &sqlparser.Select{} sel.SetComments(sqlparser.Comments{"/*vt+ PLANNER=Gen4 */"}) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index b086a91326c..ff1efac2ea7 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -36,7 +36,7 @@ import ( // if true then known failing query types are still generated by randomQuery() const TestFailingQueries = true -// if true then execution will always stop on "must fix" error: a mismatched results or EOF +// if true then execution will always stop on a "must fix" error: a mismatched results or EOF const StopOnMustFixError = true func start(t *testing.T) (utils.MySQLCompare, func()) { @@ -109,6 +109,12 @@ func TestMustFix(t *testing.T) { // left instead of right works // swapping tables and predicates and changing to left fails helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal") + + // EOF + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl0, (select count(*) from emp as tbl0, emp as tbl1 limit 18) as tbl1") + + // EOF + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") } func TestKnownFailures(t *testing.T) { @@ -118,8 +124,9 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() - // [BUG] unknown plan type for DISTINCT *planbuilder.filter - helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct sum(tbl1.loc) as caggr0 from dept as tbl0, dept as tbl1 group by tbl1.deptno having count(*) <=> tbl1.deptno") + // vitess error: + // mysql error: Incorrect DATE value: 'tuna' + helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.empno) as caggr0 from emp as tbl0 where case 'gator' when false then 314 else 'weevil' end > tbl0.job having min(tbl0.hiredate) <=> 'tuna'") // vitess error: // mysql error: Unknown column 'tbl0.deptno' in 'having clause' @@ -147,9 +154,6 @@ func TestKnownFailures(t *testing.T) { // [BUG] push projection does not yet support: *planbuilder.memorySort (errno 1815) (sqlstate HY000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from dept as tbl1 join (select count(*) from emp as tbl0, dept as tbl1 group by tbl1.loc) as tbl2") - // EOF (errno 2013) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) as caggr0 from dept as tbl0, (select count(*) as caggr0 from emp as tbl0, emp as tbl1 limit 18) as tbl1") - // unsupported // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ (select count(*) from emp as tbl0) from emp as tbl0") @@ -162,10 +166,6 @@ func TestKnownFailures(t *testing.T) { // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0) as tbl0, dept as tbl1") - // unsupported - // EOF (errno 2013) (sqlstate HY000) - helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0") - // unsupported // unsupported: in scatter query: aggregation function 'avg(tbl0.deptno)' helperTest(t, "select /*vt+ PLANNER=Gen4 */ avg(tbl0.deptno) from dept as tbl0") @@ -217,7 +217,7 @@ func TestRandom(t *testing.T) { break } // mismatched results - if strings.Contains(vtErr.Error(), "results mismatched") && false { + if strings.Contains(vtErr.Error(), "results mismatched") { break } } From 656f63893487727367538ebb3de6f1e18db4382b Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Mon, 26 Jun 2023 03:57:48 -0700 Subject: [PATCH 24/29] limited query generation to make TestRandom consistently pass Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/query_gen_test.go | 133 ++++++++++++++---- .../vtgate/queries/random/random_test.go | 17 ++- 2 files changed, 120 insertions(+), 30 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/query_gen_test.go b/go/test/endtoend/vtgate/queries/random/query_gen_test.go index c7374794abb..57f2ce69489 100644 --- a/go/test/endtoend/vtgate/queries/random/query_gen_test.go +++ b/go/test/endtoend/vtgate/queries/random/query_gen_test.go @@ -70,7 +70,7 @@ func (t *tableT) typeExpr(typ string) sqlparser.Expr { } func (t *tableT) IntExpr() sqlparser.Expr { - // better way to check if int type? + // TODO: better way to check if int type? return t.typeExpr("bigint") } @@ -156,49 +156,81 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel // create both tables and join at the same time since both occupy the from clause tables, isJoin := createTablesAndJoin(schemaTables, sel) - groupExprs, groupSelectExprs, grouping := createGroupBy(tables, maxGroupBy) - sel.AddSelectExprs(groupSelectExprs) - sel.GroupBy = groupExprs + var ( + groupBy sqlparser.GroupBy + groupSelectExprs sqlparser.SelectExprs + grouping []column + ) + // TODO: distinct makes vitess think there is grouping on aggregation columns + if TestFailingQueries || !isDistinct { + groupBy, groupSelectExprs, grouping = createGroupBy(tables, maxGroupBy) + sel.AddSelectExprs(groupSelectExprs) + sel.GroupBy = groupBy + } aggrExprs, aggregates := createAggregations(tables, maxAggrs) sel.AddSelectExprs(aggrExprs) // can add both aggregate and grouping columns to order by - isOrdered := rand.Intn(2) < 1 - if isOrdered && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { - addOrderBy(sel) + // TODO: order fails with distinct and outer joins + isOrdered := rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) && TestFailingQueries + // TODO: order by fails a lot; probably related to the previously passing query + // TODO: should be fixed soon + if isOrdered { + sel.OrderBy = createOrderBy(groupBy, aggrExprs) } // where sel.AddWhere(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) // random predicate expression - if rand.Intn(2) < 1 { + // TODO: random expressions cause a lot of failures + if rand.Intn(2) < 1 && TestFailingQueries { predRandomExpr, _ := getRandomExpr(tables) sel.AddWhere(predRandomExpr) } // having - sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) - if rand.Intn(2) < 1 && TestFailingQueries { - // TODO: having can only contain aggregate or grouping columns in mysql, works fine in vitess - sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) + isHaving := rand.Intn(2) < 1 + if isHaving { + sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) + if rand.Intn(2) < 1 && TestFailingQueries { + // TODO: having can only contain aggregate or grouping columns in mysql, works fine in vitess + // TODO: Can fix this by putting only the table with the grouping and aggregates column (newTable) + sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) + } } + // TODO: use sqlparser.ExprGenerator to generate a random expression with aggregation functions // only add a limit if the grouping columns are ordered - if rand.Intn(2) < 1 && (isOrdered || len(grouping) == 0) { + // TODO: limit fails a lot + if rand.Intn(2) < 1 && (isOrdered || len(groupBy) == 0) && TestFailingQueries { sel.Limit = createLimit() } var newTable tableT // add random expression to select - isRandomExpr := rand.Intn(2) < 1 - randomExpr, typ := getRandomExpr(tables) - if isRandomExpr && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) { + // TODO: random expressions cause a lot of failures + isRandomExpr := rand.Intn(2) < 1 && TestFailingQueries + var ( + randomExpr sqlparser.Expr + typ string + ) + // TODO: selecting a random expression potentially with columns creates + // TODO: only_full_group_by related errors in Vitess + if TestFailingQueries { + randomExpr, typ = getRandomExpr(tables) + } else { + randomExpr, typ = getRandomExpr(nil) + } + if isRandomExpr { sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(randomExpr, "crandom0")) newTable.addColumns(column{ name: "crandom0", typ: typ, }) + + // make sure to add the random expression to group by for only_full_group_by + sel.AddGroupBy(randomExpr) } // add them to newTable @@ -210,6 +242,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel schemaTables = append(schemaTables, newTable) // derived tables (partially unsupported) + // TODO: derived tables fails a lot if rand.Intn(10) < 1 && TestFailingQueries { sel = randomQuery(schemaTables, 3, 3) } @@ -232,7 +265,8 @@ func createTablesAndJoin(schemaTables []tableT, sel *sqlparser.Select) ([]tableT tables[i+1].setName(fmt.Sprintf("tbl%d", i+1)) } - isJoin := rand.Intn(2) < 1 + // TODO: outer joins produce mismatched results + isJoin := rand.Intn(2) < 1 && TestFailingQueries if isJoin { newTable := randomEl(schemaTables) tables = append(tables, newTable) @@ -268,6 +302,10 @@ func createGroupBy(tables []tableT, maxGB int) (groupBy sqlparser.GroupBy, group for i := 0; i < numGBs; i++ { tblIdx := rand.Intn(len(tables)) col := randomEl(tables[tblIdx].cols) + // TODO: grouping by a date column sometimes errors + if col.typ == "date" && !TestFailingQueries { + continue + } groupBy = append(groupBy, newColumn(col)) // add to select @@ -296,7 +334,31 @@ func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.Sele for i := 0; i < numAggrs; i++ { tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) col := randomEl(tables[tblIdx].cols) + // TODO: aggregating on a date column sometimes errors + if col.typ == "date" && !TestFailingQueries { + i-- + continue + } + newAggregate := aggregations[aggrIdx](col) + // TODO: collating on strings sometimes errors + if col.typ == "varchar" && !TestFailingQueries { + if _, ok := newAggregate.(*sqlparser.Min); ok { + i-- + continue + } + if _, ok := newAggregate.(*sqlparser.Max); ok { + i-- + continue + } + } + + // TODO: type of sum() is incorrect (int64 vs decimal) in certain queries + if _, ok := newAggregate.(*sqlparser.Sum); ok && !TestFailingQueries { + i-- + continue + } + aggrExprs = append(aggrExprs, sqlparser.NewAliasedExpr(newAggregate, fmt.Sprintf("caggr%d", i))) if aggrIdx <= 1 /* CountStar and Count */ { @@ -313,18 +375,22 @@ func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.Sele } // orders on all non-aggregate SelectExprs and independently at random on all aggregate SelectExprs of sel -func addOrderBy(sel *sqlparser.Select) { - for _, selExpr := range sel.SelectExprs { - if aliasedExpr, ok := selExpr.(*sqlparser.AliasedExpr); ok { - // if the SelectExpr is non-aggregate (the AliasedExpr has Expr of type ColName) - // then add to the order by - if colName, ok1 := aliasedExpr.Expr.(*sqlparser.ColName); ok1 { - sel.AddOrder(sqlparser.NewOrder(colName, getRandomOrderDirection())) - } else if rand.Intn(2) < 1 { - sel.AddOrder(sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) +func createOrderBy(groupBy sqlparser.GroupBy, aggrExprs sqlparser.SelectExprs) (orderBy sqlparser.OrderBy) { + // always order on grouping columns + for i := range groupBy { + orderBy = append(orderBy, sqlparser.NewOrder(groupBy[i], getRandomOrderDirection())) + } + + // randomly order on aggregation columns + for i := range aggrExprs { + if aliasedExpr, ok := aggrExprs[i].(*sqlparser.AliasedExpr); ok { + if rand.Intn(2) < 1 { + orderBy = append(orderBy, sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) } } } + + return orderBy } // compares two random columns (usually of the same type) @@ -382,9 +448,18 @@ func createWherePredicates(tables []tableT, isJoin bool) (predicates sqlparser.E func createHavingPredicates(tables []tableT) (havingPredicates sqlparser.Exprs) { aggrSelectExprs, _ := createAggregations(tables, 2) for i := range aggrSelectExprs { - if aliasedExpr, ok := aggrSelectExprs[i].(*sqlparser.AliasedExpr); ok { - predRandomExpr, _ := getRandomExpr(tables) - havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), aliasedExpr.Expr, predRandomExpr, nil)) + if lhs, ok := aggrSelectExprs[i].(*sqlparser.AliasedExpr); ok { + // TODO: HAVING can only contain aggregate or grouping columns in mysql, works fine in vitess + // TODO: Can fix this by putting only the table with the grouping and aggregates column (newTable) + // TODO: but random expressions without the columns also fails + if TestFailingQueries { + predRandomExpr, _ := getRandomExpr(tables) + havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), lhs.Expr, predRandomExpr, nil)) + } else { + if rhs, ok1 := randomEl(aggrSelectExprs).(*sqlparser.AliasedExpr); ok1 { + havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), lhs.Expr, rhs.Expr, nil)) + } + } } } return havingPredicates diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index ff1efac2ea7..dc870099228 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -34,7 +34,7 @@ import ( // this test uses the AST defined in the sqlparser package to randomly generate queries // if true then known failing query types are still generated by randomQuery() -const TestFailingQueries = true +const TestFailingQueries = false // if true then execution will always stop on a "must fix" error: a mismatched results or EOF const StopOnMustFixError = true @@ -84,6 +84,14 @@ func TestMustFix(t *testing.T) { require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) + // mismatched results + // sum values returned as int64 instead of decimal + helperTest(t, "select /*vt+ PLANNER=Gen4 */ sum(tbl1.sal) as caggr1 from emp as tbl0, emp as tbl1 group by tbl1.ename order by tbl1.ename asc") + + // mismatched results + // limit >= 9 works + helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl0.ename as cgroup1 from emp as tbl0 group by tbl0.job, tbl0.ename having sum(tbl0.mgr) = sum(tbl0.mgr) order by tbl0.job desc, tbl0.ename asc limit 8") + // mismatched results helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) as caggr1 from dept as tbl0, emp as tbl1 group by tbl1.sal having max(tbl1.comm) != true") @@ -124,6 +132,9 @@ func TestKnownFailures(t *testing.T) { // logs more stuff //clusterInstance.EnableGeneralLog() + // cannot compare strings, collation is unknown or unsupported (collation ID: 0) + helperTest(t, "select /*vt+ PLANNER=Gen4 */ max(tbl1.dname) as caggr1 from dept as tbl0, dept as tbl1 group by tbl1.dname order by tbl1.dname asc") + // vitess error: // mysql error: Incorrect DATE value: 'tuna' helperTest(t, "select /*vt+ PLANNER=Gen4 */ min(tbl0.empno) as caggr0 from emp as tbl0 where case 'gator' when false then 314 else 'weevil' end > tbl0.job having min(tbl0.hiredate) <=> 'tuna'") @@ -158,6 +169,10 @@ func TestKnownFailures(t *testing.T) { // unsupported: in scatter query: complex aggregate expression (errno 1235) (sqlstate 42000) helperTest(t, "select /*vt+ PLANNER=Gen4 */ (select count(*) from emp as tbl0) from emp as tbl0") + // unsupported + // unsupported: using aggregation on top of a *planbuilder.filter plan + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl1.dname) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.dname > tbl1.loc where tbl1.loc <=> tbl1.dname group by tbl1.dname order by tbl1.dname asc") + // unsupported // unsupported: using aggregation on top of a *planbuilder.orderedAggregate plan helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0) as tbl0") From 8e4a1dad06da8ae292da3f51243924cba644e64a Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 27 Jun 2023 22:08:06 -0700 Subject: [PATCH 25/29] rename random/query_gen_test.go to query_gen.go Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../vtgate/queries/random/{query_gen_test.go => query_gen.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename go/test/endtoend/vtgate/queries/random/{query_gen_test.go => query_gen.go} (100%) diff --git a/go/test/endtoend/vtgate/queries/random/query_gen_test.go b/go/test/endtoend/vtgate/queries/random/query_gen.go similarity index 100% rename from go/test/endtoend/vtgate/queries/random/query_gen_test.go rename to go/test/endtoend/vtgate/queries/random/query_gen.go From db14cd4133ac49b74f6c7a673b5b285ceb2665f3 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Tue, 27 Jun 2023 23:29:32 -0700 Subject: [PATCH 26/29] fixed reviewed comments Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/utils/mysql.go | 4 +- .../vtgate/queries/random/query_gen.go | 109 +++++++----------- .../vtgate/queries/random/random_expr_test.go | 5 +- .../vtgate/queries/random/random_test.go | 11 +- go/vt/sqlparser/precedence_test.go | 2 +- go/vt/sqlparser/random_expr.go | 22 ++-- go/vt/sqlparser/rewriter_test.go | 2 +- go/vt/sqlparser/walker_test.go | 4 +- 8 files changed, 60 insertions(+), 99 deletions(-) diff --git a/go/test/endtoend/utils/mysql.go b/go/test/endtoend/utils/mysql.go index a289a07c63a..6e85ec6bdf7 100644 --- a/go/test/endtoend/utils/mysql.go +++ b/go/test/endtoend/utils/mysql.go @@ -200,9 +200,7 @@ func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn orderBy = selStmt.GetOrderBy() != nil } - if orderBy && sqltypes.ResultsEqual([]sqltypes.Result{*vtQr}, []sqltypes.Result{*mysqlQr}) { - return nil - } else if sqltypes.ResultsEqualUnordered([]sqltypes.Result{*vtQr}, []sqltypes.Result{*mysqlQr}) { + if (orderBy && sqltypes.ResultsEqual([]sqltypes.Result{*vtQr}, []sqltypes.Result{*mysqlQr})) || sqltypes.ResultsEqualUnordered([]sqltypes.Result{*vtQr}, []sqltypes.Result{*mysqlQr}) { return nil } diff --git a/go/test/endtoend/vtgate/queries/random/query_gen.go b/go/test/endtoend/vtgate/queries/random/query_gen.go index 57f2ce69489..09225d5edfc 100644 --- a/go/test/endtoend/vtgate/queries/random/query_gen.go +++ b/go/test/endtoend/vtgate/queries/random/query_gen.go @@ -19,7 +19,6 @@ package random import ( "fmt" "math/rand" - "testing" "time" "golang.org/x/exp/slices" @@ -31,6 +30,9 @@ import ( // this file contains the structs and functions to generate random queries +// if true then known failing query types are still generated by randomQuery() +const testFailingQueries = false + type ( column struct { tableName string @@ -59,11 +61,11 @@ func (t *tableT) typeExpr(typ string) sqlparser.Expr { newTableName = sqlparser.String(tName.Name) } return sqlparser.NewColNameWithQualifier(randCol.name, sqlparser.NewTableName(newTableName)) - } else { - // delete randCol from table.columns - tableCopy.cols[idx] = tableCopy.cols[len(tableCopy.cols)-1] - tableCopy.cols = tableCopy.cols[:len(tableCopy.cols)-1] } + + // delete randCol from table.columns + tableCopy.cols[idx] = tableCopy.cols[len(tableCopy.cols)-1] + tableCopy.cols = tableCopy.cols[:len(tableCopy.cols)-1] } return nil @@ -119,30 +121,6 @@ func (c *column) getColumnName() string { return fmt.Sprintf("%s.%s", c.tableName, c.name) } -func TestRandomQuery(t *testing.T) { - schemaTables := []tableT{ - {name: sqlparser.NewTableName("emp")}, - {name: sqlparser.NewTableName("dept")}, - } - schemaTables[0].addColumns([]column{ - {name: "empno", typ: "bigint"}, - {name: "ename", typ: "varchar"}, - {name: "job", typ: "varchar"}, - {name: "mgr", typ: "bigint"}, - {name: "hiredate", typ: "date"}, - {name: "sal", typ: "bigint"}, - {name: "comm", typ: "bigint"}, - {name: "deptno", typ: "bigint"}, - }...) - schemaTables[1].addColumns([]column{ - {name: "deptno", typ: "bigint"}, - {name: "dname", typ: "varchar"}, - {name: "loc", typ: "varchar"}, - }...) - - fmt.Println(sqlparser.String(randomQuery(schemaTables, 3, 3))) -} - func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Select { sel := &sqlparser.Select{} sel.SetComments(sqlparser.Comments{"/*vt+ PLANNER=Gen4 */"}) @@ -162,7 +140,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel grouping []column ) // TODO: distinct makes vitess think there is grouping on aggregation columns - if TestFailingQueries || !isDistinct { + if testFailingQueries || !isDistinct { groupBy, groupSelectExprs, grouping = createGroupBy(tables, maxGroupBy) sel.AddSelectExprs(groupSelectExprs) sel.GroupBy = groupBy @@ -172,7 +150,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel // can add both aggregate and grouping columns to order by // TODO: order fails with distinct and outer joins - isOrdered := rand.Intn(2) < 1 && (!isDistinct || TestFailingQueries) && (!isJoin || TestFailingQueries) && TestFailingQueries + isOrdered := rand.Intn(2) < 1 && (!isDistinct || testFailingQueries) && (!isJoin || testFailingQueries) && testFailingQueries // TODO: order by fails a lot; probably related to the previously passing query // TODO: should be fixed soon if isOrdered { @@ -184,8 +162,8 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel // random predicate expression // TODO: random expressions cause a lot of failures - if rand.Intn(2) < 1 && TestFailingQueries { - predRandomExpr, _ := getRandomExpr(tables) + if rand.Intn(2) < 1 && testFailingQueries { + predRandomExpr := getRandomExpr(tables) sel.AddWhere(predRandomExpr) } @@ -193,7 +171,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel isHaving := rand.Intn(2) < 1 if isHaving { sel.AddHaving(sqlparser.AndExpressions(createHavingPredicates(tables)...)) - if rand.Intn(2) < 1 && TestFailingQueries { + if rand.Intn(2) < 1 && testFailingQueries { // TODO: having can only contain aggregate or grouping columns in mysql, works fine in vitess // TODO: Can fix this by putting only the table with the grouping and aggregates column (newTable) sel.AddHaving(sqlparser.AndExpressions(createWherePredicates(tables, false)...)) @@ -203,24 +181,24 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel // only add a limit if the grouping columns are ordered // TODO: limit fails a lot - if rand.Intn(2) < 1 && (isOrdered || len(groupBy) == 0) && TestFailingQueries { + if rand.Intn(2) < 1 && (isOrdered || len(groupBy) == 0) && testFailingQueries { sel.Limit = createLimit() } var newTable tableT // add random expression to select // TODO: random expressions cause a lot of failures - isRandomExpr := rand.Intn(2) < 1 && TestFailingQueries + isRandomExpr := rand.Intn(2) < 1 && testFailingQueries var ( randomExpr sqlparser.Expr typ string ) // TODO: selecting a random expression potentially with columns creates // TODO: only_full_group_by related errors in Vitess - if TestFailingQueries { - randomExpr, typ = getRandomExpr(tables) + if testFailingQueries { + randomExpr = getRandomExpr(tables) } else { - randomExpr, typ = getRandomExpr(nil) + randomExpr = getRandomExpr(nil) } if isRandomExpr { sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(randomExpr, "crandom0")) @@ -243,7 +221,7 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel // derived tables (partially unsupported) // TODO: derived tables fails a lot - if rand.Intn(10) < 1 && TestFailingQueries { + if rand.Intn(10) < 1 && testFailingQueries { sel = randomQuery(schemaTables, 3, 3) } @@ -266,7 +244,7 @@ func createTablesAndJoin(schemaTables []tableT, sel *sqlparser.Select) ([]tableT } // TODO: outer joins produce mismatched results - isJoin := rand.Intn(2) < 1 && TestFailingQueries + isJoin := rand.Intn(2) < 1 && testFailingQueries if isJoin { newTable := randomEl(schemaTables) tables = append(tables, newTable) @@ -303,7 +281,7 @@ func createGroupBy(tables []tableT, maxGB int) (groupBy sqlparser.GroupBy, group tblIdx := rand.Intn(len(tables)) col := randomEl(tables[tblIdx].cols) // TODO: grouping by a date column sometimes errors - if col.typ == "date" && !TestFailingQueries { + if col.typ == "date" && !testFailingQueries { continue } groupBy = append(groupBy, newColumn(col)) @@ -311,12 +289,13 @@ func createGroupBy(tables []tableT, maxGB int) (groupBy sqlparser.GroupBy, group // add to select if rand.Intn(2) < 1 { groupSelectExprs = append(groupSelectExprs, newAliasedColumn(col, fmt.Sprintf("cgroup%d", i))) + // TODO: alias in a separate function to properly generate the having clause col.name = fmt.Sprintf("cgroup%d", i) grouping = append(grouping, col) } } - return groupBy, groupSelectExprs, grouping + return } // returns the aggregation columns as three types: sqlparser.SelectExprs, []column @@ -335,26 +314,26 @@ func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.Sele tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) col := randomEl(tables[tblIdx].cols) // TODO: aggregating on a date column sometimes errors - if col.typ == "date" && !TestFailingQueries { + if col.typ == "date" && !testFailingQueries { i-- continue } newAggregate := aggregations[aggrIdx](col) // TODO: collating on strings sometimes errors - if col.typ == "varchar" && !TestFailingQueries { - if _, ok := newAggregate.(*sqlparser.Min); ok { + if col.typ == "varchar" && !testFailingQueries { + switch newAggregate.(type) { + case *sqlparser.Min: i-- continue - } - if _, ok := newAggregate.(*sqlparser.Max); ok { + case *sqlparser.Max: i-- continue } } // TODO: type of sum() is incorrect (int64 vs decimal) in certain queries - if _, ok := newAggregate.(*sqlparser.Sum); ok && !TestFailingQueries { + if _, ok := newAggregate.(*sqlparser.Sum); ok && !testFailingQueries { i-- continue } @@ -367,11 +346,10 @@ func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.Sele col.typ = "decimal" } - col.name = sqlparser.String(newAggregate) col.name = fmt.Sprintf("caggr%d", i) aggregates = append(aggregates, col) } - return aggrExprs, aggregates + return } // orders on all non-aggregate SelectExprs and independently at random on all aggregate SelectExprs of sel @@ -383,14 +361,12 @@ func createOrderBy(groupBy sqlparser.GroupBy, aggrExprs sqlparser.SelectExprs) ( // randomly order on aggregation columns for i := range aggrExprs { - if aliasedExpr, ok := aggrExprs[i].(*sqlparser.AliasedExpr); ok { - if rand.Intn(2) < 1 { - orderBy = append(orderBy, sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) - } + if aliasedExpr, ok := aggrExprs[i].(*sqlparser.AliasedExpr); ok && rand.Intn(2) < 1 { + orderBy = append(orderBy, sqlparser.NewOrder(aliasedExpr.Expr, getRandomOrderDirection())) } } - return orderBy + return } // compares two random columns (usually of the same type) @@ -437,11 +413,11 @@ func createWherePredicates(tables []tableT, isJoin bool) (predicates sqlparser.E // make sure the join predicate is never empty if len(predicates) == 0 && isJoin { - predRandomExpr, _ := getRandomExpr(tables) + predRandomExpr := getRandomExpr(tables) predicates = append(predicates, predRandomExpr) } - return predicates + return } // creates predicates for the having clause comparing a column to a random expression @@ -452,17 +428,15 @@ func createHavingPredicates(tables []tableT) (havingPredicates sqlparser.Exprs) // TODO: HAVING can only contain aggregate or grouping columns in mysql, works fine in vitess // TODO: Can fix this by putting only the table with the grouping and aggregates column (newTable) // TODO: but random expressions without the columns also fails - if TestFailingQueries { - predRandomExpr, _ := getRandomExpr(tables) + if testFailingQueries { + predRandomExpr := getRandomExpr(tables) havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), lhs.Expr, predRandomExpr, nil)) - } else { - if rhs, ok1 := randomEl(aggrSelectExprs).(*sqlparser.AliasedExpr); ok1 { - havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), lhs.Expr, rhs.Expr, nil)) - } + } else if rhs, ok1 := randomEl(aggrSelectExprs).(*sqlparser.AliasedExpr); ok1 { + havingPredicates = append(havingPredicates, sqlparser.NewComparisonExpr(getRandomComparisonExprOperator(), lhs.Expr, rhs.Expr, nil)) } } } - return havingPredicates + return } // creates sel.Limit @@ -471,14 +445,13 @@ func createLimit() *sqlparser.Limit { if rand.Intn(2) < 1 { offset := rand.Intn(10) return sqlparser.NewLimit(offset, limitNum) - } else { - return sqlparser.NewLimitWithoutOffset(limitNum) } + return sqlparser.NewLimitWithoutOffset(limitNum) } // returns a random expression and its type -func getRandomExpr(tables []tableT) (sqlparser.Expr, string) { +func getRandomExpr(tables []tableT) sqlparser.Expr { seed := time.Now().UnixNano() g := sqlparser.NewGenerator(seed, 2, slices2.Map(tables, func(t tableT) sqlparser.ExprGenerator { return &t })...) return g.Expression() diff --git a/go/test/endtoend/vtgate/queries/random/random_expr_test.go b/go/test/endtoend/vtgate/queries/random/random_expr_test.go index dd333bb4479..b4e1cfb9c2d 100644 --- a/go/test/endtoend/vtgate/queries/random/random_expr_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_expr_test.go @@ -17,7 +17,6 @@ limitations under the License. package random import ( - "fmt" "testing" "time" @@ -25,6 +24,7 @@ import ( "vitess.io/vitess/go/vt/sqlparser" ) +// This test tests that generating a random expression with a schema does not panic func TestRandomExprWithTables(t *testing.T) { schemaTables := []tableT{ {name: sqlparser.NewTableName("emp")}, @@ -48,6 +48,5 @@ func TestRandomExprWithTables(t *testing.T) { seed := time.Now().UnixNano() g := sqlparser.NewGenerator(seed, 3, slices2.Map(schemaTables, func(t tableT) sqlparser.ExprGenerator { return &t })...) - randomExpr, _ := g.Expression() - fmt.Println(sqlparser.String(randomExpr)) + g.Expression() } diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index dc870099228..de956525ffb 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -33,11 +33,8 @@ import ( // this test uses the AST defined in the sqlparser package to randomly generate queries -// if true then known failing query types are still generated by randomQuery() -const TestFailingQueries = false - // if true then execution will always stop on a "must fix" error: a mismatched results or EOF -const StopOnMustFixError = true +const stopOnMustFixError = true func start(t *testing.T) (utils.MySQLCompare, func()) { mcmp, err := utils.NewMySQLCompare(t, vtParams, mysqlParams) @@ -214,10 +211,10 @@ func TestRandom(t *testing.T) { {name: "loc", typ: "varchar"}, }...) - endBy := time.Now().Add(10 * time.Second) + endBy := time.Now().Add(1 * time.Second) var queryCount int - for time.Now().Before(endBy) && (!t.Failed() || TestFailingQueries) { + for time.Now().Before(endBy) && (!t.Failed() || testFailingQueries) { query := sqlparser.String(randomQuery(schemaTables, 3, 3)) _, vtErr := mcmp.ExecAllowAndCompareError(query) @@ -226,7 +223,7 @@ func TestRandom(t *testing.T) { fmt.Println(query) fmt.Println(vtErr) - if StopOnMustFixError { + if stopOnMustFixError { // EOF if sqlError, ok := vtErr.(*mysql.SQLError); ok && strings.Contains(sqlError.Message, "EOF") { break diff --git a/go/vt/sqlparser/precedence_test.go b/go/vt/sqlparser/precedence_test.go index 99ecea7fc01..cb8c1f23805 100644 --- a/go/vt/sqlparser/precedence_test.go +++ b/go/vt/sqlparser/precedence_test.go @@ -224,7 +224,7 @@ func TestRandom(t *testing.T) { break } // Given a random expression - randomExpr, _ := g.Expression() + randomExpr := g.Expression() inputQ := "select " + String(randomExpr) + " from t" // When it's parsed and unparsed diff --git a/go/vt/sqlparser/random_expr.go b/go/vt/sqlparser/random_expr.go index 72a7c45c5da..9b3b711c87f 100644 --- a/go/vt/sqlparser/random_expr.go +++ b/go/vt/sqlparser/random_expr.go @@ -1,5 +1,5 @@ /* -Copyright 2023 The Vitess Authors. +Copyright 2020 The Vitess Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -73,29 +73,23 @@ func (g *Generator) atMaxDepth() bool { - &, |, ^, +, -, *, /, div, %, <<, >> - IN, BETWEEN and CASE - IS NULL, IS NOT NULL, IS TRUE, IS NOT TRUE, IS FALSE, IS NOT FALSE + Returns the random expression (Expr) and its type (string) Note: It's important to update this method so that it produces all expressions that need precedence checking. It's currently missing function calls and string operators */ -func (g *Generator) Expression() (Expr, string) { - typ := "tinyint" +func (g *Generator) Expression() Expr { if g.randomBool() { - return g.booleanExpr(), typ + return g.booleanExpr() } + options := []exprF{ func() Expr { return g.intExpr() }, func() Expr { return g.stringExpr() }, func() Expr { return g.booleanExpr() }, } - fn := g.randomOf(options) - if fn == g.intExpr() { - typ = "bigint" - } else if fn == g.stringExpr() { - typ = "varchar" - } - - return fn, typ + return g.randomOf(options) } func (g *Generator) booleanExpr() Expr { @@ -231,10 +225,10 @@ func (g *Generator) caseExpr(valueF func() Expr) Expr { if exp == nil { cond = g.booleanExpr() } else { - cond, _ = g.Expression() + cond = g.Expression() } - val, _ := g.Expression() + val := g.Expression() whens = append(whens, &When{ Cond: cond, Val: val, diff --git a/go/vt/sqlparser/rewriter_test.go b/go/vt/sqlparser/rewriter_test.go index 3a9ef42bf36..9adae1b4a81 100644 --- a/go/vt/sqlparser/rewriter_test.go +++ b/go/vt/sqlparser/rewriter_test.go @@ -26,7 +26,7 @@ import ( func BenchmarkVisitLargeExpression(b *testing.B) { gen := NewGenerator(1, 5) - exp, _ := gen.Expression() + exp := gen.Expression() depth := 0 for i := 0; i < b.N; i++ { diff --git a/go/vt/sqlparser/walker_test.go b/go/vt/sqlparser/walker_test.go index acea63ef56b..5359235afa5 100644 --- a/go/vt/sqlparser/walker_test.go +++ b/go/vt/sqlparser/walker_test.go @@ -26,7 +26,7 @@ import ( func BenchmarkWalkLargeExpression(b *testing.B) { for i := 0; i < 10; i++ { b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { - exp, _ := NewGenerator(int64(i*100), 5).Expression() + exp := NewGenerator(int64(i*100), 5).Expression() count := 0 for i := 0; i < b.N; i++ { err := Walk(func(node SQLNode) (kontinue bool, err error) { @@ -42,7 +42,7 @@ func BenchmarkWalkLargeExpression(b *testing.B) { func BenchmarkRewriteLargeExpression(b *testing.B) { for i := 1; i < 7; i++ { b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { - exp, _ := NewGenerator(int64(i*100), i).Expression() + exp := NewGenerator(int64(i*100), i).Expression() count := 0 for i := 0; i < b.N; i++ { _ = Rewrite(exp, func(_ *Cursor) bool { From c1b1f597f6c56de1c0ba60190085dfdfcfc73055 Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:18:11 -0700 Subject: [PATCH 27/29] added support for non-aggregate queries and a few more known failures Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/vtgate/queries/random/query_gen.go | 11 +++++++++-- go/test/endtoend/vtgate/queries/random/random_test.go | 10 ++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/query_gen.go b/go/test/endtoend/vtgate/queries/random/query_gen.go index 09225d5edfc..b315f9ff49f 100644 --- a/go/test/endtoend/vtgate/queries/random/query_gen.go +++ b/go/test/endtoend/vtgate/queries/random/query_gen.go @@ -144,7 +144,9 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel groupBy, groupSelectExprs, grouping = createGroupBy(tables, maxGroupBy) sel.AddSelectExprs(groupSelectExprs) sel.GroupBy = groupBy + } + aggrExprs, aggregates := createAggregations(tables, maxAggrs) sel.AddSelectExprs(aggrExprs) @@ -200,7 +202,12 @@ func randomQuery(schemaTables []tableT, maxAggrs, maxGroupBy int) *sqlparser.Sel } else { randomExpr = getRandomExpr(nil) } - if isRandomExpr { + + // make sure we have at least one select expression + if isRandomExpr || len(sel.SelectExprs) == 0 { + // TODO: select distinct [literal] fails + sel.Distinct = false + sel.SelectExprs = append(sel.SelectExprs, sqlparser.NewAliasedExpr(randomExpr, "crandom0")) newTable.addColumns(column{ name: "crandom0", @@ -309,7 +316,7 @@ func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.Sele func(col column) sqlparser.Expr { return &sqlparser.Max{Arg: newColumn(col)} }, } - numAggrs := rand.Intn(maxAggrs) + 1 + numAggrs := rand.Intn(maxAggrs) for i := 0; i < numAggrs; i++ { tblIdx, aggrIdx := rand.Intn(len(tables)), rand.Intn(len(aggregations)) col := randomEl(tables[tblIdx].cols) diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index de956525ffb..1d29fef92cc 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -98,9 +98,16 @@ func TestMustFix(t *testing.T) { // mismatched results helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.dname) as caggr0, 'cattle' as crandom0 from dept as tbl0, emp as tbl1 where tbl0.deptno != tbl1.sal group by tbl1.comm") + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 1 = 0") + // mismatched results helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 'octopus'") + // similar to previous two + // mismatched results + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct 'octopus' as crandom0 from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.empno having count(*) = count(*)") + // mismatched results // previously failing, then succeeding query, now failing again helperTest(t, "select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3") @@ -156,6 +163,9 @@ func TestKnownFailures(t *testing.T) { // vttablet: rpc error: code = NotFound desc = Unknown column 'cgroup0' in 'field list' (errno 1054) (sqlstate 42S22) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ tbl1.ename as cgroup0, max(tbl0.comm) as caggr0 from emp as tbl0, emp as tbl1 group by cgroup0") + // vttablet: rpc error: code = NotFound desc = Unknown column '347' in 'group statement' + helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct 347 as crandom0 from emp as tbl0") + // vttablet: rpc error: code = InvalidArgument desc = Can't group on 'count(*)' (errno 1056) (sqlstate 42000) (CallerID: userData1) helperTest(t, "select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.deptno") From 51c14313c971487d5257bfc5989ff97745b01fba Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Wed, 28 Jun 2023 22:29:00 -0700 Subject: [PATCH 28/29] updated tableT.name comment Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- go/test/endtoend/vtgate/queries/random/query_gen.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/random/query_gen.go b/go/test/endtoend/vtgate/queries/random/query_gen.go index b315f9ff49f..d673a904189 100644 --- a/go/test/endtoend/vtgate/queries/random/query_gen.go +++ b/go/test/endtoend/vtgate/queries/random/query_gen.go @@ -40,8 +40,10 @@ type ( typ string } tableT struct { - // name will be a tableName object if it is used, with name: alias or name if no alias is provided - // name will only be a DerivedTable for moving its data around + // the tableT struct can be used to represent the schema of a table or a derived table + // in the former case name will be a sqlparser.TableName, in the latter a sqlparser.DerivedTable + // in order to create a query with a derived table, its AST form is retrieved from name + // once the derived table is aliased, name is replaced by a sqlparser.TableName with that alias name sqlparser.SimpleTableExpr cols []column } @@ -330,10 +332,7 @@ func createAggregations(tables []tableT, maxAggrs int) (aggrExprs sqlparser.Sele // TODO: collating on strings sometimes errors if col.typ == "varchar" && !testFailingQueries { switch newAggregate.(type) { - case *sqlparser.Min: - i-- - continue - case *sqlparser.Max: + case *sqlparser.Min, *sqlparser.Max: i-- continue } From 455a7e02ca8bd5f9bbb317764a0ea453b32e94bd Mon Sep 17 00:00:00 2001 From: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> Date: Wed, 28 Jun 2023 23:54:10 -0700 Subject: [PATCH 29/29] added vtgate/queries/random to CI Signed-off-by: Arvind Murty <10248018+arvind-murty@users.noreply.github.com> --- .../queries/aggregation/aggregation_test.go | 48 ---------------- .../vtgate/queries/random/random_test.go | 56 +++++++++++++++++++ test/config.json | 9 +++ 3 files changed, 65 insertions(+), 48 deletions(-) diff --git a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go index be94f0f2d1e..d6cb7baeb1d 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go +++ b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go @@ -452,54 +452,6 @@ func TestBuggyQueries(t *testing.T) { mcmp.Exec("select /*vt+ PLANNER=gen4 */sum(tbl1.a), min(tbl0.b) from t10 as tbl0, t10 as tbl1 left join t10 as tbl2 on tbl1.a = tbl2.a and tbl1.b = tbl2.k") mcmp.Exec("select /*vt+ PLANNER=gen4 */count(*) from t10 left join t10 as t11 on t10.a = t11.b where t11.a") - - // from random/random_test.go - mcmp.Exec("INSERT INTO emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) VALUES (7369,'SMITH','CLERK',7902,'1980-12-17',800,NULL,20), (7499,'ALLEN','SALESMAN',7698,'1981-02-20',1600,300,30), (7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30), (7566,'JONES','MANAGER',7839,'1981-04-02',2975,NULL,20), (7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30), (7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,NULL,30), (7782,'CLARK','MANAGER',7839,'1981-06-09',2450,NULL,10), (7788,'SCOTT','ANALYST',7566,'1982-12-09',3000,NULL,20), (7839,'KING','PRESIDENT',NULL,'1981-11-17',5000,NULL,10), (7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,0,30), (7876,'ADAMS','CLERK',7788,'1983-01-12',1100,NULL,20), (7900,'JAMES','CLERK',7698,'1981-12-03',950,NULL,30), (7902,'FORD','ANALYST',7566,'1981-12-03',3000,NULL,20), (7934,'MILLER','CLERK',7782,'1982-01-23',1300,NULL,10)") - mcmp.Exec("INSERT INTO dept(deptno, dname, loc) VALUES ('10','ACCOUNTING','NEW YORK'), ('20','RESEARCH','DALLAS'), ('30','SALES','CHICAGO'), ('40','OPERATIONS','BOSTON')") - - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(*) from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.deptno group by tbl1.empno order by tbl1.empno", - `[[INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)]]`) - //mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3", - // `[[INT64(8)] [INT64(16)] [INT64(12)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from emp as tbl0 group by tbl0.empno order by tbl0.empno", - `[[INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*), tbl0.loc from dept as tbl0 group by tbl0.loc", - `[[INT64(1) VARCHAR("BOSTON")] [INT64(1) VARCHAR("CHICAGO")] [INT64(1) VARCHAR("DALLAS")] [INT64(1) VARCHAR("NEW YORK")]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.loc", - `[[INT64(1)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ sum(tbl1.comm) from emp as tbl0, emp as tbl1", - `[[DECIMAL(30800)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl1 group by tbl1.mgr", - `[[NULL NULL INT64(1)] [INT64(7566) INT64(7566) INT64(2)] [INT64(7698) INT64(7698) INT64(5)] [INT64(7782) INT64(7782) INT64(1)] [INT64(7788) INT64(7788) INT64(1)] [INT64(7839) INT64(7839) INT64(3)] [INT64(7902) INT64(7902) INT64(1)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl0, emp as tbl1 group by tbl1.mgr", - `[[NULL NULL INT64(14)] [INT64(7566) INT64(7566) INT64(28)] [INT64(7698) INT64(7698) INT64(70)] [INT64(7782) INT64(7782) INT64(14)] [INT64(7788) INT64(7788) INT64(14)] [INT64(7839) INT64(7839) INT64(42)] [INT64(7902) INT64(7902) INT64(14)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(tbl0.comm) from emp as tbl0, emp as tbl1 join dept as tbl2", - `[[INT64(784) INT64(784) INT64(224)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0, dept as tbl1", - `[[INT64(16) INT64(16)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0", - `[[INT64(4)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0", - `[[VARCHAR("BOSTON")]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.empno, max(tbl1.job) from dept as tbl0, emp as tbl1 group by tbl1.empno", - `[[INT64(7369) VARCHAR("CLERK")] [INT64(7499) VARCHAR("SALESMAN")] [INT64(7521) VARCHAR("SALESMAN")] [INT64(7566) VARCHAR("MANAGER")] [INT64(7654) VARCHAR("SALESMAN")] [INT64(7698) VARCHAR("MANAGER")] [INT64(7782) VARCHAR("MANAGER")] [INT64(7788) VARCHAR("ANALYST")] [INT64(7839) VARCHAR("PRESIDENT")] [INT64(7844) VARCHAR("SALESMAN")] [INT64(7876) VARCHAR("CLERK")] [INT64(7900) VARCHAR("CLERK")] [INT64(7902) VARCHAR("ANALYST")] [INT64(7934) VARCHAR("CLERK")]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.ename, max(tbl0.comm) from emp as tbl0, emp as tbl1 group by tbl1.ename", - `[[VARCHAR("ADAMS") INT64(1400)] [VARCHAR("ALLEN") INT64(1400)] [VARCHAR("BLAKE") INT64(1400)] [VARCHAR("CLARK") INT64(1400)] [VARCHAR("FORD") INT64(1400)] [VARCHAR("JAMES") INT64(1400)] [VARCHAR("JONES") INT64(1400)] [VARCHAR("KING") INT64(1400)] [VARCHAR("MARTIN") INT64(1400)] [VARCHAR("MILLER") INT64(1400)] [VARCHAR("SCOTT") INT64(1400)] [VARCHAR("SMITH") INT64(1400)] [VARCHAR("TURNER") INT64(1400)] [VARCHAR("WARD") INT64(1400)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl0.dname, tbl0.dname, min(tbl0.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl0.dname", - `[[VARCHAR("ACCOUNTING") VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("OPERATIONS") VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("RESEARCH") VARCHAR("RESEARCH") INT64(20)] [VARCHAR("SALES") VARCHAR("SALES") INT64(30)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl0.dname, min(tbl1.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl1.dname", - `[[VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("ACCOUNTING") INT64(40)] [VARCHAR("ACCOUNTING") INT64(20)] [VARCHAR("ACCOUNTING") INT64(30)] [VARCHAR("OPERATIONS") INT64(10)] [VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("OPERATIONS") INT64(20)] [VARCHAR("OPERATIONS") INT64(30)] [VARCHAR("RESEARCH") INT64(10)] [VARCHAR("RESEARCH") INT64(40)] [VARCHAR("RESEARCH") INT64(20)] [VARCHAR("RESEARCH") INT64(30)] [VARCHAR("SALES") INT64(10)] [VARCHAR("SALES") INT64(40)] [VARCHAR("SALES") INT64(20)] [VARCHAR("SALES") INT64(30)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ max(tbl0.hiredate) from emp as tbl0", - `[[DATE("1983-01-12")]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0, count(*) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.loc = tbl1.dname", - `[[INT64(10) INT64(4)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc where (tbl2.deptno)", - `[[INT64(4)]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm", - `[[NULL NULL]]`) - mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno", - `[[NULL INT64(0)]]`) - } func TestMinMaxAcrossJoins(t *testing.T) { diff --git a/go/test/endtoend/vtgate/queries/random/random_test.go b/go/test/endtoend/vtgate/queries/random/random_test.go index 1d29fef92cc..a51b919e0dc 100644 --- a/go/test/endtoend/vtgate/queries/random/random_test.go +++ b/go/test/endtoend/vtgate/queries/random/random_test.go @@ -78,6 +78,8 @@ func helperTest(t *testing.T, query string) { } func TestMustFix(t *testing.T) { + t.Skip("Skip CI") + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) @@ -130,6 +132,8 @@ func TestMustFix(t *testing.T) { } func TestKnownFailures(t *testing.T) { + t.Skip("Skip CI") + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) @@ -252,3 +256,55 @@ func TestRandom(t *testing.T) { } fmt.Printf("Queries successfully executed: %d\n", queryCount) } + +func TestBuggyQueries(t *testing.T) { + mcmp, closer := start(t) + defer closer() + + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "emp", clusterInstance.VtgateProcess.ReadVSchema)) + require.NoError(t, utils.WaitForAuthoritative(t, keyspaceName, "dept", clusterInstance.VtgateProcess.ReadVSchema)) + + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(*) from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.deptno group by tbl1.empno order by tbl1.empno", + `[[INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)] [INT64(1) INT64(1) INT64(1)]]`) + //mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl0.deptno) from dept as tbl0, emp as tbl1 group by tbl1.job order by tbl1.job limit 3", + // `[[INT64(8)] [INT64(16)] [INT64(12)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from emp as tbl0 group by tbl0.empno order by tbl0.empno", + `[[INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)] [INT64(1) INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*), tbl0.loc from dept as tbl0 group by tbl0.loc", + `[[INT64(1) VARCHAR("BOSTON")] [INT64(1) VARCHAR("CHICAGO")] [INT64(1) VARCHAR("DALLAS")] [INT64(1) VARCHAR("NEW YORK")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct count(*) from dept as tbl0 group by tbl0.loc", + `[[INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ sum(tbl1.comm) from emp as tbl0, emp as tbl1", + `[[DECIMAL(30800)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl1 group by tbl1.mgr", + `[[NULL NULL INT64(1)] [INT64(7566) INT64(7566) INT64(2)] [INT64(7698) INT64(7698) INT64(5)] [INT64(7782) INT64(7782) INT64(1)] [INT64(7788) INT64(7788) INT64(1)] [INT64(7839) INT64(7839) INT64(3)] [INT64(7902) INT64(7902) INT64(1)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.mgr, tbl1.mgr, count(*) from emp as tbl0, emp as tbl1 group by tbl1.mgr", + `[[NULL NULL INT64(14)] [INT64(7566) INT64(7566) INT64(28)] [INT64(7698) INT64(7698) INT64(70)] [INT64(7782) INT64(7782) INT64(14)] [INT64(7788) INT64(7788) INT64(14)] [INT64(7839) INT64(7839) INT64(42)] [INT64(7902) INT64(7902) INT64(14)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*), count(tbl0.comm) from emp as tbl0, emp as tbl1 join dept as tbl2", + `[[INT64(784) INT64(784) INT64(224)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*), count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0, dept as tbl1", + `[[INT64(16) INT64(16)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(*) from (select count(*) from dept as tbl0 group by tbl0.deptno) as tbl0", + `[[INT64(4)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ min(tbl0.loc) from dept as tbl0", + `[[VARCHAR("BOSTON")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.empno, max(tbl1.job) from dept as tbl0, emp as tbl1 group by tbl1.empno", + `[[INT64(7369) VARCHAR("CLERK")] [INT64(7499) VARCHAR("SALESMAN")] [INT64(7521) VARCHAR("SALESMAN")] [INT64(7566) VARCHAR("MANAGER")] [INT64(7654) VARCHAR("SALESMAN")] [INT64(7698) VARCHAR("MANAGER")] [INT64(7782) VARCHAR("MANAGER")] [INT64(7788) VARCHAR("ANALYST")] [INT64(7839) VARCHAR("PRESIDENT")] [INT64(7844) VARCHAR("SALESMAN")] [INT64(7876) VARCHAR("CLERK")] [INT64(7900) VARCHAR("CLERK")] [INT64(7902) VARCHAR("ANALYST")] [INT64(7934) VARCHAR("CLERK")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl1.ename, max(tbl0.comm) from emp as tbl0, emp as tbl1 group by tbl1.ename", + `[[VARCHAR("ADAMS") INT64(1400)] [VARCHAR("ALLEN") INT64(1400)] [VARCHAR("BLAKE") INT64(1400)] [VARCHAR("CLARK") INT64(1400)] [VARCHAR("FORD") INT64(1400)] [VARCHAR("JAMES") INT64(1400)] [VARCHAR("JONES") INT64(1400)] [VARCHAR("KING") INT64(1400)] [VARCHAR("MARTIN") INT64(1400)] [VARCHAR("MILLER") INT64(1400)] [VARCHAR("SCOTT") INT64(1400)] [VARCHAR("SMITH") INT64(1400)] [VARCHAR("TURNER") INT64(1400)] [VARCHAR("WARD") INT64(1400)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl0.dname, tbl0.dname, min(tbl0.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl0.dname", + `[[VARCHAR("ACCOUNTING") VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("OPERATIONS") VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("RESEARCH") VARCHAR("RESEARCH") INT64(20)] [VARCHAR("SALES") VARCHAR("SALES") INT64(30)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ tbl0.dname, min(tbl1.deptno) from dept as tbl0, dept as tbl1 group by tbl0.dname, tbl1.dname", + `[[VARCHAR("ACCOUNTING") INT64(10)] [VARCHAR("ACCOUNTING") INT64(40)] [VARCHAR("ACCOUNTING") INT64(20)] [VARCHAR("ACCOUNTING") INT64(30)] [VARCHAR("OPERATIONS") INT64(10)] [VARCHAR("OPERATIONS") INT64(40)] [VARCHAR("OPERATIONS") INT64(20)] [VARCHAR("OPERATIONS") INT64(30)] [VARCHAR("RESEARCH") INT64(10)] [VARCHAR("RESEARCH") INT64(40)] [VARCHAR("RESEARCH") INT64(20)] [VARCHAR("RESEARCH") INT64(30)] [VARCHAR("SALES") INT64(10)] [VARCHAR("SALES") INT64(40)] [VARCHAR("SALES") INT64(20)] [VARCHAR("SALES") INT64(30)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ max(tbl0.hiredate) from emp as tbl0", + `[[DATE("1983-01-12")]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0, count(*) as caggr1 from dept as tbl0 left join dept as tbl1 on tbl1.loc = tbl1.dname", + `[[INT64(10) INT64(4)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ count(tbl1.loc) as caggr0 from dept as tbl1 left join dept as tbl2 on tbl1.loc = tbl2.loc where (tbl2.deptno)", + `[[INT64(4)]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ sum(tbl1.ename), min(tbl0.empno) from emp as tbl0, emp as tbl1 left join dept as tbl2 on tbl1.job = tbl2.loc and tbl1.comm = tbl2.deptno where ('trout') and tbl0.deptno = tbl1.comm", + `[[NULL NULL]]`) + mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct max(tbl0.deptno), count(tbl0.job) from emp as tbl0, dept as tbl1 left join dept as tbl2 on tbl1.dname = tbl2.loc and tbl1.dname = tbl2.loc where (tbl2.loc) and tbl0.deptno = tbl1.deptno", + `[[NULL INT64(0)]]`) + +} diff --git a/test/config.json b/test/config.json index 0070720f1a0..222f38de078 100644 --- a/test/config.json +++ b/test/config.json @@ -621,6 +621,15 @@ "RetryMax": 1, "Tags": [] }, + "vtgate_queries_random": { + "File": "unused.go", + "Args": ["vitess.io/vitess/go/test/endtoend/vtgate/queries/random"], + "Command": [], + "Manual": false, + "Shard": "vtgate_queries", + "RetryMax": 1, + "Tags": [] + }, "vtgate_concurrentdml": { "File": "unused.go", "Args": ["vitess.io/vitess/go/test/endtoend/vtgate/concurrentdml"],