diff --git a/go/test/endtoend/vtgate/gen4/gen4_test.go b/go/test/endtoend/vtgate/gen4/gen4_test.go index fe0f5728a97..438f4cfc3f6 100644 --- a/go/test/endtoend/vtgate/gen4/gen4_test.go +++ b/go/test/endtoend/vtgate/gen4/gen4_test.go @@ -76,6 +76,9 @@ func TestGroupBy(t *testing.T) { assertMatches(t, conn, `select tcol1, tcol1 from t1 join t2 on t1.id = t2.id order by tcol1`, `[[VARCHAR("A") VARCHAR("A")] [VARCHAR("A") VARCHAR("A")] [VARCHAR("B") VARCHAR("B")] [VARCHAR("C") VARCHAR("C")]]`) + + assertMatches(t, conn, `select count(*) k, tcol1, tcol2, "abc" b from t2 group by tcol1, tcol2, b order by k, tcol2`, + `[[INT64(1) VARCHAR("B") VARCHAR("A") VARCHAR("abc")] [INT64(1) VARCHAR("C") VARCHAR("A") VARCHAR("abc")] [INT64(1) VARCHAR("C") VARCHAR("B") VARCHAR("abc")] [INT64(1) VARCHAR("A") VARCHAR("C") VARCHAR("abc")] [INT64(2) VARCHAR("A") VARCHAR("A") VARCHAR("abc")] [INT64(2) VARCHAR("B") VARCHAR("C") VARCHAR("abc")]]`) } func assertMatches(t *testing.T, conn *mysql.Conn, query, expected string) { diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index f960bf0fe5b..8adb8a1a98b 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -35,10 +35,11 @@ type ( // QueryProjection contains the information about the projections, group by and order by expressions used to do horizon planning. QueryProjection struct { - SelectExprs []SelectExpr - HasAggr bool - GroupByExprs []GroupBy - OrderExprs []OrderBy + SelectExprs []SelectExpr + HasAggr bool + GroupByExprs []GroupBy + OrderExprs []OrderBy + CanPushDownSorting bool } // OrderBy contains the expression to used in order by and also if ordering is needed at VTGate level then what the weight_string function expression to be sent down for evaluation. @@ -92,7 +93,6 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { } for _, group := range sel.GroupBy { - // todo dont ignore weightstringexpr expr, weightStrExpr, err := qp.getSimplifiedExpr(group, "group statement") if err != nil { return nil, err @@ -100,6 +100,7 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { qp.GroupByExprs = append(qp.GroupByExprs, GroupBy{Inner: expr, WeightStrExpr: weightStrExpr}) } + canPushDownSorting := true for _, order := range sel.OrderBy { expr, weightStrExpr, err := qp.getSimplifiedExpr(order.Expr, "order clause") if err != nil { @@ -112,7 +113,9 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { }, WeightStrExpr: weightStrExpr, }) + canPushDownSorting = canPushDownSorting && !sqlparser.ContainsAggregation(weightStrExpr) } + qp.CanPushDownSorting = canPushDownSorting return qp, nil } diff --git a/go/vt/vtgate/planbuilder/ordered_aggregate.go b/go/vt/vtgate/planbuilder/ordered_aggregate.go index d7e9a0a29bc..8dfbed63297 100644 --- a/go/vt/vtgate/planbuilder/ordered_aggregate.go +++ b/go/vt/vtgate/planbuilder/ordered_aggregate.go @@ -59,6 +59,7 @@ type orderedAggregate struct { resultsBuilder extraDistinct *sqlparser.ColName eaggr *engine.OrderedAggregate + columnOffset map[sqlparser.Expr]int } // checkAggregates analyzes the select expression for aggregates. If it determines diff --git a/go/vt/vtgate/planbuilder/queryprojection_test.go b/go/vt/vtgate/planbuilder/queryprojection_test.go deleted file mode 100644 index 328b54cb41a..00000000000 --- a/go/vt/vtgate/planbuilder/queryprojection_test.go +++ /dev/null @@ -1,17 +0,0 @@ -/* -Copyright 2021 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package planbuilder diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 3fda413996a..88925ecdd81 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -197,7 +197,7 @@ func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.Se needsTruncation = needsTruncation || colAdded } - if qp.HasAggr { + if qp.HasAggr && qp.CanPushDownSorting { var colAdded bool plan, colAdded, err = planOrderByUsingGroupBy(qp, plan, semTable) if err != nil { diff --git a/go/vt/vtgate/planbuilder/selectGen4.go b/go/vt/vtgate/planbuilder/selectGen4.go index 881990c085e..9276aa9d624 100644 --- a/go/vt/vtgate/planbuilder/selectGen4.go +++ b/go/vt/vtgate/planbuilder/selectGen4.go @@ -87,6 +87,13 @@ func pushProjection(expr *sqlparser.AliasedExpr, plan logicalPlan, semTable *sem } node.Cols = append(node.Cols, column) return len(node.Cols) - 1, true, nil + case *orderedAggregate: + for k, v := range node.columnOffset { + if sqlparser.EqualsExpr(expr.Expr, k) { + return v, false, nil + } + } + return 0, false, semantics.Gen4NotSupportedF("column not found in already added list: %s", sqlparser.String(expr)) default: return 0, false, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "%T not yet supported", node) } @@ -129,7 +136,8 @@ func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable * weightStrings: make(map[*resultColumn]int), truncater: eaggr, }, - eaggr: eaggr, + eaggr: eaggr, + columnOffset: map[sqlparser.Expr]int{}, } for _, e := range qp.SelectExprs { offset, _, err := pushProjection(e.Col, plan, semTable, true, false) @@ -143,6 +151,7 @@ func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable * Opcode: opcode, Col: offset, }) + oa.columnOffset[e.Col.Expr] = offset } } @@ -154,6 +163,25 @@ func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable * } colAdded = colAdded || added } + + if !qp.CanPushDownSorting { + var orderExprs []abstract.OrderBy + // if we can't at a later stage push down the sorting to our inputs, we have to do ordering here + for _, groupExpr := range qp.GroupByExprs { + orderExprs = append(orderExprs, abstract.OrderBy{ + Inner: &sqlparser.Order{Expr: groupExpr.Inner}, + WeightStrExpr: groupExpr.WeightStrExpr}, + ) + } + if len(orderExprs) > 0 { + newInput, added, err := planOrderBy(qp, orderExprs, plan, semTable) + if err != nil { + return nil, false, err + } + oa.input = newInput + return oa, colAdded || added, nil + } + } return oa, colAdded, nil } @@ -176,6 +204,7 @@ func planGroupByGen4(groupExpr abstract.GroupBy, plan logicalPlan, semTable *sem if err != nil { return false, err } + node.columnOffset[groupExpr.WeightStrExpr] = keyCol return colAdded || colAddedRecursively, nil default: return false, semantics.Gen4NotSupportedF("group by on: %T", plan) @@ -212,15 +241,25 @@ func planOrderBy(qp *abstract.QueryProjection, orderExprs []abstract.OrderBy, pl case *joinGen4: return planOrderByForJoin(qp, orderExprs, plan, semTable) case *orderedAggregate: + for _, order := range orderExprs { + if sqlparser.ContainsAggregation(order.WeightStrExpr) { + ms, err := createMemorySortPlanOnAggregation(plan, orderExprs) + if err != nil { + return nil, false, err + } + return ms, false, nil + } + } newInput, colAdded, err := planOrderBy(qp, orderExprs, plan.input, semTable) if err != nil { return nil, false, err } plan.input = newInput - return plan, colAdded, nil + case *memorySort: + return plan, false, nil default: - return nil, false, semantics.Gen4NotSupportedF("ordering on complex query") + return nil, false, semantics.Gen4NotSupportedF("ordering on complex query %T", plan) } } @@ -310,7 +349,49 @@ func planOrderByForJoin(qp *abstract.QueryProjection, orderExprs []abstract.Orde plan.Left = newLeft return plan, false, nil } + ms, colAdded, err := createMemorySortPlan(plan, orderExprs, semTable) + if err != nil { + return nil, false, err + } + return ms, colAdded, nil +} + +func createMemorySortPlanOnAggregation(plan *orderedAggregate, orderExprs []abstract.OrderBy) (logicalPlan, error) { + primitive := &engine.MemorySort{} + ms := &memorySort{ + resultsBuilder: resultsBuilder{ + logicalPlanCommon: newBuilderCommon(plan), + weightStrings: make(map[*resultColumn]int), + truncater: primitive, + }, + eMemorySort: primitive, + } + + for _, order := range orderExprs { + offset, found := findExprInOrderedAggr(plan, order) + if !found { + return nil, vterrors.New(vtrpcpb.Code_INTERNAL, "expected to find this expression") + } + ms.eMemorySort.OrderBy = append(ms.eMemorySort.OrderBy, engine.OrderbyParams{ + Col: offset, + WeightStringCol: -1, + Desc: order.Inner.Direction == sqlparser.DescOrder, + StarColFixedIndex: offset, + }) + } + return ms, nil +} + +func findExprInOrderedAggr(plan *orderedAggregate, order abstract.OrderBy) (int, bool) { + for expr, i := range plan.columnOffset { + if sqlparser.EqualsExpr(order.WeightStrExpr, expr) { + return i, true + } + } + return 0, false +} +func createMemorySortPlan(plan logicalPlan, orderExprs []abstract.OrderBy, semTable *semantics.SemTable) (logicalPlan, bool, error) { primitive := &engine.MemorySort{} ms := &memorySort{ resultsBuilder: resultsBuilder{ @@ -335,9 +416,7 @@ func planOrderByForJoin(qp *abstract.QueryProjection, orderExprs []abstract.Orde StarColFixedIndex: offset, }) } - return ms, colAdded, nil - } func allLeft(orderExprs []abstract.OrderBy, semTable *semantics.SemTable, lhsTables semantics.TableSet) bool { diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 27594aeb0d4..e7ac5cc9e89 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -125,6 +125,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select count(*), a, textcol1, b from user group by a, textcol1, b", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "GroupBy": "1, 5, 3", + "ResultColumns": 4, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*), a, textcol1, b, weight_string(a), weight_string(textcol1), weight_string(b) from `user` where 1 != 1 group by a, textcol1, b", + "OrderBy": "1 ASC, 2 ASC, 3 ASC", + "Query": "select count(*), a, textcol1, b, weight_string(a), weight_string(textcol1), weight_string(b) from `user` group by a, textcol1, b order by a asc, textcol1 asc, b asc", + "Table": "`user`" + } + ] + } +} # scatter group by a integer column. Do not add weight strings for this. "select count(*), intcol from user group by intcol" @@ -190,6 +215,38 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select count(*) k, a, textcol1, b from user group by a, textcol1, b order by k, textcol1", + "Instructions": { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "0 ASC, 5 ASC", + "ResultColumns": 4, + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "GroupBy": "1, 5, 3", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*) as k, a, textcol1, b, weight_string(a), weight_string(textcol1), weight_string(b) from `user` where 1 != 1 group by a, textcol1, b", + "OrderBy": "1 ASC, 2 ASC, 3 ASC", + "Query": "select count(*) as k, a, textcol1, b, weight_string(a), weight_string(textcol1), weight_string(b) from `user` group by a, textcol1, b order by a asc, textcol1 asc, b asc", + "Table": "`user`" + } + ] + } + ] + } +} # count aggregate "select count(*) from user" @@ -454,6 +511,32 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select name, count(*) from user group by name", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "0", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `name`, count(*), weight_string(`name`) from `user` where 1 != 1 group by `name`", + "OrderBy": "0 ASC", + "Query": "select `name`, count(*), weight_string(`name`) from `user` group by `name` order by `name` asc", + "Table": "`user`" + } + ] + } +} + # group by a unique vindex should use a simple route, even if aggr is complex "select id, 1+count(*) from user group by id" @@ -886,6 +969,7 @@ Gen4 plan same as above " select count(*) b from user group by b" "Can't group on 'b'" + # scatter aggregate multiple group by (columns) "select a, b, count(*) from user group by b, a" { @@ -1000,6 +1084,7 @@ Gen4 plan same as above # scatter aggregate group by invalid column number "select col from user group by 2" "Unknown column '2' in 'group statement'" +Gen4 plan same as above # scatter aggregate order by null "select count(*) from user order by null" @@ -1057,6 +1142,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(4)", + "GroupBy": "0, 1, 2", + "ResultColumns": 5, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by a, b, c", + "OrderBy": "0 ASC, 1 ASC, 2 ASC", + "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by a, b, c order by a asc, b asc, c asc", + "Table": "`user`" + } + ] + } +} # scatter aggregate with named order by columns "select a, b, c, d, count(*) from user group by 1, 2, 3 order by a, b, c" @@ -1085,6 +1195,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by a, b, c", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(4)", + "GroupBy": "0, 1, 2", + "ResultColumns": 5, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by a, b, c", + "OrderBy": "0 ASC, 1 ASC, 2 ASC", + "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by a, b, c order by a asc, b asc, c asc", + "Table": "`user`" + } + ] + } +} # scatter aggregate with jumbled order by columns "select a, b, c, d, count(*) from user group by 1, 2, 3, 4 order by d, b, a, c" @@ -1113,6 +1248,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3, 4 order by d, b, a, c", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(4)", + "GroupBy": "0, 1, 2, 3", + "ResultColumns": 5, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c), weight_string(d) from `user` where 1 != 1 group by a, b, c, d", + "OrderBy": "3 ASC, 1 ASC, 0 ASC, 2 ASC", + "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c), weight_string(d) from `user` group by a, b, c, d order by d asc, b asc, a asc, c asc", + "Table": "`user`" + } + ] + } +} # scatter aggregate with jumbled group by and order by columns "select a, b, c, d, count(*) from user group by 3, 2, 1, 4 order by d, b, a, c" @@ -1141,6 +1301,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select a, b, c, d, count(*) from user group by 3, 2, 1, 4 order by d, b, a, c", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(4)", + "GroupBy": "2, 1, 0, 3", + "ResultColumns": 5, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b, c, d, count(*), weight_string(c), weight_string(b), weight_string(a), weight_string(d) from `user` where 1 != 1 group by c, b, a, d", + "OrderBy": "3 ASC, 1 ASC, 0 ASC, 2 ASC", + "Query": "select a, b, c, d, count(*), weight_string(c), weight_string(b), weight_string(a), weight_string(d) from `user` group by c, b, a, d order by d asc, b asc, a asc, c asc", + "Table": "`user`" + } + ] + } +} # scatter aggregate with some descending order by cols "select a, b, c, count(*) from user group by 3, 2, 1 order by 1 desc, 3 desc, b" @@ -1169,6 +1354,31 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select a, b, c, count(*) from user group by 3, 2, 1 order by 1 desc, 3 desc, b", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(3)", + "GroupBy": "2, 1, 0", + "ResultColumns": 4, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, b, c, count(*), weight_string(c), weight_string(b), weight_string(a) from `user` where 1 != 1 group by c, b, a", + "OrderBy": "0 DESC, 2 DESC, 1 ASC", + "Query": "select a, b, c, count(*), weight_string(c), weight_string(b), weight_string(a) from `user` group by c, b, a order by a desc, c desc, b asc", + "Table": "`user`" + } + ] + } +} # invalid order by column numner for scatter "select col, count(*) from user group by col order by 5 limit 10" @@ -1343,6 +1553,7 @@ Gen4 plan same as above # Group by out of range column number (code is duplicated from symab). "select id from user group by 2" "Unknown column '2' in 'group statement'" +Gen4 plan same as above # syntax error detected by planbuilder "select count(distinct *) from user"