From 3c8afda8c15e7dcdf0a0e5bd9e3171f3c324a1cf Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 6 Aug 2024 10:59:10 +0530 Subject: [PATCH 1/5] feat: prevent projection operator from being thrown away when its built Signed-off-by: Manan Gupta --- .../planbuilder/operators/apply_join.go | 2 +- .../planbuilder/operators/dml_with_input.go | 2 +- .../planbuilder/operators/offset_planning.go | 6 +- .../planbuilder/testdata/aggr_cases.json | 93 +++++++++++-------- 4 files changed, 61 insertions(+), 42 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/apply_join.go b/go/vt/vtgate/planbuilder/operators/apply_join.go index d87fb529caf..ef36f6a6765 100644 --- a/go/vt/vtgate/planbuilder/operators/apply_join.go +++ b/go/vt/vtgate/planbuilder/operators/apply_join.go @@ -293,7 +293,7 @@ func (aj *ApplyJoin) AddWSColumn(ctx *plancontext.PlanningContext, offset int, u func (aj *ApplyJoin) planOffsets(ctx *plancontext.PlanningContext) Operator { if len(aj.Columns) > 0 { // we've already done offset planning - return aj + return nil } for _, col := range aj.JoinColumns.columns { // Read the type description for applyJoinColumn to understand the following code diff --git a/go/vt/vtgate/planbuilder/operators/dml_with_input.go b/go/vt/vtgate/planbuilder/operators/dml_with_input.go index 09859b90bac..3843e2f3fa8 100644 --- a/go/vt/vtgate/planbuilder/operators/dml_with_input.go +++ b/go/vt/vtgate/planbuilder/operators/dml_with_input.go @@ -114,7 +114,7 @@ func (d *DMLWithInput) planOffsets(ctx *plancontext.PlanningContext) Operator { } } d.BvList = bvList - return d + return nil } var _ Operator = (*DMLWithInput)(nil) diff --git a/go/vt/vtgate/planbuilder/operators/offset_planning.go b/go/vt/vtgate/planbuilder/operators/offset_planning.go index 7a9cedccb89..fd32c0be002 100644 --- a/go/vt/vtgate/planbuilder/operators/offset_planning.go +++ b/go/vt/vtgate/planbuilder/operators/offset_planning.go @@ -38,16 +38,20 @@ func planOffsets(ctx *plancontext.PlanningContext, root Operator) Operator { panic(vterrors.VT13001(fmt.Sprintf("should not see %T here", in))) case offsettable: newOp := op.planOffsets(ctx) + var anythingChanged *ApplyResult if newOp == nil { newOp = op + } else { + // We got a new operator from plan offsets. We should return that something has changed. + anythingChanged = Rewrote("planned offsets") } if DebugOperatorTree { fmt.Println("Planned offsets for:") fmt.Println(ToTree(newOp)) } - return newOp, nil + return newOp, anythingChanged } return in, NoRewrite } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index eca27d81213..7a43ff3446c 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -6663,55 +6663,70 @@ "OrderBy": "(4|6) ASC, (5|7) ASC", "Inputs": [ { - "OperatorType": "Join", - "Variant": "HashLeftJoin", - "Collation": "binary", - "ComparisonType": "INT16", - "JoinColumnIndexes": "-1,1,-2,2,-3,3", - "Predicate": "`user`.col = ue.col", - "TableName": "`user`_user_extra", + "OperatorType": "Projection", + "Expressions": [ + "count(*) as count(*)", + "count(*) as count(*)", + "`user`.col as col", + "ue.col as col", + "`user`.foo as foo", + "ue.bar as bar", + "weight_string(`user`.foo) as weight_string(`user`.foo)", + "weight_string(ue.bar) as weight_string(ue.bar)" + ], "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select count(*), `user`.col, `user`.foo from `user` where 1 != 1 group by `user`.col, `user`.foo", - "Query": "select count(*), `user`.col, `user`.foo from `user` group by `user`.col, `user`.foo", - "Table": "`user`" - }, - { - "OperatorType": "Aggregate", - "Variant": "Ordered", - "Aggregates": "count_star(0)", - "GroupBy": "1, (2|3)", - "ResultColumns": 3, + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "INT16", + "JoinColumnIndexes": "-1,1,-2,2,-3,3", + "Predicate": "`user`.col = ue.col", + "TableName": "`user`_user_extra", "Inputs": [ { - "OperatorType": "SimpleProjection", - "Columns": "2,0,1,3", + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*), `user`.col, `user`.foo from `user` where 1 != 1 group by `user`.col, `user`.foo", + "Query": "select count(*), `user`.col, `user`.foo from `user` group by `user`.col, `user`.foo", + "Table": "`user`" + }, + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count_star(0)", + "GroupBy": "1, (2|3)", + "ResultColumns": 3, "Inputs": [ { - "OperatorType": "Sort", - "Variant": "Memory", - "OrderBy": "0 ASC, (1|3) ASC", + "OperatorType": "SimpleProjection", + "Columns": "2,0,1,3", "Inputs": [ { - "OperatorType": "Limit", - "Count": "10", + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "0 ASC, (1|3) ASC", "Inputs": [ { - "OperatorType": "Route", - "Variant": "Scatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra where 1 != 1) as ue where 1 != 1", - "Query": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra) as ue limit 10", - "Table": "user_extra" + "OperatorType": "Limit", + "Count": "10", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra where 1 != 1) as ue where 1 != 1", + "Query": "select ue.col, ue.bar, 1, weight_string(ue.bar) from (select col, bar from user_extra) as ue limit 10", + "Table": "user_extra" + } + ] } ] } From ad27f571b7670d6d2f02400f148a0f36ad940043 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 6 Aug 2024 11:21:54 +0530 Subject: [PATCH 2/5] feat: prevent hash join from reusing column offsets while offset planning Signed-off-by: Manan Gupta --- .../vtgate/planbuilder/operators/hash_join.go | 23 +------------------ .../planbuilder/testdata/aggr_cases.json | 2 +- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/hash_join.go b/go/vt/vtgate/planbuilder/operators/hash_join.go index 1928f4dda9e..23d0d061e21 100644 --- a/go/vt/vtgate/planbuilder/operators/hash_join.go +++ b/go/vt/vtgate/planbuilder/operators/hash_join.go @@ -326,20 +326,9 @@ func (hj *HashJoin) addColumn(ctx *plancontext.PlanningContext, in sqlparser.Exp inOffset = op.AddColumn(ctx, false, false, aeWrap(expr)) } - // we turn the + // we have to turn the incoming offset to an outgoing offset of the columns this operator is exposing internalOffset := offsetter(inOffset) - - // ok, we have an offset from the input operator. Let's check if we already have it - // in our list of incoming columns - - for idx, offset := range hj.ColumnOffsets { - if internalOffset == offset { - return idx - } - } - hj.ColumnOffsets = append(hj.ColumnOffsets, internalOffset) - return len(hj.ColumnOffsets) - 1 } @@ -434,17 +423,7 @@ func (hj *HashJoin) addSingleSidedColumn( // we have to turn the incoming offset to an outgoing offset of the columns this operator is exposing internalOffset := offsetter(inOffset) - - // ok, we have an offset from the input operator. Let's check if we already have it - // in our list of incoming columns - for idx, offset := range hj.ColumnOffsets { - if internalOffset == offset { - return idx - } - } - hj.ColumnOffsets = append(hj.ColumnOffsets, internalOffset) - return len(hj.ColumnOffsets) - 1 } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 7a43ff3446c..628a959af1d 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -6680,7 +6680,7 @@ "Variant": "HashLeftJoin", "Collation": "binary", "ComparisonType": "INT16", - "JoinColumnIndexes": "-1,1,-2,2,-3,3", + "JoinColumnIndexes": "-1,1,-2,2,-3,3,-3,3", "Predicate": "`user`.col = ue.col", "TableName": "`user`_user_extra", "Inputs": [ From 0594887d369ec1c51642dd367d13fd37e4a2434a Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 6 Aug 2024 11:36:32 +0530 Subject: [PATCH 3/5] test: add unit test that fails on main Signed-off-by: Manan Gupta --- .../planbuilder/testdata/from_cases.json | 113 ++++++++++++++++++ .../planbuilder/testdata/vschemas/schema.json | 6 + 2 files changed, 119 insertions(+) diff --git a/go/vt/vtgate/planbuilder/testdata/from_cases.json b/go/vt/vtgate/planbuilder/testdata/from_cases.json index 74a8b91430d..ca94f4ee866 100644 --- a/go/vt/vtgate/planbuilder/testdata/from_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/from_cases.json @@ -720,6 +720,119 @@ ] } }, + { + "comment": "Complex query that has hash left join underneath a memory sort and ordered aggregation", + "query": "select 1 from user join user_extra on user.id = user_extra.user_id join music on music.intcol = user_extra.col left join (select user_metadata.col, count(*) as count from user_metadata group by user_metadata.col) um on um.col = user_extra.col where user.id IN (103) group by user_extra.col, music.intcol", + "plan": { + "QueryType": "SELECT", + "Original": "select 1 from user join user_extra on user.id = user_extra.user_id join music on music.intcol = user_extra.col left join (select user_metadata.col, count(*) as count from user_metadata group by user_metadata.col) um on um.col = user_extra.col where user.id IN (103) group by user_extra.col, music.intcol", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "any_value(0) AS 1", + "GroupBy": "1, 4", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Sort", + "Variant": "Memory", + "OrderBy": "1 ASC, 4 ASC", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "HashLeftJoin", + "Collation": "binary", + "ComparisonType": "FLOAT64", + "JoinColumnIndexes": "-1,-2,1,-2,-4,-1", + "Predicate": "user_extra.col = um.col", + "TableName": "music_`user`, user_extra_user_metadata", + "Inputs": [ + { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "L:0,R:0,R:0,L:1", + "JoinVars": { + "music_intcol": 1 + }, + "TableName": "music_`user`, user_extra", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1, music.intcol from music where 1 != 1 group by music.intcol", + "Query": "select 1, music.intcol from music group by music.intcol", + "Table": "music" + }, + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select user_extra.col, user_extra.col from `user`, user_extra where 1 != 1 group by user_extra.col", + "Query": "select user_extra.col, user_extra.col from `user`, user_extra where `user`.id in (103) and user_extra.col = :music_intcol /* INT16 */ and `user`.id = user_extra.user_id group by user_extra.col", + "Table": "`user`, user_extra", + "Values": [ + "103" + ], + "Vindex": "user_index" + } + ] + }, + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "(0|1)", + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "SimpleProjection", + "Columns": "0,2", + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "sum_count_star(1) AS count", + "GroupBy": "(0|2)", + "ResultColumns": 3, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select user_metadata.col, count(*) as `count`, weight_string(user_metadata.col) from user_metadata where 1 != 1 group by user_metadata.col, weight_string(user_metadata.col)", + "OrderBy": "(0|2) ASC", + "Query": "select user_metadata.col, count(*) as `count`, weight_string(user_metadata.col) from user_metadata group by user_metadata.col, weight_string(user_metadata.col) order by user_metadata.col asc", + "Table": "user_metadata" + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "TablesUsed": [ + "user.music", + "user.user", + "user.user_extra", + "user.user_metadata" + ] + } + }, { "comment": "Straight-join (ignores the straight_join hint)", "query": "select m1.col from unsharded as m1 straight_join unsharded as m2", diff --git a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json index a8fe91e5d49..4fe275f2398 100644 --- a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json +++ b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json @@ -282,6 +282,12 @@ "column": "id", "name": "music_user_map" } + ], + "columns": [ + { + "name": "intcol", + "type": "INT16" + } ] }, "authoritative": { From 741a665e440c8e1b6a7bb505c0cf07b1f24e0f58 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Tue, 6 Aug 2024 11:58:27 +0530 Subject: [PATCH 4/5] feat: add failing e2e test case Signed-off-by: Manan Gupta --- .../vtgate/vitess_tester/join/join.test | 32 ++++++++++++++++++- .../vtgate/vitess_tester/join/vschema.json | 8 +++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/go/test/endtoend/vtgate/vitess_tester/join/join.test b/go/test/endtoend/vtgate/vitess_tester/join/join.test index cffd3a1b3aa..72d79a1206e 100644 --- a/go/test/endtoend/vtgate/vitess_tester/join/join.test +++ b/go/test/endtoend/vtgate/vitess_tester/join/join.test @@ -25,6 +25,15 @@ CREATE TABLE `t3` CHARSET utf8mb4, COLLATE utf8mb4_unicode_ci; +CREATE TABLE `t4` +( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `col` int unsigned NOT NULL, + PRIMARY KEY (`id`) +) ENGINE InnoDB, + CHARSET utf8mb4, + COLLATE utf8mb4_unicode_ci; + insert into t1 (id, name) values (1, 'A'), (2, 'B'), @@ -43,7 +52,28 @@ values (1, 'A'), (4, 'B'), (5, 'B'); +insert into t4 (id, col) +values (1, 1), + (2, 2), + (3, 3); + -- wait_authoritative t1 -- wait_authoritative t2 -- wait_authoritative t3 -select 42 from t1 join t2 on t1.id = t2.t1_id join t3 on t1.id = t3.id where t1.name or t2.id or t3.name; +select 42 +from t1 + join t2 on t1.id = t2.t1_id + join t3 on t1.id = t3.id +where t1.name + or t2.id + or t3.name; + +# Complex query that requires hash join underneath a memory sort and ordered aggregate +select 1 +from t1 + join t2 on t1.id = t2.t1_id + join t4 on t4.col = t2.id + left join (select t4.col, count(*) as count from t4 group by t4.col) t3 on t3.col = t2.id +where t1.id IN (1, 2) +group by t2.id, t4.col; + diff --git a/go/test/endtoend/vtgate/vitess_tester/join/vschema.json b/go/test/endtoend/vtgate/vitess_tester/join/vschema.json index b922d3f760c..1105b951e61 100644 --- a/go/test/endtoend/vtgate/vitess_tester/join/vschema.json +++ b/go/test/endtoend/vtgate/vitess_tester/join/vschema.json @@ -31,6 +31,14 @@ "name": "hash" } ] + }, + "t4": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ] } } } From e2f137cc8abf51ea66091192f258971e667a2494 Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Wed, 7 Aug 2024 13:43:40 +0530 Subject: [PATCH 5/5] feat: refactor code to handle the case where newop is the same as op Signed-off-by: Manan Gupta --- .../vtgate/planbuilder/operators/offset_planning.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/go/vt/vtgate/planbuilder/operators/offset_planning.go b/go/vt/vtgate/planbuilder/operators/offset_planning.go index fd32c0be002..e8301c18823 100644 --- a/go/vt/vtgate/planbuilder/operators/offset_planning.go +++ b/go/vt/vtgate/planbuilder/operators/offset_planning.go @@ -38,20 +38,21 @@ func planOffsets(ctx *plancontext.PlanningContext, root Operator) Operator { panic(vterrors.VT13001(fmt.Sprintf("should not see %T here", in))) case offsettable: newOp := op.planOffsets(ctx) - var anythingChanged *ApplyResult - if newOp == nil { newOp = op - } else { - // We got a new operator from plan offsets. We should return that something has changed. - anythingChanged = Rewrote("planned offsets") } if DebugOperatorTree { fmt.Println("Planned offsets for:") fmt.Println(ToTree(newOp)) } - return newOp, anythingChanged + + if newOp == op { + return newOp, nil + } else { + // We got a new operator from plan offsets. We should return that something has changed. + return newOp, Rewrote("planning offsets introduced a new operator") + } } return in, NoRewrite }