From ced74e9cf2809fc57f56098ad72d86cf6d15e60e Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Thu, 15 Dec 2022 01:08:03 +0000 Subject: [PATCH] opt: allow lookup joins to preserve index ordering with DESC columns This patch fixes an oversight of #84689 that prevented lookup joins from maintaining the index ordering for each lookup if the index ordering contained descending columns. The execution logic will respect descending index columns as-is, so only the optimizer code needed to be changed. This will allow plans with lookup joins to avoid sorts in more cases. Fixes #88319 Release note (performance improvement): The optimizer can now avoid planning a sort in more cases with joins that perform lookups into an index with one or more columns sorted in descending order. This can significantly decrease the number of rows that have to be scanned in order to satisfy a `LIMIT` clause. --- pkg/kv/kvclient/kvstreamer/streamer.go | 6 +- pkg/sql/distsql_physical_planner.go | 9 +-- pkg/sql/execinfrapb/processors_sql.proto | 4 +- .../logictest/testdata/logic_test/lookup_join | 72 +++++++++---------- pkg/sql/opt/ordering/lookup_join.go | 39 +++++----- pkg/sql/opt/ordering/lookup_join_test.go | 11 ++- pkg/sql/opt/xform/testdata/physprops/ordering | 46 ++++++------ 7 files changed, 83 insertions(+), 104 deletions(-) diff --git a/pkg/kv/kvclient/kvstreamer/streamer.go b/pkg/kv/kvclient/kvstreamer/streamer.go index 4be7efaece6d..a3c4e527fe8f 100644 --- a/pkg/kv/kvclient/kvstreamer/streamer.go +++ b/pkg/kv/kvclient/kvstreamer/streamer.go @@ -429,11 +429,7 @@ func (s *Streamer) Init( // // In InOrder operation mode, responses will be delivered in reqs order. When // more than one row is returned for a given request, the rows for that request -// will be sorted in the order of the lookup index if the index contains only -// ascending columns. -// TODO(drewk): lift the restriction that index columns must be ASC in order to -// -// return results in lookup order. 
+// will be sorted in the order of the lookup index. // // It is the caller's responsibility to ensure that the memory footprint of reqs // (i.e. roachpb.Spans inside of the requests) is reasonable. Enqueue will diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go index 9acdaa1c2c04..7db0bd3e8ecf 100644 --- a/pkg/sql/distsql_physical_planner.go +++ b/pkg/sql/distsql_physical_planner.go @@ -2522,17 +2522,14 @@ func (dsp *DistSQLPlanner) createPlanForLookupJoin( // If any of the ordering columns originate from the lookup table, this is a // case where we are ordering on a prefix of input columns followed by the - // lookup columns. We need to maintain the index ordering on each lookup. + // lookup columns. var maintainLookupOrdering bool numInputCols := len(plan.GetResultTypes()) for i := range n.reqOrdering { if n.reqOrdering[i].ColIdx >= numInputCols { + // We need to maintain the index ordering on each lookup. maintainLookupOrdering = true - if n.reqOrdering[i].Direction == encoding.Descending { - // Validate that an ordering on lookup columns does not contain - // descending columns. - panic(errors.AssertionFailedf("ordering on a lookup index with descending columns")) - } + break } } diff --git a/pkg/sql/execinfrapb/processors_sql.proto b/pkg/sql/execinfrapb/processors_sql.proto index d9fa0913ab70..caa87aa47375 100644 --- a/pkg/sql/execinfrapb/processors_sql.proto +++ b/pkg/sql/execinfrapb/processors_sql.proto @@ -395,9 +395,7 @@ message JoinReaderSpec { // only be set to true if maintain_ordering is also true. // maintain_lookup_ordering can be used if the output needs to be ordered by // a prefix of input columns followed by index (lookup) columns without - // requiring a (buffered) sort. As an additional restriction due to - // implementation details, maintain_lookup_ordering can only be used when the - // index columns that participate in the output ordering are all ASC. + // requiring a (buffered) sort. 
optional bool maintain_lookup_ordering = 22 [(gogoproto.nullable) = false]; } diff --git a/pkg/sql/logictest/testdata/logic_test/lookup_join b/pkg/sql/logictest/testdata/logic_test/lookup_join index 1ab35c44b665..94c468c38503 100644 --- a/pkg/sql/logictest/testdata/logic_test/lookup_join +++ b/pkg/sql/logictest/testdata/logic_test/lookup_join @@ -843,10 +843,10 @@ WHERE views.chat_id = 1 and views.user_id = 1; # have to sort its output). statement ok -CREATE TABLE xyz (x INT, y INT, z INT, PRIMARY KEY(x, y, z)); +CREATE TABLE xyz (x INT, y INT, z INT, PRIMARY KEY(x, y DESC, z)); statement ok -CREATE TABLE uvw (u INT, v INT, w INT, PRIMARY KEY(u, v, w)); +CREATE TABLE uvw (u INT, v INT, w INT, PRIMARY KEY(u, v, w DESC)); statement ok INSERT INTO xyz VALUES (1, 1, 1), (1, 1, 2), (1, 2, 3), (2, 1, 4), (2, 1, 5), (2, 1, 6), (3, 1, 7); @@ -855,86 +855,86 @@ statement ok INSERT INTO uvw VALUES (1, 1, 1), (1, 2, 2), (1, 2, 3), (2, 1, 4), (2, 1, 5), (2, 2, 6), (2, 2, 7); query IIIIII colnames -SELECT * FROM xyz INNER LOOKUP JOIN uvw ON x = u ORDER BY x, y, z, u, v, w +SELECT * FROM xyz INNER LOOKUP JOIN uvw ON x = u ORDER BY x, y DESC, z, u, v, w DESC ---- x y z u v w +1 2 3 1 1 1 +1 2 3 1 2 3 +1 2 3 1 2 2 1 1 1 1 1 1 -1 1 1 1 2 2 1 1 1 1 2 3 +1 1 1 1 2 2 1 1 2 1 1 1 -1 1 2 1 2 2 1 1 2 1 2 3 -1 2 3 1 1 1 -1 2 3 1 2 2 -1 2 3 1 2 3 -2 1 4 2 1 4 +1 1 2 1 2 2 2 1 4 2 1 5 -2 1 4 2 2 6 +2 1 4 2 1 4 2 1 4 2 2 7 -2 1 5 2 1 4 +2 1 4 2 2 6 2 1 5 2 1 5 -2 1 5 2 2 6 +2 1 5 2 1 4 2 1 5 2 2 7 -2 1 6 2 1 4 +2 1 5 2 2 6 2 1 6 2 1 5 -2 1 6 2 2 6 +2 1 6 2 1 4 2 1 6 2 2 7 +2 1 6 2 2 6 query IIIIII colnames -SELECT * FROM xyz INNER HASH JOIN uvw ON x = u ORDER BY x, y, z, u, v, w +SELECT * FROM xyz INNER HASH JOIN uvw ON x = u ORDER BY x, y DESC, z, u, v, w DESC ---- x y z u v w +1 2 3 1 1 1 +1 2 3 1 2 3 +1 2 3 1 2 2 1 1 1 1 1 1 -1 1 1 1 2 2 1 1 1 1 2 3 +1 1 1 1 2 2 1 1 2 1 1 1 -1 1 2 1 2 2 1 1 2 1 2 3 -1 2 3 1 1 1 -1 2 3 1 2 2 -1 2 3 1 2 3 -2 1 4 2 1 4 +1 1 2 1 2 2 2 1 4 2 1 5 -2 1 4 2 2 6 
+2 1 4 2 1 4 2 1 4 2 2 7 -2 1 5 2 1 4 +2 1 4 2 2 6 2 1 5 2 1 5 -2 1 5 2 2 6 +2 1 5 2 1 4 2 1 5 2 2 7 -2 1 6 2 1 4 +2 1 5 2 2 6 2 1 6 2 1 5 -2 1 6 2 2 6 +2 1 6 2 1 4 2 1 6 2 2 7 +2 1 6 2 2 6 query IIIIII colnames -SELECT * FROM xyz INNER LOOKUP JOIN uvw ON x = u AND y = v ORDER BY u, x, v, y, z, w +SELECT * FROM xyz INNER LOOKUP JOIN uvw ON x = u AND y = v ORDER BY u, x, v, y DESC, z, w DESC ---- x y z u v w 1 1 1 1 1 1 1 1 2 1 1 1 -1 2 3 1 2 2 1 2 3 1 2 3 -2 1 4 2 1 4 +1 2 3 1 2 2 2 1 4 2 1 5 -2 1 5 2 1 4 +2 1 4 2 1 4 2 1 5 2 1 5 -2 1 6 2 1 4 +2 1 5 2 1 4 2 1 6 2 1 5 +2 1 6 2 1 4 query IIIIII colnames -SELECT * FROM xyz INNER HASH JOIN uvw ON x = u AND y = v ORDER BY u, x, v, y, z, w +SELECT * FROM xyz INNER HASH JOIN uvw ON x = u AND y = v ORDER BY u, x, v, y DESC, z, w DESC ---- x y z u v w 1 1 1 1 1 1 1 1 2 1 1 1 -1 2 3 1 2 2 1 2 3 1 2 3 -2 1 4 2 1 4 +1 2 3 1 2 2 2 1 4 2 1 5 -2 1 5 2 1 4 +2 1 4 2 1 4 2 1 5 2 1 5 -2 1 6 2 1 4 +2 1 5 2 1 4 2 1 6 2 1 5 +2 1 6 2 1 4 # Test inequality lookup joins. # Case with idxCol <= inputCol. diff --git a/pkg/sql/opt/ordering/lookup_join.go b/pkg/sql/opt/ordering/lookup_join.go index 4cc5a26c1b98..fbd24b48cef2 100644 --- a/pkg/sql/opt/ordering/lookup_join.go +++ b/pkg/sql/opt/ordering/lookup_join.go @@ -204,24 +204,22 @@ func lookupJoinBuildProvided(expr memo.RelExpr, required *props.OrderingChoice) // // It is possible for a lookup join to supply an ordering that references index // columns if the ordering consists of a series of input columns that form a key -// over the input, followed by the index columns in index order. Due to -// implementation details, currently the ordering columns from the index must be -// ASC. The following is a case where a lookup join could maintain an ordering -// over both input and index columns: +// over the input, followed by the index columns in index order. 
The following +// is a case where a lookup join could maintain an ordering over both input and +// index columns: // // CREATE TABLE ab (a INT, b INT, PRIMARY KEY(a, b)); -// CREATE TABLE xyz (x INT, y INT, z INT, PRIMARY KEY(x, y, z DESC)); -// SELECT * FROM ab INNER LOOKUP JOIN xy ON a = x ORDER BY a, b, x, y; +// CREATE TABLE xy (x INT, y INT, PRIMARY KEY(x, y DESC)); +// SELECT * FROM ab INNER LOOKUP JOIN xy ON a = x ORDER BY a, b, x, y DESC; // // Note that in this example the 'a' and 'b' columns form a key over the // input of the lookup join. Additionally, the 'x' column alone is not a key // for the 'xy' table, so each lookup may return multiple rows (which need -// to be ordered among themselves). Since the postfix of the ordering that -// references index columns is in index order (x, y) and has no DESC -// columns, the lookup join in the example can supply the ordering itself. -// On the other hand, switching 'b' and 'y' in the ordering, removing 'b', -// or adding the 'z' column to the required order would mean the query would -// require a sort. +// to be ordered among themselves). Since the suffix of the ordering that +// references index columns is in index order (x, y DESC), the lookup join in +// the example can supply the ordering itself. On the other hand, switching +// 'b' and 'y' in the ordering, removing 'b', or changing the ordering on 'y' to +// ASC would mean the query would require a sort. // // Note that the Columns field of the required OrderingChoice should reflect the // postfix of the required ordering that cannot be satisfied by input columns, @@ -236,11 +234,13 @@ func getLookupOrdCols( // joins can only maintain the index ordering for each individual input // row, so we need to disallow cases where different input rows may sort // the same on the input ordering. - // TODO(drewk): it is possible to take advantage of the index ordering - // when the input ordering does not form a key over the input. 
In this - // case, we would require that the index ordering columns for a given - // input row are functionally determined by the input ordering columns. - // This would disqualify IN constraints and inequalities. + // + // Note that it would be technically correct to use the index ordering when + // the input ordering does not form a key over the input iff the input + // ordering columns functionally determined the index ordering columns. + // However, in this case the addition of the index ordering columns would be + // trivial, since the ordering could be simplified to just include the input + // ordering columns (see OrderingChoice.Simplify). return nil, false } // The columns from the prefix of the required ordering satisfied by the @@ -272,11 +272,6 @@ func getLookupOrdCols( // satisfy the required ordering, so break instead of returning. break } - if idx.Column(i).Descending { - // The index ordering columns must be ASC in order for lookups to be - // returned in index order. - return nil, false - } indexOrder = append(indexOrder, opt.MakeOrderingColumn(idxColID, idx.Column(i).Descending)) } // Check if the index ordering satisfies the postfix of the required diff --git a/pkg/sql/opt/ordering/lookup_join_test.go b/pkg/sql/opt/ordering/lookup_join_test.go index 5567aedc41b4..7b116c3b51ae 100644 --- a/pkg/sql/opt/ordering/lookup_join_test.go +++ b/pkg/sql/opt/ordering/lookup_join_test.go @@ -137,15 +137,15 @@ func TestLookupJoinProvided(t *testing.T) { input: "+5", provided: "+1,+2", }, - { // case 8: the lookup join preserves the input ordering but cannot provide - // the entire required ordering because the index has a descending column. + { // case 8: the lookup join preserves the input ordering and maintains the + // ordering of the descending index on lookups. Joining on c1 = c5. 
index: descendingIndex, keyCols: opt.ColList{5}, inputKey: c(5, 6), outCols: c(2, 3, 4, 5, 6), required: "+(1|5),+6,-2", input: "+5,+6", - provided: "+5,+6", + provided: "+5,+6,-2", }, } @@ -317,14 +317,13 @@ func TestLookupJoinCanProvide(t *testing.T) { required: "+(1|5),+6,-2", canProvide: false, }, - { // Case 11: the ordering cannot be satisfied because the lookup index has - // a descending column. + { // Case 11: an ordering with a descending column can be satisfied. idx: descendingIndex, keyCols: opt.ColList{5}, outCols: c(1, 2, 5, 6), inputKey: c(5, 6), required: "+(1|5),+6,-2", - canProvide: false, + canProvide: true, }, { // Case 12: the ordering cannot be satisfied because the required ordering // is missing index column c1. diff --git a/pkg/sql/opt/xform/testdata/physprops/ordering b/pkg/sql/opt/xform/testdata/physprops/ordering index e9e7fc233210..0f47809e3ad1 100644 --- a/pkg/sql/opt/xform/testdata/physprops/ordering +++ b/pkg/sql/opt/xform/testdata/physprops/ordering @@ -2715,8 +2715,7 @@ inner-join (lookup abc) │ └── ordering: +1,+2,+3 └── filters (true) -# Can supply the requested ordering because the descending column from the -# index does not take part in the ordering (no sort should be added). +# Preserving lookup ordering (no sort should be added). opt SELECT * FROM xyz INNER LOOKUP JOIN abc@abc_desc ON x = a ORDER BY x, y, z, a, b ---- @@ -2733,6 +2732,24 @@ inner-join (lookup abc@abc_desc) │ └── ordering: +1,+2,+3 └── filters (true) +# Preserving lookup ordering (no sort should be added). Index order includes a +# descending column. 
+opt +SELECT * FROM xyz INNER LOOKUP JOIN abc@abc_desc ON x = a ORDER BY x, y, z, a, b, c DESC +---- +inner-join (lookup abc@abc_desc) + ├── columns: x:1!null y:2!null z:3!null a:6!null b:7!null c:8!null + ├── flags: force lookup join (into right side) + ├── key columns: [1] = [6] + ├── key: (2,3,6-8) + ├── fd: (1)==(6), (6)==(1) + ├── ordering: +(1|6),+2,+3,+7,-8 [actual: +1,+2,+3,+7,-8] + ├── scan xyz + │ ├── columns: x:1!null y:2!null z:3!null + │ ├── key: (1-3) + │ └── ordering: +1,+2,+3 + └── filters (true) + # Cannot supply requested ordering because input and lookup ordering columns # are interleaved. opt @@ -2803,7 +2820,7 @@ sort (segmented) # Cannot supply the requested ordering because the direction of the 'c' column # is not the same as in the index. opt -SELECT * FROM xyz INNER LOOKUP JOIN abc ON x = a ORDER BY x, y, z, b, c DESC +SELECT * FROM xyz INNER LOOKUP JOIN abc@primary ON x = a ORDER BY x, y, z, b, c DESC ---- sort (segmented) ├── columns: x:1!null y:2!null z:3!null a:6!null b:7!null c:8!null @@ -2823,29 +2840,6 @@ sort (segmented) │ └── ordering: +1,+2,+3 └── filters (true) -# Cannot supply the requested ordering because the descending column from the -# index shows up in the ordering. 
-opt -SELECT * FROM xyz INNER LOOKUP JOIN abc@abc_desc ON x = a ORDER BY x, y, z, a, b, c DESC ----- -sort (segmented) - ├── columns: x:1!null y:2!null z:3!null a:6!null b:7!null c:8!null - ├── key: (2,3,6-8) - ├── fd: (1)==(6), (6)==(1) - ├── ordering: +(1|6),+2,+3,+7,-8 [actual: +1,+2,+3,+7,-8] - └── inner-join (lookup abc@abc_desc) - ├── columns: x:1!null y:2!null z:3!null a:6!null b:7!null c:8!null - ├── flags: force lookup join (into right side) - ├── key columns: [1] = [6] - ├── key: (2,3,6-8) - ├── fd: (1)==(6), (6)==(1) - ├── ordering: +1,+2,+3 - ├── scan xyz - │ ├── columns: x:1!null y:2!null z:3!null - │ ├── key: (1-3) - │ └── ordering: +1,+2,+3 - └── filters (true) - # Regression test for #85393 - use only columns from the required ordering when # building the provided ordering for Project operators. exec-ddl