diff --git a/pkg/sql/explain_tree_test.go b/pkg/sql/explain_tree_test.go index 72f9a5f59837..994cf4ecbb7f 100644 --- a/pkg/sql/explain_tree_test.go +++ b/pkg/sql/explain_tree_test.go @@ -378,9 +378,9 @@ create table t.actors ( 1 .id @S1 (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) 1 .original sql (SELECT name FROM t.public.actors WHERE name = 'Foo') (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) 1 .exec mode one row (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) -2 limit (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) -2 .count (2)[int] (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) -3 max1row (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) +2 max1row (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) +3 limit (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) +3 .count (2)[int] (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) 4 render (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) 4 .render 0 (@2)[string] (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) 5 scan (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id) @@ -442,16 +442,16 @@ create table t.actors ( }, Children: []*roachpb.ExplainTreePlanNode{ { - Name: "limit", - Attrs: []*roachpb.ExplainTreePlanNode_Attr{ - { - Key: "count", - Value: "_", - }, - }, + Name: "max1row", Children: []*roachpb.ExplainTreePlanNode{ { - Name: "max1row", + Name: "limit", + Attrs: []*roachpb.ExplainTreePlanNode_Attr{ + { + Key: "count", + Value: "_", + }, + }, Children: []*roachpb.ExplainTreePlanNode{ { Name: "render", diff --git a/pkg/sql/logictest/testdata/planner_test/needed_columns b/pkg/sql/logictest/testdata/planner_test/needed_columns index 7d056486c1f0..16f631282f50 100644 --- a/pkg/sql/logictest/testdata/planner_test/needed_columns +++ b/pkg/sql/logictest/testdata/planner_test/needed_columns @@ -129,9 +129,9 @@ root · · │ id @S1 · · │ original sql (SELECT 2 AS x FROM (SELECT 3 AS s)) · · │ exec mode one row · · - └── limit · · (x) x=CONST - │ count 2 · · - └── max1row · · (x) x=CONST + └── max1row · · (x) x=CONST + └── limit · · (x) x=CONST + │ count 2 · · └── render · · (x) x=CONST │ render 0 2 · · └── render · · (s[omitted]) · diff --git a/pkg/sql/logictest/testdata/planner_test/subquery b/pkg/sql/logictest/testdata/planner_test/subquery index 40e5269218f1..386dca5f03f8 100644 --- a/pkg/sql/logictest/testdata/planner_test/subquery +++ b/pkg/sql/logictest/testdata/planner_test/subquery @@ -12,9 +12,9 @@ root · · │ id @S1 │ original sql (SELECT 1) │ exec mode one row - └── limit · · - │ count 2 - └── max1row · · + └── max1row · · + └── limit · · + │ count 2 └── render · · └── emptyrow · · @@ -36,9 +36,9 @@ root · · │ id @S1 │ original sql (SELECT 42) │ exec mode one row - └── limit · · - │ count 2 - └── max1row · · + └── max1row · · + └── limit · · + │ count 2 └── render · · └── emptyrow · · @@ -73,13 +73,14 @@ root · · │ id @S1 │ original sql (SELECT a FROM abc) │ exec mode one row - └── limit · · - │ count 2 - └── max1row · · + └── max1row · · + └── limit · · + │ count 2 └── render · · └── scan · · · table abc@primary · spans ALL +· limit 2 query TTTTT EXPLAIN (VERBOSE) SELECT * FROM abc WHERE a = (SELECT max(a) FROM abc WHERE EXISTS(SELECT * FROM abc WHERE c=a+3)) @@ -103,9 +104,9 @@ root · · │ id @S2 · · │ original sql (SELECT max(a) FROM abc WHERE EXISTS (SELECT * FROM abc WHERE c = (a + 3))) · · │ exec mode one row · · - └── limit · · (max) · - │ count 2 · · - └── max1row · · (max) · + └── max1row · · (max) · + └── limit · · (max) · + │ count 2 · · └── group · · (max) · │ aggregate 0 max(a) · · │ scalar · · · @@ -156,9 +157,9 @@ root · · │ id @S1 │ original sql (SELECT 2) │ exec mode one row - └── limit · · - │ count 2 - └── max1row · · + └── max1row · · + └── limit · · + │ count 2 └── render · · └── emptyrow · · diff --git a/pkg/sql/opt/exec/execbuilder/testdata/subquery b/pkg/sql/opt/exec/execbuilder/testdata/subquery index 8d92f0abf3f5..fbc97ffb7ef4 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/subquery +++ b/pkg/sql/opt/exec/execbuilder/testdata/subquery @@ -21,9 +21,9 @@ root · · │ id @S1 │ original sql (SELECT 42) │ exec mode one row - └── limit · · - │ count 2 - └── max1row · · + └── max1row · · + └── limit · · + │ count 2 └── render · · └── emptyrow · · @@ -43,6 +43,7 @@ root · · └── scan · · · table abc@primary · spans ALL +· limit 1 query TTTTT EXPLAIN (VERBOSE) SELECT * FROM abc WHERE a = (SELECT max(a) FROM abc WHERE EXISTS(SELECT * FROM abc WHERE c=a+3)) @@ -56,7 +57,9 @@ root · · │ │ id @S1 · · │ │ original sql EXISTS (SELECT * FROM abc WHERE c = (a + 3)) · · │ │ exec mode exists · · - │ └── scan · · (a, b, c) · + │ └── limit · · (a, b, c) · + │ │ count 1 · · + │ └── scan · · (a, b, c) · │ table abc@primary · · │ spans ALL · · │ filter c = (a + 3) · · diff --git a/pkg/sql/opt/norm/custom_funcs.go b/pkg/sql/opt/norm/custom_funcs.go index cf04d2a78cab..d53dd20696ae 100644 --- a/pkg/sql/opt/norm/custom_funcs.go +++ b/pkg/sql/opt/norm/custom_funcs.go @@ -370,6 +370,12 @@ func (c *CustomFuncs) PruneOrdering( return ordCopy } +// EmptyOrdering returns a pseudo-choice that does not require any +// ordering. +func (c *CustomFuncs) EmptyOrdering() physical.OrderingChoice { + return physical.OrderingChoice{} +} + // ----------------------------------------------------------------------- // // Filter functions @@ -1237,6 +1243,21 @@ func (c *CustomFuncs) MakeOrderedGrouping( return &memo.GroupingPrivate{GroupingCols: groupingCols, Ordering: ordering} } +// IsLimited indicates whether a limit was pushed under the subquery +// already. See e.g. the rule IntroduceExistsLimit. +func (c *CustomFuncs) IsLimited(sub *memo.SubqueryPrivate) bool { + return sub.WasLimited +} + +// MakeLimited specifies that the subquery has a limit set +// already. This prevents e.g. the rule IntroduceExistsLimit from +// applying twice. +func (c *CustomFuncs) MakeLimited(sub *memo.SubqueryPrivate) *memo.SubqueryPrivate { + newSub := *sub + newSub.WasLimited = true + return &newSub +} + // ---------------------------------------------------------------------- // // Numeric Rules diff --git a/pkg/sql/opt/norm/rules/scalar.opt b/pkg/sql/opt/norm/rules/scalar.opt index 2f6a4e435f16..dba6b86d277f 100644 --- a/pkg/sql/opt/norm/rules/scalar.opt +++ b/pkg/sql/opt/norm/rules/scalar.opt @@ -167,6 +167,31 @@ [EliminateExistsGroupBy, Normalize] (Exists (GroupBy | DistinctOn $input:*) $subqueryPrivate:*) => (Exists $input $subqueryPrivate) +# ExistsLimit inserts a LIMIT 1 "under" Exists so as to save resources +# to make the EXISTS determination. +# +# This rule uses and sets a boolean "WasLimited" on the Exists +# node to ensure the rule is only applied once. This is because the +# rule expands to an Exists pattern that's also a valid input pattern +# and it would recurse otherwise. +# +# We avoid this rule if the query is decorrelated because the +# decorrelation rules get confused by the presence of a limit. +# (It will be worth re-considering this when a general-purpose apply +# operator is supported - in that case it can be definitely worthwhile +# pushing down a LIMIT 1 to limit the amount of work done on every row.) +[IntroduceExistsLimit, Normalize] +(Exists + $input:^(Project | GroupBy | DistinctOn) & ^(HasOuterCols $input) & ^(HasZeroOrOneRow $input) + $subqueryPrivate:* & ^(IsLimited $subqueryPrivate) +) +=> +(Exists + (Limit $input (Const 1) (EmptyOrdering)) + (MakeLimited $subqueryPrivate) +) + + # NormalizeJSONFieldAccess transforms field access into a containment with a # simpler LHS. This allows inverted index constraints to be generated in some # cases. diff --git a/pkg/sql/opt/norm/testdata/rules/decorrelate b/pkg/sql/opt/norm/testdata/rules/decorrelate index 18fd59c380e3..3bcbe6aa5029 100644 --- a/pkg/sql/opt/norm/testdata/rules/decorrelate +++ b/pkg/sql/opt/norm/testdata/rules/decorrelate @@ -2728,8 +2728,9 @@ select └── exists [type=bool] └── scan xy ├── columns: x:6(int!null) y:7(int) - ├── key: (6) - └── fd: (6)-->(7) + ├── limit: 1 + ├── key: () + └── fd: ()-->(6,7) # Hoist nested EXISTS. opt expect=HoistSelectExists @@ -2868,8 +2869,9 @@ select └── exists [type=bool] └── scan xy ├── columns: x:6(int!null) y:7(int) - ├── key: (6) - └── fd: (6)-->(7) + ├── limit: 1 + ├── key: () + └── fd: ()-->(6,7) # -------------------------------------------------- # HoistSelectExists + HoistSelectNotExists @@ -3383,8 +3385,9 @@ project ├── exists [type=bool] │ └── scan xy │ ├── columns: x:6(int!null) y:7(int) - │ ├── key: (6) - │ └── fd: (6)-->(7) + │ ├── limit: 1 + │ ├── key: () + │ └── fd: ()-->(6,7) └── true_agg IS NOT NULL [type=bool] # -------------------------------------------------- @@ -3470,8 +3473,9 @@ project ├── exists [type=bool] │ └── scan xy │ ├── columns: xy.x:12(int!null) xy.y:13(int) - │ ├── key: (12) - │ └── fd: (12)-->(13) + │ ├── limit: 1 + │ ├── key: () + │ └── fd: ()-->(12,13) └── variable: count_rows [type=int, outer=(16)] # Subquery in GroupBy aggregate (optbuilder creates correlated Project). @@ -3595,24 +3599,30 @@ values ├── fd: ()-->(13) └── tuple [type=tuple{bool}] └── exists [type=bool] - └── left-join + └── limit ├── columns: i:2(int) y:7(int) true:9(bool) rownum:12(int!null) - ├── fd: (12)-->(2), ()~~>(9) - ├── row-number - │ ├── columns: i:2(int) rownum:12(int!null) - │ ├── key: (12) - │ ├── fd: (12)-->(2) - │ └── scan a - │ └── columns: i:2(int) - ├── project - │ ├── columns: true:9(bool!null) y:7(int) - │ ├── fd: ()-->(9) - │ ├── scan xy - │ │ └── columns: y:7(int) - │ └── projections - │ └── true [type=bool] - └── filters - └── y = i [type=bool, outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(2,7,9,12) + ├── left-join + │ ├── columns: i:2(int) y:7(int) true:9(bool) rownum:12(int!null) + │ ├── fd: (12)-->(2), ()~~>(9) + │ ├── row-number + │ │ ├── columns: i:2(int) rownum:12(int!null) + │ │ ├── key: (12) + │ │ ├── fd: (12)-->(2) + │ │ └── scan a + │ │ └── columns: i:2(int) + │ ├── project + │ │ ├── columns: true:9(bool!null) y:7(int) + │ │ ├── fd: ()-->(9) + │ │ ├── scan xy + │ │ │ └── columns: y:7(int) + │ │ └── projections + │ │ └── true [type=bool] + │ └── filters + │ └── y = i [type=bool, outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] + └── const: 1 [type=int] # Don't hoist uncorrelated subquery. opt @@ -4520,13 +4530,19 @@ select │ └── fd: (1)-->(2-5) └── filters └── exists [type=bool] - └── select + └── limit ├── columns: y:7(int!null) + ├── cardinality: [0 - 1] + ├── key: () ├── fd: ()-->(7) - ├── scan xy - │ └── columns: y:7(int) - └── filters - └── y = 5 [type=bool, outer=(7), constraints=(/7: [/5 - /5]; tight), fd=()-->(7)] + ├── select + │ ├── columns: y:7(int!null) + │ ├── fd: ()-->(7) + │ ├── scan xy + │ │ └── columns: y:7(int) + │ └── filters + │ └── y = 5 [type=bool, outer=(7), constraints=(/7: [/5 - /5]; tight), fd=()-->(7)] + └── const: 1 [type=int] # ANY in Join On condition. opt expect=NormalizeJoinAnyFilter @@ -4637,12 +4653,18 @@ select └── filters └── not [type=bool] └── exists [type=bool] - └── select + └── limit ├── columns: y:7(int) - ├── scan xy - │ └── columns: y:7(int) - └── filters - └── (y = 5) IS NOT false [type=bool, outer=(7)] + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(7) + ├── select + │ ├── columns: y:7(int) + │ ├── scan xy + │ │ └── columns: y:7(int) + │ └── filters + │ └── (y = 5) IS NOT false [type=bool, outer=(7)] + └── const: 1 [type=int] # NOT ANY in Join On condition. opt expect=NormalizeJoinNotAnyFilter diff --git a/pkg/sql/opt/norm/testdata/rules/inline b/pkg/sql/opt/norm/testdata/rules/inline index e57294bf639e..047f9b5cb9b3 100644 --- a/pkg/sql/opt/norm/testdata/rules/inline +++ b/pkg/sql/opt/norm/testdata/rules/inline @@ -595,8 +595,9 @@ project │ └── scan xy │ ├── columns: x:7(int!null) y:8(int) │ ├── constraint: /7: [/1 - /2] - │ ├── key: (7) - │ └── fd: (7)-->(8) + │ ├── limit: 1 + │ ├── key: () + │ └── fd: ()-->(7,8) └── (k + 1) * 2 [type=int, outer=(1)] # Correlated subquery should be hoisted as usual. diff --git a/pkg/sql/opt/norm/testdata/rules/scalar b/pkg/sql/opt/norm/testdata/rules/scalar index 904934f5e809..250b96bade7c 100644 --- a/pkg/sql/opt/norm/testdata/rules/scalar +++ b/pkg/sql/opt/norm/testdata/rules/scalar @@ -375,8 +375,9 @@ select └── exists [type=bool] └── scan a ├── columns: k:7(int!null) i:8(int) - ├── key: (7) - └── fd: (7)-->(8) + ├── limit: 1 + ├── key: () + └── fd: ()-->(7,8) # -------------------------------------------------- # EliminateExistsGroupBy @@ -424,7 +425,10 @@ select └── filters └── exists [type=bool] └── scan a - └── columns: s:10(string) + ├── columns: s:10(string) + ├── limit: 1 + ├── key: () + └── fd: ()-->(10) opt expect=EliminateExistsGroupBy SELECT * FROM a WHERE EXISTS(SELECT DISTINCT ON (i) s FROM a) @@ -440,7 +444,10 @@ select └── filters └── exists [type=bool] └── scan a - └── columns: i:8(int) s:10(string) + ├── columns: i:8(int) s:10(string) + ├── limit: 1 + ├── key: () + └── fd: ()-->(8,10) # -------------------------------------------------- # EliminateExistsGroupBy + EliminateExistsProject @@ -459,7 +466,10 @@ select └── filters └── exists [type=bool] └── scan a - └── columns: i:8(int) s:10(string) + ├── columns: i:8(int) s:10(string) + ├── limit: 1 + ├── key: () + └── fd: ()-->(8,10) # -------------------------------------------------- # NormalizeJSONFieldAccess diff --git a/pkg/sql/opt/ops/scalar.opt b/pkg/sql/opt/ops/scalar.opt index f8b8244bbbeb..9384249747f8 100644 --- a/pkg/sql/opt/ops/scalar.opt +++ b/pkg/sql/opt/ops/scalar.opt @@ -37,16 +37,21 @@ define Subquery { # across all the subquery operators. [Private] define SubqueryPrivate { - OriginalExpr Subquery - Ordering Ordering + OriginalExpr Subquery + Ordering Ordering - # RequestedCol is set if there could possibly be other columns in the input - # (say, if there was an ordering that must be respected) besides the one that - # will eventually be output. - RequestedCol ColumnID + # RequestedCol is set if there could possibly be other columns in the input + # (say, if there was an ordering that must be respected) besides the one that + # will eventually be output. + RequestedCol ColumnID - # Cmp is only used for AnyOp. - Cmp Operator + # Cmp is only used for AnyOp. + Cmp Operator + + # WasLimited indicates a limit was applied "under" the subquery to + # restrict how many rows are fetched to determine the result. See + # e.g. the rule IntroduceExistsLimit. + WasLimited bool } # Any is a SQL operator that applies a comparison to every row of an input diff --git a/pkg/sql/optimize.go b/pkg/sql/optimize.go index f770e08cb90a..1591c1899419 100644 --- a/pkg/sql/optimize.go +++ b/pkg/sql/optimize.go @@ -18,7 +18,6 @@ import ( "context" "github.com/cockroachdb/cockroach/pkg/sql/distsqlrun" - "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util/log" ) @@ -68,19 +67,6 @@ func (p *planner) optimizeSubquery(ctx context.Context, sq *subquery) error { log.Infof(ctx, "optimizing subquery %d (%q)", sq.subquery.Idx, sq.subquery) } - if sq.execMode == distsqlrun.SubqueryExecModeExists || - sq.execMode == distsqlrun.SubqueryExecModeOneRow { - numRows := tree.DInt(1) - if sq.execMode == distsqlrun.SubqueryExecModeOneRow { - // When using a sub-query in a scalar context, we must - // appropriately reject sub-queries that return more than 1 - // row. - numRows = 2 - } - - sq.plan = &limitNode{plan: sq.plan, countExpr: tree.NewDInt(numRows)} - } - needed := make([]bool, len(planColumns(sq.plan))) if sq.execMode != distsqlrun.SubqueryExecModeExists { // EXISTS does not need values; the rest does. diff --git a/pkg/sql/subquery.go b/pkg/sql/subquery.go index f182cf5e7797..4321a9e067f7 100644 --- a/pkg/sql/subquery.go +++ b/pkg/sql/subquery.go @@ -275,10 +275,14 @@ func (v *subqueryVisitor) VisitPre(expr tree.Expr) (recurse bool, newExpr tree.E return false, expr } if t.Exists { + result.plan = &limitNode{plan: result.plan, countExpr: tree.NewDInt(1)} result.execMode = distsqlrun.SubqueryExecModeExists t.SetType(types.Bool) } else { - result.plan = &max1RowNode{plan: result.plan} + result.plan = &max1RowNode{ + plan: &limitNode{ + plan: result.plan, + countExpr: tree.NewDInt(2)}} result.execMode = distsqlrun.SubqueryExecModeOneRow }