Skip to content

Commit

Permalink
Merge #34801
Browse files Browse the repository at this point in the history
34801: sql: fix the subquery limit optimization r=knz a=knz

Found this while investigating #32054.

A while ago the heuristic planner (HP) was equipped with an optimization: when a subquery
is planned for EXISTS or "max 1 row" (scalar context), a LIMIT is
applied on its data source. This ensures that the data source does not
fetch more rows than strictly necessary to determine the subquery
result:

- for EXISTS, only 0 or 1 row are needed to decide the boolean;
- for scalar contexts, only 0, 1 or 2 rows are needed to decide the
  outcome: 0 or 2 rows yield an error, only 1 row gets a valid result.

This optimization was temporarily broken for the scalar case when
`max1row` was introduced (when local exec was subsumed by distsql),
because the limit remained "on top" of `max1row` and was not
propagated down. This patch places it "under" `max1row` so it gets
propagated again.

Release note (performance improvement): subqueries used with EXISTS or
as a scalar value now avoid fetching more rows than needed to decide
the outcome.

Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
craig[bot] and knz committed Feb 13, 2019
2 parents 6819541 + 462d3aa commit 8ac64be
Show file tree
Hide file tree
Showing 12 changed files with 176 additions and 98 deletions.
22 changes: 11 additions & 11 deletions pkg/sql/explain_tree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,9 @@ create table t.actors (
1 .id @S1 (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
1 .original sql (SELECT name FROM t.public.actors WHERE name = 'Foo') (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
1 .exec mode one row (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
2 limit (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
2 .count (2)[int] (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
3 max1row (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
2 max1row (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
3 limit (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
3 .count (2)[int] (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
4 render (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
4 .render 0 (@2)[string] (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
5 scan (movie_id int, title string, name string) name=CONST; movie_id!=NULL; key(movie_id)
Expand Down Expand Up @@ -442,16 +442,16 @@ create table t.actors (
},
Children: []*roachpb.ExplainTreePlanNode{
{
Name: "limit",
Attrs: []*roachpb.ExplainTreePlanNode_Attr{
{
Key: "count",
Value: "_",
},
},
Name: "max1row",
Children: []*roachpb.ExplainTreePlanNode{
{
Name: "max1row",
Name: "limit",
Attrs: []*roachpb.ExplainTreePlanNode_Attr{
{
Key: "count",
Value: "_",
},
},
Children: []*roachpb.ExplainTreePlanNode{
{
Name: "render",
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/logictest/testdata/planner_test/needed_columns
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ root · ·
│ id @S1 · ·
│ original sql (SELECT 2 AS x FROM (SELECT 3 AS s)) · ·
│ exec mode one row · ·
└── limit · · (x) x=CONST
count 2 · ·
└── max1row · · (x) x=CONST
└── max1row · · (x) x=CONST
└── limit · · (x) x=CONST
count 2 · ·
└── render · · (x) x=CONST
│ render 0 2 · ·
└── render · · (s[omitted]) ·
Expand Down
31 changes: 16 additions & 15 deletions pkg/sql/logictest/testdata/planner_test/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ root · ·
│ id @S1
│ original sql (SELECT 1)
│ exec mode one row
└── limit · ·
count 2
└── max1row · ·
└── max1row · ·
└── limit · ·
count 2
└── render · ·
└── emptyrow · ·

Expand All @@ -36,9 +36,9 @@ root · ·
│ id @S1
│ original sql (SELECT 42)
│ exec mode one row
└── limit · ·
count 2
└── max1row · ·
└── max1row · ·
└── limit · ·
count 2
└── render · ·
└── emptyrow · ·

Expand Down Expand Up @@ -73,13 +73,14 @@ root · ·
│ id @S1
│ original sql (SELECT a FROM abc)
│ exec mode one row
└── limit · ·
count 2
└── max1row · ·
└── max1row · ·
└── limit · ·
count 2
└── render · ·
└── scan · ·
· table abc@primary
· spans ALL
· limit 2

query TTTTT
EXPLAIN (VERBOSE) SELECT * FROM abc WHERE a = (SELECT max(a) FROM abc WHERE EXISTS(SELECT * FROM abc WHERE c=a+3))
Expand All @@ -103,9 +104,9 @@ root · ·
│ id @S2 · ·
│ original sql (SELECT max(a) FROM abc WHERE EXISTS (SELECT * FROM abc WHERE c = (a + 3))) · ·
│ exec mode one row · ·
└── limit · · (max) ·
count 2 · ·
└── max1row · · (max) ·
└── max1row · · (max) ·
└── limit · · (max) ·
count 2 · ·
└── group · · (max) ·
│ aggregate 0 max(a) · ·
│ scalar · · ·
Expand Down Expand Up @@ -156,9 +157,9 @@ root · ·
│ id @S1
│ original sql (SELECT 2)
│ exec mode one row
└── limit · ·
count 2
└── max1row · ·
└── max1row · ·
└── limit · ·
count 2
└── render · ·
└── emptyrow · ·

Expand Down
11 changes: 7 additions & 4 deletions pkg/sql/opt/exec/execbuilder/testdata/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ root · ·
│ id @S1
│ original sql (SELECT 42)
│ exec mode one row
└── limit · ·
count 2
└── max1row · ·
└── max1row · ·
└── limit · ·
count 2
└── render · ·
└── emptyrow · ·

Expand All @@ -43,6 +43,7 @@ root · ·
└── scan · ·
· table abc@primary
· spans ALL
· limit 1

query TTTTT
EXPLAIN (VERBOSE) SELECT * FROM abc WHERE a = (SELECT max(a) FROM abc WHERE EXISTS(SELECT * FROM abc WHERE c=a+3))
Expand All @@ -56,7 +57,9 @@ root · ·
│ │ id @S1 · ·
│ │ original sql EXISTS (SELECT * FROM abc WHERE c = (a + 3)) · ·
│ │ exec mode exists · ·
│ └── scan · · (a, b, c) ·
│ └── limit · · (a, b, c) ·
│ │ count 1 · ·
│ └── scan · · (a, b, c) ·
│ table abc@primary · ·
│ spans ALL · ·
│ filter c = (a + 3) · ·
Expand Down
21 changes: 21 additions & 0 deletions pkg/sql/opt/norm/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,12 @@ func (c *CustomFuncs) PruneOrdering(
return ordCopy
}

// EmptyOrdering produces the zero-value OrderingChoice, i.e. a
// placeholder ordering that imposes no ordering requirement at all.
// It is meant for rules that must supply an ordering operand but do
// not care about row order.
func (c *CustomFuncs) EmptyOrdering() physical.OrderingChoice {
	var noOrdering physical.OrderingChoice
	return noOrdering
}

// -----------------------------------------------------------------------
//
// Filter functions
Expand Down Expand Up @@ -1237,6 +1243,21 @@ func (c *CustomFuncs) MakeOrderedGrouping(
return &memo.GroupingPrivate{GroupingCols: groupingCols, Ordering: ordering}
}

// IsLimited reports the WasLimited flag of the given subquery
// private, i.e. whether a limit has already been pushed under the
// subquery. See e.g. the rule IntroduceExistsLimit.
func (c *CustomFuncs) IsLimited(sub *memo.SubqueryPrivate) bool {
	alreadyLimited := sub.WasLimited
	return alreadyLimited
}

// MakeLimited returns a shallow copy of the given subquery private
// with its WasLimited flag turned on; the argument itself is left
// untouched. The flag records that a limit is already in place, which
// prevents e.g. the rule IntroduceExistsLimit from applying twice.
func (c *CustomFuncs) MakeLimited(sub *memo.SubqueryPrivate) *memo.SubqueryPrivate {
	limited := new(memo.SubqueryPrivate)
	*limited = *sub
	limited.WasLimited = true
	return limited
}

// ----------------------------------------------------------------------
//
// Numeric Rules
Expand Down
25 changes: 25 additions & 0 deletions pkg/sql/opt/norm/rules/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,31 @@
[EliminateExistsGroupBy, Normalize]
(Exists (GroupBy | DistinctOn $input:*) $subqueryPrivate:*) => (Exists $input $subqueryPrivate)

# IntroduceExistsLimit inserts a LIMIT 1 "under" Exists so as to save
# resources to make the EXISTS determination. The Limit is built with
# an empty ordering, since any single row is enough.
#
# This rule uses and sets a boolean "WasLimited" on the subquery
# private of the Exists node to ensure the rule is only applied once.
# This is because the rule expands to an Exists pattern that's also a
# valid input pattern and it would recurse otherwise.
#
# The rule only matches when the input:
# - is not a Project, GroupBy or DistinctOn (GroupBy and DistinctOn
#   under Exists are stripped by EliminateExistsGroupBy; presumably
#   Project is handled by a similar rule - TODO confirm);
# - has no outer columns (see below);
# - is not matched by HasZeroOrOneRow (presumably because a LIMIT 1
#   would be redundant in that case - TODO confirm).
#
# We avoid this rule if the subquery is correlated (i.e. its input has
# outer columns) because the decorrelation rules get confused by the
# presence of a limit.
# (It will be worth re-considering this when a general-purpose apply
# operator is supported - in that case it can be definitely worthwhile
# pushing down a LIMIT 1 to limit the amount of work done on every row.)
[IntroduceExistsLimit, Normalize]
(Exists
$input:^(Project | GroupBy | DistinctOn) & ^(HasOuterCols $input) & ^(HasZeroOrOneRow $input)
$subqueryPrivate:* & ^(IsLimited $subqueryPrivate)
)
=>
(Exists
(Limit $input (Const 1) (EmptyOrdering))
(MakeLimited $subqueryPrivate)
)


# NormalizeJSONFieldAccess transforms field access into a containment with a
# simpler LHS. This allows inverted index constraints to be generated in some
# cases.
Expand Down
92 changes: 57 additions & 35 deletions pkg/sql/opt/norm/testdata/rules/decorrelate
Original file line number Diff line number Diff line change
Expand Up @@ -2728,8 +2728,9 @@ select
└── exists [type=bool]
└── scan xy
├── columns: x:6(int!null) y:7(int)
├── key: (6)
└── fd: (6)-->(7)
├── limit: 1
├── key: ()
└── fd: ()-->(6,7)

# Hoist nested EXISTS.
opt expect=HoistSelectExists
Expand Down Expand Up @@ -2868,8 +2869,9 @@ select
└── exists [type=bool]
└── scan xy
├── columns: x:6(int!null) y:7(int)
├── key: (6)
└── fd: (6)-->(7)
├── limit: 1
├── key: ()
└── fd: ()-->(6,7)

# --------------------------------------------------
# HoistSelectExists + HoistSelectNotExists
Expand Down Expand Up @@ -3383,8 +3385,9 @@ project
├── exists [type=bool]
│ └── scan xy
│ ├── columns: x:6(int!null) y:7(int)
│ ├── key: (6)
│ └── fd: (6)-->(7)
│ ├── limit: 1
│ ├── key: ()
│ └── fd: ()-->(6,7)
└── true_agg IS NOT NULL [type=bool]

# --------------------------------------------------
Expand Down Expand Up @@ -3470,8 +3473,9 @@ project
├── exists [type=bool]
│ └── scan xy
│ ├── columns: xy.x:12(int!null) xy.y:13(int)
│ ├── key: (12)
│ └── fd: (12)-->(13)
│ ├── limit: 1
│ ├── key: ()
│ └── fd: ()-->(12,13)
└── variable: count_rows [type=int, outer=(16)]

# Subquery in GroupBy aggregate (optbuilder creates correlated Project).
Expand Down Expand Up @@ -3595,24 +3599,30 @@ values
├── fd: ()-->(13)
└── tuple [type=tuple{bool}]
└── exists [type=bool]
└── left-join
└── limit
├── columns: i:2(int) y:7(int) true:9(bool) rownum:12(int!null)
├── fd: (12)-->(2), ()~~>(9)
├── row-number
│ ├── columns: i:2(int) rownum:12(int!null)
│ ├── key: (12)
│ ├── fd: (12)-->(2)
│ └── scan a
│ └── columns: i:2(int)
├── project
│ ├── columns: true:9(bool!null) y:7(int)
│ ├── fd: ()-->(9)
│ ├── scan xy
│ │ └── columns: y:7(int)
│ └── projections
│ └── true [type=bool]
└── filters
└── y = i [type=bool, outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]
├── cardinality: [0 - 1]
├── key: ()
├── fd: ()-->(2,7,9,12)
├── left-join
│ ├── columns: i:2(int) y:7(int) true:9(bool) rownum:12(int!null)
│ ├── fd: (12)-->(2), ()~~>(9)
│ ├── row-number
│ │ ├── columns: i:2(int) rownum:12(int!null)
│ │ ├── key: (12)
│ │ ├── fd: (12)-->(2)
│ │ └── scan a
│ │ └── columns: i:2(int)
│ ├── project
│ │ ├── columns: true:9(bool!null) y:7(int)
│ │ ├── fd: ()-->(9)
│ │ ├── scan xy
│ │ │ └── columns: y:7(int)
│ │ └── projections
│ │ └── true [type=bool]
│ └── filters
│ └── y = i [type=bool, outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]
└── const: 1 [type=int]

# Don't hoist uncorrelated subquery.
opt
Expand Down Expand Up @@ -4520,13 +4530,19 @@ select
│ └── fd: (1)-->(2-5)
└── filters
└── exists [type=bool]
└── select
└── limit
├── columns: y:7(int!null)
├── cardinality: [0 - 1]
├── key: ()
├── fd: ()-->(7)
├── scan xy
│ └── columns: y:7(int)
└── filters
└── y = 5 [type=bool, outer=(7), constraints=(/7: [/5 - /5]; tight), fd=()-->(7)]
├── select
│ ├── columns: y:7(int!null)
│ ├── fd: ()-->(7)
│ ├── scan xy
│ │ └── columns: y:7(int)
│ └── filters
│ └── y = 5 [type=bool, outer=(7), constraints=(/7: [/5 - /5]; tight), fd=()-->(7)]
└── const: 1 [type=int]

# ANY in Join On condition.
opt expect=NormalizeJoinAnyFilter
Expand Down Expand Up @@ -4637,12 +4653,18 @@ select
└── filters
└── not [type=bool]
└── exists [type=bool]
└── select
└── limit
├── columns: y:7(int)
├── scan xy
│ └── columns: y:7(int)
└── filters
└── (y = 5) IS NOT false [type=bool, outer=(7)]
├── cardinality: [0 - 1]
├── key: ()
├── fd: ()-->(7)
├── select
│ ├── columns: y:7(int)
│ ├── scan xy
│ │ └── columns: y:7(int)
│ └── filters
│ └── (y = 5) IS NOT false [type=bool, outer=(7)]
└── const: 1 [type=int]

# NOT ANY in Join On condition.
opt expect=NormalizeJoinNotAnyFilter
Expand Down
Loading

0 comments on commit 8ac64be

Please sign in to comment.