Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

norm: push Select into UniqueKeyExpr #109903

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/select
Original file line number Diff line number Diff line change
Expand Up @@ -862,3 +862,81 @@ SELECT -488 OF FROM t;
----
of
-488

subtest 109751_regr

# Regression test for #109751

statement ok
CREATE TABLE trm (
id UUID NOT NULL,
trid UUID NOT NULL,
ts12 TIMESTAMP NOT NULL
);
INSERT INTO trm VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9882',
'5ebfedee-0dcf-41e6-a315-5fa0b51b9882',
'1999-12-31 23:59:59');
INSERT INTO trm VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9883',
'5ebfedee-0dcf-41e6-a315-5fa0b51b9883',
'1999-12-31 23:59:58');
INSERT INTO trm VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9882',
'5ebfedee-0dcf-41e6-a315-5fa0b51b9882',
'1999-11-30 23:59:59');
INSERT INTO trm VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9883',
'5ebfedee-0dcf-41e6-a315-5fa0b51b9883',
'1999-11-30 23:59:58');
INSERT INTO trm VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9884',
'5ebfedee-0dcf-41e6-a315-5fa0b51b9884',
'1999-11-30 23:59:57');

statement ok
CREATE TABLE trrec (
id UUID NOT NULL,
trid STRING NOT NULL,
ts12 TIMESTAMP NOT NULL,
str16 STRING NULL,
INDEX trrec_idx5 (str16 ASC)
);
INSERT INTO trrec VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9882', '1', '1999-12-31 23:59:59', '12345');
INSERT INTO trrec VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9883', '2', '1999-12-31 23:59:58', '12345');
INSERT INTO trrec VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9884', '3', '1999-12-31 23:59:57', '123456');

statement ok
CREATE TABLE trtab4 (
id UUID NOT NULL,
trid UUID NOT NULL,
dec1 DECIMAL(19,2) NOT NULL
);
INSERT INTO trtab4 VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9882', '5ebfedee-0dcf-41e6-a315-5fa0b51b9882', 1.0);
INSERT INTO trtab4 VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9883', '5ebfedee-0dcf-41e6-a315-5fa0b51b9883', 2.0);
INSERT INTO trtab4 VALUES('5ebfedee-0dcf-41e6-a315-5fa0b51b9884', '5ebfedee-0dcf-41e6-a315-5fa0b51b9884', 3.0);

query TTT
WITH
with2
AS (
SELECT
tq.trid, tq.dec1
FROM
trrec AS r INNER JOIN trtab4 AS tq ON r.id = tq.trid AND r.str16 = '12345'
)
SELECT tr.id, tr.trid, val3.ts12
FROM
trrec AS tr
INNER JOIN LATERAL (
SELECT q.dec1 FROM with2 AS q WHERE tr.id = q.trid
) AS q ON true
INNER JOIN LATERAL (
SELECT
m.ts12
FROM trm AS m WHERE tr.id = m.trid
ORDER BY m.ts12 ASC
LIMIT 1
) AS val3 ON true
WHERE
tr.str16 = '12345'
ORDER BY 1 DESC
;
----
5ebfedee-0dcf-41e6-a315-5fa0b51b9883 2 1999-11-30 23:59:58 +0000 +0000
5ebfedee-0dcf-41e6-a315-5fa0b51b9882 1 1999-11-30 23:59:59 +0000 +0000
30 changes: 30 additions & 0 deletions pkg/sql/opt/exec/execbuilder/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ func (b *Builder) buildRelational(e memo.RelExpr) (execPlan, error) {
case *memo.ProjectSetExpr:
ep, err = b.buildProjectSet(t)

case *memo.UniqueKeyExpr:
ep, err = b.buildUniqueKey(t)

case *memo.WindowExpr:
ep, err = b.buildWindow(t)

Expand Down Expand Up @@ -2148,6 +2151,32 @@ func (b *Builder) buildOrdinality(ord *memo.OrdinalityExpr) (execPlan, error) {
return execPlan{root: node, outputCols: outputCols}, nil
}

// buildUniqueKey plans the input of a UniqueKey expression and appends one
// extra column that holds a unique number for every row. Nothing is
// guaranteed about the generated numbers beyond uniqueness: they need not form
// a sequence and they need not be deterministic. The ordinality operation is
// used to produce them today, but that may change in the future, for example
// to use non-sequenced numbers.
func (b *Builder) buildUniqueKey(uniqueKeyExpr *memo.UniqueKeyExpr) (execPlan, error) {
	inputPlan, err := b.buildRelational(uniqueKeyExpr.Input)
	if err != nil {
		return execPlan{}, err
	}

	// The new column is labeled with the alias recorded in the metadata.
	keyColID := uniqueKeyExpr.ColID
	keyColName := b.mem.Metadata().ColumnMeta(keyColID).Alias
	keyNode, err := b.factory.ConstructOrdinality(inputPlan.root, keyColName)
	if err != nil {
		return execPlan{}, err
	}

	// The unique key column is the single additional column and is placed at
	// the end of the list, so it is mapped to the current length of the
	// copied column map.
	cols := inputPlan.outputCols.Copy()
	cols.Set(int(keyColID), cols.Len())

	return execPlan{root: keyNode, outputCols: cols}, nil
}

func (b *Builder) buildIndexJoin(join *memo.IndexJoinExpr) (execPlan, error) {
input, err := b.buildRelational(join.Input)
if err != nil {
Expand Down Expand Up @@ -3597,6 +3626,7 @@ var boundedStalenessAllowList = map[opt.Operator]struct{}{
opt.OffsetOp: {},
opt.SortOp: {},
opt.OrdinalityOp: {},
opt.UniqueKeyOp: {},
opt.Max1RowOp: {},
opt.ProjectSetOp: {},
opt.WindowOp: {},
Expand Down
47 changes: 47 additions & 0 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,53 @@ func (b *logicalPropsBuilder) buildOrdinalityProps(ord *OrdinalityExpr, rel *pro
}
}

// buildUniqueKeyProps fills in the relational properties of a UniqueKey
// expression. All properties are inherited from the input, extended with the
// synthesized key column: it is added to the output columns, marked not-null,
// and registered as a strict key over all output columns.
func (b *logicalPropsBuilder) buildUniqueKeyProps(
	uniqueKeyExpr *UniqueKeyExpr, rel *props.Relational,
) {
	// Shared properties; this also derives the outer columns.
	BuildSharedProps(uniqueKeyExpr, &rel.Shared, b.evalCtx)

	in := uniqueKeyExpr.Input.Relational()
	keyCol := uniqueKeyExpr.ColID

	// Output columns: everything the input projects plus the key column.
	rel.OutputCols = in.OutputCols.Copy()
	rel.OutputCols.Add(keyCol)

	// Not-null columns: the input's not-null columns plus the key column,
	// which is never null.
	rel.NotNullCols = in.NotNullCols.Copy()
	rel.NotNullCols.Add(keyCol)

	// Functional dependencies: start from the input's FDs. A strict key of the
	// input remains a key here, now determining the widened set of output
	// columns. The key column is itself a strict key.
	rel.FuncDeps.CopyFrom(&in.FuncDeps)
	if existingKey, hasKey := rel.FuncDeps.StrictKey(); hasKey {
		rel.FuncDeps.AddStrictKey(existingKey, rel.OutputCols)
	}
	rel.FuncDeps.AddStrictKey(opt.MakeColSet(keyCol), rel.OutputCols)

	// Cardinality: unchanged from the input.
	rel.Cardinality = in.Cardinality

	// Statistics: only built when stats are enabled.
	if b.disableStats {
		return
	}
	b.sb.buildUniqueKey(uniqueKeyExpr, rel)
}

func (b *logicalPropsBuilder) buildWindowProps(window *WindowExpr, rel *props.Relational) {
BuildSharedProps(window, &rel.Shared, b.evalCtx)

Expand Down
49 changes: 49 additions & 0 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,9 @@ func (sb *statisticsBuilder) colStat(colSet opt.ColSet, e RelExpr) *props.Column
case opt.OrdinalityOp:
return sb.colStatOrdinality(colSet, e.(*OrdinalityExpr))

case opt.UniqueKeyOp:
return sb.colStatUniqueKey(colSet, e.(*UniqueKeyExpr))

case opt.WindowOp:
return sb.colStatWindow(colSet, e.(*WindowExpr))

Expand Down Expand Up @@ -2383,6 +2386,52 @@ func (sb *statisticsBuilder) colStatOrdinality(
return colStat
}

// +---------------+
// | UniqueKey |
// +---------------+

// buildUniqueKey initializes the statistics of a UniqueKey expression. The
// row count is taken directly from the input and is then finalized against
// the expression's cardinality.
func (sb *statisticsBuilder) buildUniqueKey(
	uniqueKeyExpr *UniqueKeyExpr, relProps *props.Relational,
) {
	stats := relProps.Statistics()
	if stats.Init(relProps) {
		// Init reported zero cardinality; there is nothing more to compute.
		return
	}
	stats.Available = sb.availabilityFromInput(uniqueKeyExpr)

	// The row count is copied unchanged from the input.
	stats.RowCount = uniqueKeyExpr.Input.Relational().Statistics().RowCount
	sb.finalizeFromCardinality(relProps)
}

// colStatUniqueKey computes the column statistic for colSet on a UniqueKey
// expression and records it in the expression's statistics.
//
// If colSet includes the synthesized unique key column, every row is distinct,
// so the distinct count equals the row count and there are no nulls. Otherwise
// the statistic is taken from the input expression.
func (sb *statisticsBuilder) colStatUniqueKey(
	colSet opt.ColSet, uniqueKeyExpr *UniqueKeyExpr,
) *props.ColumnStatistic {
	relProps := uniqueKeyExpr.Relational()
	s := relProps.Statistics()

	colStat, _ := s.ColStats.Add(colSet)

	if colSet.Contains(uniqueKeyExpr.ColID) {
		// The unique key column is a key, so every row is distinct.
		colStat.DistinctCount = s.RowCount
		colStat.NullCount = 0
	} else {
		// Only consult the input when colSet consists solely of input columns.
		// The unique key column is not produced by the input, so requesting a
		// statistic that includes it from the child would be meaningless.
		inputColStat := sb.colStatFromChild(colSet, uniqueKeyExpr, 0 /* childIdx */)
		colStat.DistinctCount = inputColStat.DistinctCount
		colStat.NullCount = inputColStat.NullCount
	}

	if colSet.Intersects(relProps.NotNullCols) {
		colStat.NullCount = 0
	}
	sb.finalizeFromRowCountAndDistinctCounts(colStat, s)
	return colStat
}

// +------------+
// | Window |
// +------------+
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/testdata/typing
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ project
│ ├── grouping columns: rownum:14(int!null)
│ ├── left-join (cross)
│ │ ├── columns: expr:5(int!null) true:11(bool) rownum:14(int!null)
│ │ ├── ordinality
│ │ ├── unique-key
│ │ │ ├── columns: expr:5(int!null) rownum:14(int!null)
│ │ │ └── project
│ │ │ ├── columns: expr:5(int!null)
Expand Down
10 changes: 5 additions & 5 deletions pkg/sql/opt/norm/decorrelate_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,10 +376,10 @@ func (c *CustomFuncs) ConstructApplyJoin(
// strong key exists and the input expression is a Scan or a Scan wrapped in a
// Select, EnsureKey returns a new Scan (possibly wrapped in a Select) with the
// preexisting primary key for the table. If the input is not a Scan or
// Select(Scan), EnsureKey wraps the input in an Ordinality operator, which
// Select(Scan), EnsureKey wraps the input in a UniqueKey operator, which
// provides a key column by uniquely numbering the rows. EnsureKey returns the
// input expression (perhaps augmented with a key column(s) or wrapped by
// Ordinality).
// UniqueKey).
func (c *CustomFuncs) EnsureKey(in memo.RelExpr) memo.RelExpr {
_, ok := c.CandidateKey(in)
if ok {
Expand All @@ -392,10 +392,10 @@ func (c *CustomFuncs) EnsureKey(in memo.RelExpr) memo.RelExpr {
return res
}

// Otherwise, wrap the input in an Ordinality operator.
// Otherwise, wrap the input in a UniqueKey operator.
colID := c.f.Metadata().AddColumn("rownum", types.Int)
private := memo.OrdinalityPrivate{ColID: colID}
return c.f.ConstructOrdinality(in, &private)
private := memo.UniqueKeyPrivate{ColID: colID}
return c.f.ConstructUniqueKey(in, &private)
}

// TryAddKeyToScan checks whether the input expression is a non-virtual table
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/opt/norm/prune_cols_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,14 @@ func (c *CustomFuncs) DerivePruneCols(e memo.RelExpr, disabledRules intsets.Fast
inputPruneCols := c.DerivePruneCols(ord.Input, disabledRules)
relProps.Rule.PruneCols = inputPruneCols.Difference(ord.Ordering.ColSet())

case opt.UniqueKeyOp:
if disabledRules.Contains(int(opt.PruneUniqueKeyCols)) {
// Avoid rule cycles.
break
}
uniqueKeyExpr := e.(*memo.UniqueKeyExpr)
relProps.Rule.PruneCols = c.DerivePruneCols(uniqueKeyExpr.Input, disabledRules)

case opt.IndexJoinOp, opt.LookupJoinOp, opt.MergeJoinOp:
// There is no need to prune columns projected by Index, Lookup or Merge
// joins, since its parent will always be an "alternate" expression in the
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/norm/rules/decorrelate.opt
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@
# column would not be constant necessarily, hence the use of ANY_NOT_NULL
# instead of CONST_AGG.
#
# An ordinality column only needs to be synthesized if "left" does not already
# A unique key column only needs to be synthesized if "left" does not already
# have a strict key. We wrap the output in a Project operator to ensure that
# the original output columns are preserved and the unique key column is not
# inadvertently added as a new output column.
Expand Down Expand Up @@ -485,7 +485,7 @@
# In this example, the "notnull" canary is needed to determine if the value of
# the ARRAY_AGG aggregation should be NULL or {NULL}.
#
# An ordinality column only needs to be synthesized if "left" does not already
# A unique key column only needs to be synthesized if "left" does not already
# have a key. The "true" column only needs to be added if "input" does not
# already have a not-null column (and COUNT(*) is used).
#
Expand Down
21 changes: 21 additions & 0 deletions pkg/sql/opt/norm/rules/prune_cols.opt
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,27 @@
$passthrough
)

# PruneUniqueKeyCols discards UniqueKey input columns that are never used.
# The rule matches a Project directly over a UniqueKey. The needed column set
# is the union of the projections' outer columns (ProjectionOuterCols) and the
# Project's passthrough columns, and the rule fires only when CanPruneCols
# accepts the input for that set. The UniqueKey private is carried over
# unchanged; only the UniqueKey's input is replaced by its pruned form.
[PruneUniqueKeyCols, Normalize]
(Project
(UniqueKey $input:* $uniqueKeyPrivate:*)
$projections:*
$passthrough:* &
(CanPruneCols
$input
$needed:(UnionCols
(ProjectionOuterCols $projections)
$passthrough
)
)
)
=>
(Project
(UniqueKey (PruneCols $input $needed) $uniqueKeyPrivate)
$projections
$passthrough
)

# PruneExplainCols discards Explain input columns that are never used by its
# required physical properties.
[PruneExplainCols, Normalize]
Expand Down
28 changes: 28 additions & 0 deletions pkg/sql/opt/norm/rules/select.opt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,34 @@ $input
(ExtractUnboundConditions $filters $inputCols)
)

# PushSelectIntoUniqueKey pushes the Select operator into its UniqueKey input.
# This is typically preferable because it allows the Select to be pushed into
# operations beneath the UniqueKey, minimizing the number of rows subsequent
# operations need to process and potentially pushing the Select down far enough
# to enable constrained scans. This may also enable other normalization rules,
# which match on Select expressions, to fire.
#
# The rule matches when at least one filter item is bound by the output
# columns of the UniqueKey's input. The conditions returned by
# ExtractBoundConditions go into a new Select beneath the UniqueKey, while the
# conditions returned by ExtractUnboundConditions stay in the outer Select.
# The UniqueKey private is carried over unchanged.
[PushSelectIntoUniqueKey, Normalize]
(Select
(UniqueKey $input:* $private:*)
$filters:[
...
$item:* &
(IsBoundBy $item $inputCols:(OutputCols $input))
...
]
)
=>
(Select
(UniqueKey
(Select
$input
(ExtractBoundConditions $filters $inputCols)
)
$private
)
(ExtractUnboundConditions $filters $inputCols)
)

# MergeSelectInnerJoin merges a Select operator with an InnerJoin input by
# AND'ing the filter conditions of each and creating a new InnerJoin with that
# On condition. This is only safe to do with InnerJoin in the general case
Expand Down
Loading