From c0a727ca504a6b3c049fb52c5cd3c23988333b0c Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 29 Nov 2022 17:37:15 -0800 Subject: [PATCH] sql: fix left semi and left anti virtual lookup joins This commit fixes the execution of the left semi and left anti virtual lookup joins. The bug was that we forgot to project away the looked up columns (coming from the "right" side) which could then lead to wrong columns being used higher up the tree. The bug was introduced during 22.1 release cycle where we added the optimizer support for generating plans that could contain left semi and left anti virtual lookup joins. This commit fixes that issue as well as the output columns of such joins (I'm not sure whether there is a user facing impact of having incorrect "output columns"). Additionally, this commit fixes the execution of these virtual lookup joins to correctly return the input row only once. Previously, for left anti joins we'd be producing an output row if there was a match (which is wrong), and for both left semi and left anti we would emit an output row every time there was a match (but this should be done only once). (Although I'm not sure whether it is possible for virtual indexes to result in multiple looked up rows.) Also, as a minor simplification this commit makes it so that the output rows are not added into the row container for left semi and left anti and the container is not instantiated at all. Release note (bug fix): CockroachDB previously could incorrectly evaluate queries that performed left semi and left anti "virtual lookup" joins on tables in `pg_catalog` or `information_schema`. These join types can be planned when a subquery is used inside of a filter condition. The bug was introduced in 22.1.0 and is now fixed. --- .../logictest/testdata/logic_test/pg_catalog | 36 +++++++++++ pkg/sql/opt/xform/join_funcs.go | 1 + pkg/sql/opt_exec_factory.go | 16 +++-- pkg/sql/virtual_table.go | 62 +++++++++++++++---- 4 files changed, 98 insertions(+), 17 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index a1a3eac1241f..ab1f63258bef 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -5818,3 +5818,39 @@ order by attnum; attnum attname 1 x 2 y + +# Regression test for not projecting away looked up columns by the left semi +# virtual lookup join (#91012). +statement ok +CREATE TABLE t91012 (id INT, a_id INT); + +query I +SELECT + count(*) +FROM + pg_class AS t INNER JOIN pg_attribute AS a ON t.oid = a.attrelid +WHERE + a.attnotnull = 'f' + AND a.attname = 'a_id' + AND t.relname = 't91012' + AND a.atttypid + IN (SELECT oid FROM pg_type WHERE typname = ANY (ARRAY['int8'])); +---- +1 + +# Regression test for incorrectly handling left anti virtual lookup joins +# (#88096). +statement ok +CREATE TYPE mytype AS enum('hello') + +query I +SELECT + count(*) +FROM + pg_type AS t +WHERE + t.typrelid = 0 + AND NOT EXISTS(SELECT 1 FROM pg_type AS el WHERE el.oid = t.typelem AND el.typarray = t.oid) + AND t.typname LIKE 'myt%' +---- +1 diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 3554e21b7065..cb564de3fa3a 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -291,6 +291,7 @@ func (c *CustomFuncs) GenerateLookupJoins( // // It should be possible to support semi- and anti- joins. Left joins may be // possible with additional complexity. +// TODO(mgartner): update this comment. // // It should also be possible to support cases where all the virtual columns are // not covered by a single index by wrapping the lookup join in a Project that diff --git a/pkg/sql/opt_exec_factory.go b/pkg/sql/opt_exec_factory.go index df02934bf208..4aaa8bbcd048 100644 --- a/pkg/sql/opt_exec_factory.go +++ b/pkg/sql/opt_exec_factory.go @@ -761,10 +761,18 @@ func (ef *execFactory) constructVirtualTableLookupJoin( tableScan.index = idx vtableCols := colinfo.ResultColumnsFromColumns(tableDesc.GetID(), tableDesc.PublicColumns()) projectedVtableCols := planColumns(&tableScan) - outputCols := make(colinfo.ResultColumns, 0, len(inputCols)+len(projectedVtableCols)) - outputCols = append(outputCols, inputCols...) - outputCols = append(outputCols, projectedVtableCols...) - // joinType is either INNER or LEFT_OUTER. + var outputCols colinfo.ResultColumns + switch joinType { + case descpb.InnerJoin, descpb.LeftOuterJoin: + outputCols = make(colinfo.ResultColumns, 0, len(inputCols)+len(projectedVtableCols)) + outputCols = append(outputCols, inputCols...) + outputCols = append(outputCols, projectedVtableCols...) + case descpb.LeftSemiJoin, descpb.LeftAntiJoin: + outputCols = make(colinfo.ResultColumns, 0, len(inputCols)) + outputCols = append(outputCols, inputCols...) + default: + return nil, errors.AssertionFailedf("unexpected join type for virtual lookup join: %s", joinType.String()) + } pred := makePredicate(joinType, inputCols, projectedVtableCols) pred.onCond = pred.iVarHelper.Rebind(onCond) n := &vTableLookupJoinNode{ diff --git a/pkg/sql/virtual_table.go b/pkg/sql/virtual_table.go index 7a46138d2813..c7cfb6001cca 100644 --- a/pkg/sql/virtual_table.go +++ b/pkg/sql/virtual_table.go @@ -235,9 +235,13 @@ type vTableLookupJoinNode struct { // run contains the runtime state of this planNode. run struct { + // matched indicates whether the current input row had at least one + // match. + matched bool // row contains the next row to output. row tree.Datums - // rows contains the next rows to output, except for row. + // rows contains the next rows to output, except for row. Only allocated + // for inner and left outer joins. rows *rowcontainer.RowContainer keyCtx constraint.KeyContext @@ -257,10 +261,14 @@ var _ rowPusher = &vTableLookupJoinNode{} // startExec implements the planNode interface. func (v *vTableLookupJoinNode) startExec(params runParams) error { v.run.keyCtx = constraint.KeyContext{EvalCtx: params.EvalContext()} - v.run.rows = rowcontainer.NewRowContainer( - params.EvalContext().Mon.MakeBoundAccount(), - colinfo.ColTypeInfoFromResCols(v.columns), - ) + if v.joinType == descpb.InnerJoin || v.joinType == descpb.LeftOuterJoin { + v.run.rows = rowcontainer.NewRowContainer( + params.EvalContext().Mon.MakeBoundAccount(), + colinfo.ColTypeInfoFromResCols(v.columns), + ) + } else if v.joinType != descpb.LeftSemiJoin && v.joinType != descpb.LeftAntiJoin { + return errors.AssertionFailedf("unexpected join type for virtual lookup join: %s", v.joinType.String()) + } v.run.indexKeyDatums = make(tree.Datums, len(v.columns)) var err error db, err := params.p.Descriptors().GetImmutableDatabaseByName( @@ -285,7 +293,7 @@ func (v *vTableLookupJoinNode) Next(params runParams) (bool, error) { v.run.params = ¶ms for { // Check if there are any rows left to emit from the last input row. - if v.run.rows.Len() > 0 { + if v.run.rows != nil && v.run.rows.Len() > 0 { copy(v.run.row, v.run.rows.At(0)) v.run.rows.PopFirst(params.ctx) return true, nil @@ -315,18 +323,38 @@ func (v *vTableLookupJoinNode) Next(params runParams) (bool, error) { ) // Add the input row to the left of the scratch row. v.run.row = append(v.run.row[:0], inputRow...) + v.run.matched = false // Finally, we're ready to do the lookup. This invocation will push all of // the looked-up rows into v.run.rows. if err := genFunc(params.ctx, v); err != nil { return false, err } - if v.run.rows.Len() == 0 && v.joinType == descpb.LeftOuterJoin { - // No matches - construct an outer match. - v.run.row = v.run.row[:len(v.inputCols)] - for i := len(inputRow); i < len(v.columns); i++ { - v.run.row = append(v.run.row, tree.DNull) + switch v.joinType { + case descpb.LeftOuterJoin: + if !v.run.matched { + // No matches - construct an outer match. + v.run.row = v.run.row[:len(v.inputCols)] + for i := len(inputRow); i < len(v.columns); i++ { + v.run.row = append(v.run.row, tree.DNull) + } + return true, nil + } + case descpb.LeftSemiJoin: + if v.run.matched { + // This input row had a match, so it should be emitted. + // + // Reset our output row to just the contents of the input row. + v.run.row = v.run.row[:len(v.inputCols)] + return true, nil + } + case descpb.LeftAntiJoin: + if !v.run.matched { + // This input row didn't have a match, so it should be emitted. + // + // Reset our output row to just the contents of the input row. + v.run.row = v.run.row[:len(v.inputCols)] + return true, nil } - return true, nil } } } @@ -347,6 +375,12 @@ func (v *vTableLookupJoinNode) pushRow(lookedUpRow ...tree.Datum) error { v.run.row[len(v.inputCols):]); !ok || err != nil { return err } + v.run.matched = true + if v.joinType == descpb.LeftSemiJoin || v.joinType == descpb.LeftAntiJoin { + // Avoid adding the row into the container since for left semi and left + // anti joins we only care to know whether there was a match or not. + return nil + } _, err := v.run.rows.AddRow(v.run.params.ctx, v.run.row) return err } @@ -359,5 +393,7 @@ func (v *vTableLookupJoinNode) Values() tree.Datums { // Close implements the planNode interface. func (v *vTableLookupJoinNode) Close(ctx context.Context) { v.input.Close(ctx) - v.run.rows.Close(ctx) + if v.run.rows != nil { + v.run.rows.Close(ctx) + } }