From 57bd3c6cb311e3f9ceec5bf163938afb265e691a Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Thu, 25 May 2023 10:44:54 -0400 Subject: [PATCH] opt: loosen restriction on UDF mutations to the same table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To prevent index corruption described in #70731, optbuilder raises an error when a statement performs multiple mutations to the same table. This commit loosens this restriction for UDFs that perform mutations because it is overly strict. --- The index corruption described in #70731 occurs when a statement performs multiple writes to the same table. Any reads performed by successive writes see the snapshot of data as of the beginning of the statement. They do not read values as of the most recent write within the same statement. Because these successive writes are based on stale data, they can write incorrect KVs and cause inconsistencies between primary and secondary indexes. Each statement in a UDF body is essentially a child of the statement that is invoking the UDF. Mutations within UDFs are not as susceptible to the inconsistencies described above because a UDF with a mutation must be VOLATILE, and each statement in a VOLATILE UDFs reads at the latest sequence number. In other words, statements within UDFs can see previous writes made by any outer statement. This prevents inconsistencies due to writes based on stale reads. Therefore, the restriction that prevents multiple writes to the same table can be lifted in some cases when the writes are performed in UDFs. However, we cannot forgo restrictions for all writes in UDFs. A parent statement that calls a UDF cannot be allowed to mutate the same table that the UDF did. Unlike subsequent statements in the UDF after the write, the parent statement will not see the UDF's writes, and inconsistencies could occur. To define acceptable mutations to the same table within UDFs, we define a statement tree that represents the hierarchy of statements and sub-statements in a query. A sub-statement `sub` is any statement within a UDF. `sub`'s parent is the statement invoking the UDF. Other statements in the same UDF as `sub` are the `sub`'s siblings. Any statements in a UDF invoked by `sub` are `sub`'s children. For example, consider: CREATE FUNCTION f1() RETURNS INT LANGUAGE SQL AS 'SELECT 1'; CREATE FUNCTION f2() RETURNS INT LANGUAGE SQL AS 'SELECT 2 + f3()'; CREATE FUNCTION f3() RETURNS INT LANGUAGE SQL AS 'SELECT 3'; SELECT f1(), f2(), f3(); The statement tree for this SELECT would be: root: SELECT f1(), f2(), f3() ├── f1: SELECT 1 ├── f2: SELECT 2 + f3() │ └── f3: SELECT 3 └── f3: SELECT 3 We define multiple mutations to the same table as safe if, for every possible path from the root statement to a leaf statement, either of the following is true: 1. There is no more than one mutation to any table. 2. Or, any table with multiple mutations is modified only by simple INSERTs without ON CONFLICT clauses. As a consequence of this definition, a UDF is now allowed to mutate the same table as long as it does so in different statements in its body. Such statements are siblings in the statement tree, and therefore do not share any path from root to leaf. For example, this is now allowed: CREATE FUNCTION ups(a1 INT, a2 INT) RETURNS VOID LANGUAGE SQL AS $$ UPSERT INTO a VALUES (a1); UPSERT INTO a VALUES (a2); $$ Similarly, successive invocations of the same UDF that mutates a table are now allowed: CREATE FUNCTION upd(k0 INT, v0 INT) RETURNS VOID LANGUAGE SQL AS $$ UPDATE kv SET v = v0 WHERE k = k0; $$; SELECT upd(1, 2), upd(1, 3); The `statementTree` data structure has been added to enforce this definition. See its documentation for more details. Note: These restrictions will likely need to be revisited once we support recursive UDFs. Epic: CRDB-25388 Informs #70731 Release note: None --- .../logictest/testdata/logic_test/udf_delete | 37 +- .../logictest/testdata/logic_test/udf_update | 12 +- .../logictest/testdata/logic_test/udf_upsert | 57 ++- pkg/sql/logictest/testdata/logic_test/with | 10 +- pkg/sql/opt/optbuilder/BUILD.bazel | 3 + pkg/sql/opt/optbuilder/builder.go | 28 +- pkg/sql/opt/optbuilder/create_function.go | 2 +- pkg/sql/opt/optbuilder/delete.go | 2 +- pkg/sql/opt/optbuilder/insert.go | 6 +- pkg/sql/opt/optbuilder/scalar.go | 2 +- pkg/sql/opt/optbuilder/statement_tree.go | 164 +++++++ pkg/sql/opt/optbuilder/statement_tree_test.go | 412 ++++++++++++++++++ pkg/sql/opt/optbuilder/testdata/udf | 86 ++++ pkg/sql/opt/optbuilder/update.go | 2 +- pkg/sql/opt/optbuilder/util.go | 18 +- 15 files changed, 784 insertions(+), 57 deletions(-) create mode 100644 pkg/sql/opt/optbuilder/statement_tree.go create mode 100644 pkg/sql/opt/optbuilder/statement_tree_test.go diff --git a/pkg/sql/logictest/testdata/logic_test/udf_delete b/pkg/sql/logictest/testdata/logic_test/udf_delete index 5ba39cf9ff22..73a1eaf1d8d0 100644 --- a/pkg/sql/logictest/testdata/logic_test/udf_delete +++ b/pkg/sql/logictest/testdata/logic_test/udf_delete @@ -57,20 +57,49 @@ query II SELECT * FROM kv ---- -statement error pq: multiple modification subqueries of the same table "kv" are not supported -CREATE FUNCTION f_err(i INT, j INT) RETURNS SETOF RECORD AS +statement ok +CREATE FUNCTION f_kv2(i INT, j INT) RETURNS SETOF RECORD AS $$ DELETE FROM kv WHERE k = i RETURNING k, v; DELETE FROM kv WHERE v = j RETURNING k, v; $$ LANGUAGE SQL; -statement error pq: multiple modification subqueries of the same table "kv" are not supported -CREATE FUNCTION f_err(i INT, j INT) RETURNS SETOF RECORD AS +statement ok +INSERT INTO kv VALUES (1, 2), (3, 4), (5, 6), (7, 8) + +statement ok +SELECT f_kv2(1, 9) + +query II rowsort +SELECT * FROM kv +---- +3 4 +5 6 +7 8 + +statement ok +SELECT f_kv2(i, j) FROM (VALUES (3, 0), (0, 8)) v(i, j) + +query II +SELECT * FROM kv +---- +5 6 + +statement ok +CREATE FUNCTION f_no_op(i INT, j INT) RETURNS SETOF RECORD AS $$ INSERT INTO kv VALUES (i, j); DELETE FROM kv WHERE k=i AND v=j RETURNING k, v; $$ LANGUAGE SQL; +statement ok +SELECT f_no_op(9, 10) + +query II +SELECT * FROM kv +---- +5 6 + statement ok CREATE TABLE unindexed ( k INT PRIMARY KEY, diff --git a/pkg/sql/logictest/testdata/logic_test/udf_update b/pkg/sql/logictest/testdata/logic_test/udf_update index 7a8d23c2b992..de3a8c2c936a 100644 --- a/pkg/sql/logictest/testdata/logic_test/udf_update +++ b/pkg/sql/logictest/testdata/logic_test/udf_update @@ -68,8 +68,10 @@ SELECT f2(5,32); ---- (5,32) -statement error pq: multiple modification subqueries of the same table \"t\" are not supported +query TT SELECT f2(5,9), f2(7,11); +---- +(5,9) (7,11) query T SELECT f2(x,y) FROM (VALUES (1,16),(1,17)) v(x,y); @@ -80,10 +82,10 @@ SELECT f2(x,y) FROM (VALUES (1,16),(1,17)) v(x,y); query II rowsort SELECT * FROM t; ---- -1 17 -3 4 -5 32 -7 8 +1 17 +3 4 +5 9 +7 11 statement ok CREATE TABLE t2 (a INT, b INT, c INT); diff --git a/pkg/sql/logictest/testdata/logic_test/udf_upsert b/pkg/sql/logictest/testdata/logic_test/udf_upsert index 9ac57820ff56..d90f1e822f19 100644 --- a/pkg/sql/logictest/testdata/logic_test/udf_upsert +++ b/pkg/sql/logictest/testdata/logic_test/udf_upsert @@ -29,23 +29,27 @@ SELECT f_ocdn(1,2,1); ---- NULL -statement error pq: multiple modification subqueries of the same table \"t_ocdn\" are not supported +query TTTT SELECT f_ocdn(1,1,1), f_ocdn(3,2,2), f_ocdn(6,6,2), f_ocdn(2,1,1); +---- +NULL (3,2,2) (6,6,2) NULL -query T -SELECT f_ocdn(x, y, z) FROM (VALUES (1, 1, 1), (2, 2, 1), (3, 3, 3), (3, 4, 4)) v(x, y, z) +query T nosort +SELECT f_ocdn(x, y, z) FROM (VALUES (1, 1, 1), (2, 2, 1), (3, 3, 3), (3, 4, 4), (5, 5, 5)) v(x, y, z) ---- NULL -(2,2,1) -(3,3,3) NULL +NULL +NULL +(5,5,5) query III rowsort SELECT * FROM t_ocdn ---- -1 1 1 -2 2 1 -3 3 3 +1 1 1 +3 2 2 +5 5 5 +6 6 2 statement ok @@ -55,24 +59,45 @@ $$ $$ LANGUAGE SQL; statement ok -SELECT f_ocdn_2vals(5,5,5,5,5,5); +SELECT f_ocdn_2vals(7,7,7,7,7,7); query III rowsort SELECT * FROM t_ocdn; ---- -1 1 1 -2 2 1 -3 3 3 -5 5 5 +1 1 1 +3 2 2 +5 5 5 +6 6 2 +7 7 7 -statement error pq: multiple modification subqueries of the same table \"t_ocdn\" are not supported -CREATE FUNCTION f_err(i INT, j INT, k INT, m INT, n INT, o INT) RETURNS RECORD AS +statement ok +CREATE FUNCTION f_multi_ins(i INT, j INT, k INT, m INT, n INT, o INT) RETURNS RECORD AS $$ INSERT INTO t_ocdn VALUES (i, j, k) ON CONFLICT DO NOTHING; INSERT INTO t_ocdn VALUES (m, n, o) ON CONFLICT DO NOTHING; - SELECT * FROM t_ocdn WHERE t.a=i OR t.a=m; + SELECT * FROM t_ocdn WHERE a=i OR a=m ORDER BY a; $$ LANGUAGE SQL; +query TT +SELECT f_multi_ins(1, 1, 1, 1, 1, 1), f_multi_ins(1, 1, 1, 1, 1, 1) +---- +(1,1,1) (1,1,1) + +query TT +SELECT f_multi_ins(2, 2, 2, 3, 3, 3), f_multi_ins(3, 3, 3, 4, 4, 4) +---- +(3,2,2) (3,2,2) + +query III rowsort +SELECT * FROM t_ocdn +---- +1 1 1 +3 2 2 +4 4 4 +5 5 5 +6 6 2 +7 7 7 + subtest on_conflict_do_update statement ok diff --git a/pkg/sql/logictest/testdata/logic_test/with b/pkg/sql/logictest/testdata/logic_test/with index f70a367c2f83..8b4cc7cbd409 100644 --- a/pkg/sql/logictest/testdata/logic_test/with +++ b/pkg/sql/logictest/testdata/logic_test/with @@ -1052,7 +1052,7 @@ SELECT i, j FROM u@u_j_idx # Multiple upserts of the same row. Might succeed after issue 70731 is fixed, # depending on the implementation, but should not cause corruption. -query error pgcode 0A000 multiple modification subqueries of the same table +query error pgcode 0A000 multiple mutations of the same table WITH v AS (UPSERT INTO u VALUES (0, 1) RETURNING *), w AS (UPSERT INTO u SELECT i, j + 1 FROM v RETURNING *) @@ -1071,7 +1071,7 @@ SELECT i, j FROM u@u_j_idx # Multiple updates of the same row. Might succeed after issue 70731 is fixed, # depending on the implementation, but should not cause corruption. The order of # CTE execution is not necessarily defined here. -query error pgcode 0A000 multiple modification subqueries of the same table +query error pgcode 0A000 multiple mutations of the same table WITH v AS (UPDATE u SET j = 3 WHERE i = 0 RETURNING *), w AS (UPDATE u SET j = 4 WHERE i = 0 RETURNING *) @@ -1090,7 +1090,7 @@ SELECT i, j FROM u@u_j_idx # Multiple updates of the same row. Might succeed after issue 70731 is fixed, # depending on the implementation, but should not cause corruption. The order of # CTE execution should be deterministic. -query error pgcode 0A000 multiple modification subqueries of the same table +query error pgcode 0A000 multiple mutations of the same table WITH v AS (UPDATE u SET j = 5 WHERE i = 0 RETURNING *), w AS (UPDATE u SET j = v.j + 1 FROM v WHERE u.i = v.i RETURNING *) @@ -1108,7 +1108,7 @@ SELECT i, j FROM u@u_j_idx # Multiple inserts of the same row into the same table, most should become # updates due to conflicts. Might succeed after issue 70731 is fixed. -query error pgcode 0A000 multiple modification subqueries of the same table +query error pgcode 0A000 multiple mutations of the same table WITH v AS (INSERT INTO u VALUES (0, 42), (1, 42) ON CONFLICT (i) DO UPDATE SET j = 52 RETURNING *) INSERT INTO u SELECT i, j + 1 FROM v ON CONFLICT (i) DO UPDATE SET j = v.j + 100 RETURNING * @@ -1125,7 +1125,7 @@ SELECT i, j FROM u@u_j_idx # Multiple deletes of the same row. Might succeed after issue 70731 is fixed, # though the order of CTE execution is undefined. -query error pgcode 0A000 multiple modification subqueries of the same table +query error pgcode 0A000 multiple mutations of the same table WITH v AS (DELETE FROM u ORDER BY i LIMIT 1 RETURNING *), w AS (DELETE FROM u ORDER BY i LIMIT 2 RETURNING *) diff --git a/pkg/sql/opt/optbuilder/BUILD.bazel b/pkg/sql/opt/optbuilder/BUILD.bazel index 79b3397bad1d..e21492675b2f 100644 --- a/pkg/sql/opt/optbuilder/BUILD.bazel +++ b/pkg/sql/opt/optbuilder/BUILD.bazel @@ -37,6 +37,7 @@ go_library( "show_trace.go", "sql_fn.go", "srfs.go", + "statement_tree.go", "subquery.go", "union.go", "update.go", @@ -103,6 +104,7 @@ go_test( srcs = [ "builder_test.go", "name_resolution_test.go", + "statement_tree_test.go", "union_test.go", ], args = ["-test.timeout=55s"], @@ -112,6 +114,7 @@ go_test( "//pkg/settings/cluster", "//pkg/sql/catalog/colinfo", "//pkg/sql/catalog/colinfo/colinfotestutils", + "//pkg/sql/opt/cat", "//pkg/sql/opt/memo", "//pkg/sql/opt/testutils", "//pkg/sql/opt/testutils/opttester", diff --git a/pkg/sql/opt/optbuilder/builder.go b/pkg/sql/opt/optbuilder/builder.go index 9bf0dab2d9de..59a0ab1640d2 100644 --- a/pkg/sql/opt/optbuilder/builder.go +++ b/pkg/sql/opt/optbuilder/builder.go @@ -94,6 +94,10 @@ type Builder struct { catalog cat.Catalog scopeAlloc []scope + // stmtTree tracks the hierarchy of statements to ensure that multiple + // modifications to the same table cannot corrupt indexes (see #70731). + stmtTree statementTree + // ctes stores CTEs which may need to be built at the top-level. ctes cteSources @@ -163,12 +167,6 @@ type Builder struct { // query contains a correlated subquery. isCorrelated bool - // areAllTableMutationsSimpleInserts maps from each table mutated by the - // statement to true if all mutations of that table are simple inserts - // (without ON CONFLICT) or false otherwise. All mutated tables will have an - // entry in the map. - areAllTableMutationsSimpleInserts map[cat.StableID]bool - // subqueryNameIdx helps generate unique subquery names during star // expansion. subqueryNameIdx int @@ -261,11 +259,25 @@ func unimplementedWithIssueDetailf(issue int, detail, format string, args ...int // "context". This is used at the top-level of every statement, and inside // EXPLAIN, CREATE VIEW, CREATE TABLE AS. func (b *Builder) buildStmtAtRoot(stmt tree.Statement, desiredTypes []*types.T) (outScope *scope) { - // A "root" statement cannot refer to anything from an enclosing query, so we - // always start with an empty scope. + // A "root" statement cannot refer to anything from an enclosing query, so + // we always start with an empty scope. inScope := b.allocScope() + return b.buildStmtAtRootWithScope(stmt, desiredTypes, inScope) +} + +// buildStmtAtRootWithScope is similar to buildStmtAtRoot, but allows a scope to +// be provided. This is used at the top-level of a statement, that has a new +// context but can refer to variables that are declared outside the statement, +// like a statement within a UDF body that can reference UDF parameters. +func (b *Builder) buildStmtAtRootWithScope( + stmt tree.Statement, desiredTypes []*types.T, inScope *scope, +) (outScope *scope) { inScope.atRoot = true + // Push a new statement onto the statement tree. + b.stmtTree.Push() + defer b.stmtTree.Pop() + // Save any CTEs above the boundary. prevCTEs := b.ctes b.ctes = nil diff --git a/pkg/sql/opt/optbuilder/create_function.go b/pkg/sql/opt/optbuilder/create_function.go index 72697c5e6fbf..ef47200c13b0 100644 --- a/pkg/sql/opt/optbuilder/create_function.go +++ b/pkg/sql/opt/optbuilder/create_function.go @@ -169,7 +169,7 @@ func (b *Builder) buildCreateFunction(cf *tree.CreateFunction, inScope *scope) ( // volatility of stable functions. If folded, we only get a scalar and lose // the volatility. b.factory.FoldingControl().TemporarilyDisallowStableFolds(func() { - stmtScope = b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope) + stmtScope = b.buildStmtAtRootWithScope(stmts[i].AST, nil /* desiredTypes */, bodyScope) }) checkStmtVolatility(targetVolatility, stmtScope, stmt.AST) diff --git a/pkg/sql/opt/optbuilder/delete.go b/pkg/sql/opt/optbuilder/delete.go index bcac82ceb73d..20d6e65d9c2a 100644 --- a/pkg/sql/opt/optbuilder/delete.go +++ b/pkg/sql/opt/optbuilder/delete.go @@ -50,7 +50,7 @@ func (b *Builder) buildDelete(del *tree.Delete, inScope *scope) (outScope *scope b.checkPrivilege(depName, tab, privilege.SELECT) // Check if this table has already been mutated in another subquery. - b.checkMultipleMutations(tab, false /* simpleInsert */) + b.checkMultipleMutations(tab, generalMutation) var mb mutationBuilder mb.init(b, "delete", tab, alias) diff --git a/pkg/sql/opt/optbuilder/insert.go b/pkg/sql/opt/optbuilder/insert.go index 972a24cc15a8..6293404fd714 100644 --- a/pkg/sql/opt/optbuilder/insert.go +++ b/pkg/sql/opt/optbuilder/insert.go @@ -214,7 +214,11 @@ func (b *Builder) buildInsert(ins *tree.Insert, inScope *scope) (outScope *scope } // Check if this table has already been mutated in another subquery. - b.checkMultipleMutations(tab, ins.OnConflict == nil /* simpleInsert */) + mutType := generalMutation + if ins.OnConflict == nil { + mutType = simpleInsert + } + b.checkMultipleMutations(tab, mutType) var mb mutationBuilder if ins.OnConflict != nil && ins.OnConflict.IsUpsertAlias() { diff --git a/pkg/sql/opt/optbuilder/scalar.go b/pkg/sql/opt/optbuilder/scalar.go index b48b2ea60406..b64f2a8f6b9e 100644 --- a/pkg/sql/opt/optbuilder/scalar.go +++ b/pkg/sql/opt/optbuilder/scalar.go @@ -697,7 +697,7 @@ func (b *Builder) buildUDF( b.insideUDF = true isMultiColDataSource := false for i := range stmts { - stmtScope := b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope) + stmtScope := b.buildStmtAtRootWithScope(stmts[i].AST, nil /* desiredTypes */, bodyScope) expr := stmtScope.expr physProps := stmtScope.makePhysicalProps() diff --git a/pkg/sql/opt/optbuilder/statement_tree.go b/pkg/sql/opt/optbuilder/statement_tree.go new file mode 100644 index 000000000000..11b4ce67db75 --- /dev/null +++ b/pkg/sql/opt/optbuilder/statement_tree.go @@ -0,0 +1,164 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package optbuilder + +import ( + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" + "github.com/cockroachdb/cockroach/pkg/util/intsets" + "github.com/cockroachdb/errors" +) + +// statementTree tracks the hierarchy of statements within a query. It is used +// for preventing multiple modifications to the same table that can cause index +// corruption (see #70731). See CanMutateTable for more details. +// +// The statement tree is built up using only Push and Pop. Push pushes a new +// statement onto the tree as a child of the current statement and sets the new +// statement as the current statement. Pop sets the current statement's parent +// as the new current statement. For example, consider a root statement with two +// children: +// +// statement1 +// ├── statement2 +// └── statement3 +// +// This tree is built with the following sequence of Push and Pop: +// +// var st statementTree +// st.Push() // Push statement 1 as the root +// st.Push() // Push statement 2 as a child of statement 1 +// st.Pop() // Pop statement 2 +// st.Push() // Push statement 3 as a child of statement 1 +// +// Note that we don't actually build a tree-like data structure to represent a +// statement tree. The API only allows modifying the tree with Push and Pop. A +// statement's children only need to be checked for conflicts after those +// statements have been popped. This means we can simplify the implementation to +// using a stack. In order to use a stack, Pop combines all the mutations of the +// popped statement into a set of children mutations in its parent statement. +// This set of children mutations is checked for conflicts during +// CanMutateTable, which is equivalent to maintaining and traversing the entire +// sub-tree of the popped statement. +type statementTree struct { + stmts []statementTreeNode +} + +// mutationType represents a set of mutation types that can be applied to a +// table. +type mutationType uint8 + +const ( + // simpleInsert represents an INSERT with no ON CONFLICT clause. + simpleInsert mutationType = iota + // generalMutation represents all types of mutations except for a simple + // INSERT. + generalMutation +) + +// statementTreeNode represents a single statement in the hierarchy of +// statements within a query. +type statementTreeNode struct { + parent *statementTreeNode + simpleInsertTables intsets.Fast + generalMutationTables intsets.Fast + childrenSimpleInsertTables intsets.Fast + childrenGeneralMutationTables intsets.Fast +} + +// conflictsWithMutation returns true if the statement node conflicts with the +// given mutation table and type. +func (n *statementTreeNode) conflictsWithMutation(tabID cat.StableID, typ mutationType) bool { + return typ == generalMutation && n.simpleInsertTables.Contains(int(tabID)) || + n.generalMutationTables.Contains(int(tabID)) +} + +// childrenConflictWithMutation returns true if any children of the statement +// node conflict with the given mutation table and type. +func (n *statementTreeNode) childrenConflictWithMutation( + tabID cat.StableID, typ mutationType, +) bool { + return typ == generalMutation && n.childrenSimpleInsertTables.Contains(int(tabID)) || + n.childrenGeneralMutationTables.Contains(int(tabID)) +} + +// Push pushes a new statement onto the tree as a descendent of the current +// statement. The newly pushed statement becomes the current statement. +func (st *statementTree) Push() { + st.stmts = append(st.stmts, statementTreeNode{}) +} + +// Pop sets the parent of the current statement as the new current statement. +func (st *statementTree) Pop() { + popped := &st.stmts[len(st.stmts)-1] + st.stmts = st.stmts[:len(st.stmts)-1] + if len(st.stmts) > 0 { + // Combine the popped statement's mutations and child mutations into the + // child statements of its parent (the new current statement). + curr := &st.stmts[len(st.stmts)-1] + curr.childrenSimpleInsertTables.UnionWith(popped.simpleInsertTables) + curr.childrenSimpleInsertTables.UnionWith(popped.childrenSimpleInsertTables) + curr.childrenGeneralMutationTables.UnionWith(popped.generalMutationTables) + curr.childrenGeneralMutationTables.UnionWith(popped.childrenGeneralMutationTables) + } +} + +// CanMutateTable returns true if the table can be mutated without concern for +// index corruption due to multiple modifications to the same table. It returns +// true if, for the current statement and all its ancestors and descendents, +// either of the following is true: +// +// 1. There are no other mutations to the given table. +// 2. The given mutation type is a simple INSERT and there exists only simple +// INSERT mutations to the given table. +// +// If there is a non-simple-INSERT mutation to a table, it must be the only +// mutation, simple or otherwise, to that table in the direct lineage of any +// statement. +// +// For example, the following statement tree is valid. The two UPDATEs to t2 are +// allowed because they exist in sibling statements, i.e., neither of the +// statements are ancestors nor descendents of the other. +// +// statement1: UPDATE t1 +// ├── statement2: UPDATE t2 +// └── statement3: UPDATE t2 +// +// The following statement tree is not valid because statement1 is the parent of +// statement3, and they both update t1. +// +// statement1: UPDATE t1 +// ├── statement2: UPDATE t2 +// └── statement3: UPDATE t1 +func (st *statementTree) CanMutateTable(tabID cat.StableID, typ mutationType) bool { + if len(st.stmts) == 0 { + panic(errors.AssertionFailedf("unexpected empty tree")) + } + curr := &st.stmts[len(st.stmts)-1] + // Check the children of the current statement for a conflict. + if curr.childrenConflictWithMutation(tabID, typ) { + return false + } + // Check the current statement and all parent statements for a conflict. + for i := range st.stmts { + n := &st.stmts[i] + if n.conflictsWithMutation(tabID, typ) { + return false + } + } + // The new mutation is valid, so track it. + switch typ { + case simpleInsert: + curr.simpleInsertTables.Add(int(tabID)) + case generalMutation: + curr.generalMutationTables.Add(int(tabID)) + } + return true +} diff --git a/pkg/sql/opt/optbuilder/statement_tree_test.go b/pkg/sql/opt/optbuilder/statement_tree_test.go new file mode 100644 index 000000000000..b23fa1456adf --- /dev/null +++ b/pkg/sql/opt/optbuilder/statement_tree_test.go @@ -0,0 +1,412 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package optbuilder + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" +) + +func TestStatementTree(t *testing.T) { + type cmd uint8 + const ( + push cmd = 1 << iota + pop + mut + simple + t1 + t2 + fail + ) + type testCase struct { + cmds []cmd + } + testCases := []testCase{ + // 0. + // Push, CanMutateTable(t1, simpleInsert), Pop + { + cmds: []cmd{ + push, + mut | t1 | simple, + pop, + }, + }, + // 1. + // Push, CanMutateTable(t1, default), Pop + { + cmds: []cmd{ + push, + mut | t1, + pop, + }, + }, + // 2. + // Push, CanMutateTable(t1, default), CanMutateTable(t2, default), Pop + { + cmds: []cmd{ + push, + mut | t1, + mut | t2, + pop, + }, + }, + // 3. + // Push, CanMutateTable(t1, default), CanMutateTable(t1, simpleInsert) FAIL + { + cmds: []cmd{ + push, + mut | t1, + mut | t1 | simple | fail, + }, + }, + // 4. + // Push, CanMutateTable(t1, simpleInsert), CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1 | simple, + mut | t1 | fail, + }, + }, + // 5. + // Push, CanMutateTable(t1, default), CanMutateTable(t2, default), CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1, + mut | t2, + mut | t1 | fail, + }, + }, + // 6. + // Push + // CanMutateTable(t1, default) + // Push + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1, + push, + mut | t1 | fail, + }, + }, + // 7. + // Push + // CanMutateTable(t1, default) + // Push + // CanMutateTable(t1, simpleInsert) FAIL + { + cmds: []cmd{ + push, + mut | t1, + push, + mut | t1 | simple | fail, + }, + }, + // 8. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t1 | fail, + }, + }, + // 9. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // Pop + // Pop + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t1 | simple, + push, + mut | t1 | simple, + pop, + pop, + pop, + }, + }, + // 10. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t1 | simple, + push, + mut | t1 | fail, + }, + }, + // 11. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t2, simpleInsert) + // Push + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t2 | simple, + push, + mut | t1 | fail, + }, + }, + // 12. + // Push + // CanMutateTable(t1, default) + // Push + // CanMutateTable(t2, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) FAIL + { + cmds: []cmd{ + push, + mut | t1, + push, + mut | t2 | simple, + push, + mut | t1 | simple | fail, + }, + }, + // 13. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t1 | simple, + pop, + mut | t1 | fail, + }, + }, + // 14. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // Pop + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t1 | simple, + pop, + push, + mut | t1 | simple, + pop, + pop, + }, + }, + // 15. + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // Pop + // Pop + { + cmds: []cmd{ + push, + mut | t1 | simple, + push, + mut | t1 | simple, + push, + mut | t1 | simple, + pop, + pop, + pop, + }, + }, + // 16. + // Push + // Push + // CanMutateTable(t1, default) + // Pop + // Push + // CanMutateTable(t1, default) + // Pop + // Pop + { + cmds: []cmd{ + push, + push, + mut | t1, + pop, + push, + mut | t1, + pop, + pop, + }, + }, + // 17. + // Push + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + push, + mut | t1 | simple, + pop, + mut | t1 | fail, + }, + }, + // 18. + // Push + // Push + // CanMutateTable(t1, default) + // Pop + // CanMutateTable(t1, simpleInsert) FAIL + { + cmds: []cmd{ + push, + push, + mut | t1, + pop, + mut | t1 | simple | fail, + }, + }, + // 19. + // Push + // Push + // Push + // CanMutateTable(t1, default) + // Pop + // CanMutateTable(t1, simpleInsert) FAIL + { + cmds: []cmd{ + push, + push, + push, + mut | t1, + pop, + mut | t1 | simple | fail, + }, + }, + // 20. + // Push + // Push + // CanMutateTable(t2, simpleInsert) + // Push + // CanMutateTable(t2, simpleInsert) + // Pop + // Push + // CanMutateTable(t2, simpleInsert) + // Pop + // Push + // CanMutateTable(t1, simpleInsert) + // Pop + // Push + // CanMutateTable(t2, simpleInsert) + // Pop + // Pop + // CanMutateTable(t2, simpleInsert) + // CanMutateTable(t1, simpleInsert) + // CanMutateTable(t1, default) FAIL + { + cmds: []cmd{ + push, + push, + mut | t2 | simple, + push, + mut | t2 | simple, + pop, + push, + mut | t2 | simple, + pop, + push, + mut | t1 | simple, + pop, + push, + mut | t2 | simple, + pop, + pop, + mut | t2 | simple, + mut | t1 | simple, + mut | t1 | fail, + }, + }, + } + + for i, tc := range testCases { + var mt statementTree + for j, c := range tc.cmds { + switch { + case c&push == push: + mt.Push() + + case c&pop == pop: + mt.Pop() + + case c&mut == mut: + var tabID cat.StableID + switch { + case c&t1 == t1: + tabID = 1 + case c&t2 == t2: + tabID = 2 + } + + typ := generalMutation + if c&simple == simple { + typ = simpleInsert + } + + res := mt.CanMutateTable(tabID, typ) + + expected := c&fail != fail + if res != expected { + t.Fatalf("test case %d: expected: %v at command %d, got: %v", i, expected, j, res) + } + } + } + } +} diff --git a/pkg/sql/opt/optbuilder/testdata/udf b/pkg/sql/opt/optbuilder/testdata/udf index 6e8ca2d3a858..c79ffc833db3 100644 --- a/pkg/sql/opt/optbuilder/testdata/udf +++ b/pkg/sql/opt/optbuilder/testdata/udf @@ -1620,3 +1620,89 @@ project │ │ └── false │ └── null └── const: 1 + + +# -------------------------------------------------- +# UDFs with mutations. +# -------------------------------------------------- + +exec-ddl +CREATE FUNCTION ups(x INT, y INT, z INT) RETURNS BOOL LANGUAGE SQL AS $$ + UPSERT INTO abc VALUES (x, y, z); + SELECT true; +$$ +---- + +# A single UPSERT to abc is allowed. +build format=hide-all +SELECT ups(1, 2, 3) +---- +project + ├── values + │ └── () + └── projections + └── ups(1, 2, 3) + +# Multiple mutations to abc by the root statement and the UDF. +build +UPDATE abc SET c = 3 WHERE ups(1, 2, 3) +---- +error (0A000): multiple mutations of the same table "abc" are not supported unless they all use INSERT without ON CONFLICT; this is to prevent data corruption, see documentation of sql.multiple_modifications_of_table.enabled + +# Multiple mutations to abc by the root statement and the UDF. +build +WITH x AS ( + SELECT a FROM abc WHERE ups(1, 2, 3) +), y AS ( + UPSERT INTO abc VALUES (4, 5, 6) RETURNING j +) +SELECT * FROM x +---- +error (0A000): multiple mutations of the same table "abc" are not supported unless they all use INSERT without ON CONFLICT; this is to prevent data corruption, see documentation of sql.multiple_modifications_of_table.enabled + +exec-ddl +CREATE FUNCTION upd_ups(x INT, y INT, z INT) RETURNS VOID LANGUAGE SQL AS $$ + UPDATE abc SET c = 3 WHERE ups(x, y, z); +$$ +---- + +# Multiple mutations to abc by the two UDFs invoked. +build +SELECT upd_ups(1, 2, 3) +---- +error (0A000): multiple mutations of the same table "abc" are not supported unless they all use INSERT without ON CONFLICT; this is to prevent data corruption, see documentation of sql.multiple_modifications_of_table.enabled + +exec-ddl +CREATE FUNCTION ups2(a1 INT, b1 INT, c1 INT, a2 INT, b2 INT, c2 INT) RETURNS BOOL LANGUAGE SQL AS $$ + UPSERT INTO abc VALUES (a1, b1, c1); + UPSERT INTO abc VALUES (a2, b2, c2); + SELECT true; +$$ +---- + +# Multiple mutations to abc in sibling statements in the UDF is allowed. +build format=hide-all +SELECT ups2(1, 2, 3, 4, 5, 6) +---- +project + ├── values + │ └── () + └── projections + └── ups2(1, 2, 3, 4, 5, 6) + +exec-ddl +CREATE FUNCTION ups3(a1 INT, b1 INT, c1 INT, a2 INT, b2 INT, c2 INT) RETURNS BOOL LANGUAGE SQL AS $$ + UPSERT INTO abc VALUES (a1, b1, c1); + SELECT ups(a2, b2, c2); +$$ +---- + +# Multiple mutations to abc in sibling statements in the UDF is allowed. +build format=hide-all +SELECT ups3(1, 2, 3, 4, 5, 6) +---- +project + ├── values + │ └── () + └── projections + └── ups3(1, 2, 3, 4, 5, 6) diff --git a/pkg/sql/opt/optbuilder/update.go b/pkg/sql/opt/optbuilder/update.go index 502e414de34b..7e2057a26d16 100644 --- a/pkg/sql/opt/optbuilder/update.go +++ b/pkg/sql/opt/optbuilder/update.go @@ -86,7 +86,7 @@ func (b *Builder) buildUpdate(upd *tree.Update, inScope *scope) (outScope *scope b.checkPrivilege(depName, tab, privilege.SELECT) // Check if this table has already been mutated in another subquery. - b.checkMultipleMutations(tab, false /* simpleInsert */) + b.checkMultipleMutations(tab, generalMutation) var mb mutationBuilder mb.init(b, "update", tab, alias) diff --git a/pkg/sql/opt/optbuilder/util.go b/pkg/sql/opt/optbuilder/util.go index 4862e650ba6c..357c216e04dd 100644 --- a/pkg/sql/opt/optbuilder/util.go +++ b/pkg/sql/opt/optbuilder/util.go @@ -512,24 +512,14 @@ func (b *Builder) resolveSchemaForCreate( return sch, resName } -func (b *Builder) checkMultipleMutations(tab cat.Table, simpleInsert bool) { - if b.areAllTableMutationsSimpleInserts == nil { - b.areAllTableMutationsSimpleInserts = make(map[cat.StableID]bool) - } - allSimpleInserts, prevMutations := b.areAllTableMutationsSimpleInserts[tab.ID()] - if !prevMutations { - b.areAllTableMutationsSimpleInserts[tab.ID()] = simpleInsert - return - } - allSimpleInserts = allSimpleInserts && simpleInsert - b.areAllTableMutationsSimpleInserts[tab.ID()] = allSimpleInserts - if !allSimpleInserts && +func (b *Builder) checkMultipleMutations(tab cat.Table, typ mutationType) { + if !b.stmtTree.CanMutateTable(tab.ID(), typ) && !multipleModificationsOfTableEnabled.Get(&b.evalCtx.Settings.SV) && !b.evalCtx.SessionData().MultipleModificationsOfTable { panic(pgerror.Newf( pgcode.FeatureNotSupported, - "multiple modification subqueries of the same table %q are not supported unless "+ - "they all use INSERT without ON CONFLICT; this is to prevent data corruption, see "+ + "multiple mutations of the same table %q are not supported unless they all "+ + "use INSERT without ON CONFLICT; this is to prevent data corruption, see "+ "documentation of sql.multiple_modifications_of_table.enabled", tab.Name(), )) }