Skip to content

Commit

Permalink
sql: fix record-returning udfs when used as data source
Browse files Browse the repository at this point in the history
When record-returning UDFs (both implicit and `RECORD` return types) are
used as a data source in a query, the result should be treated as a row
with separate columns instead of a tuple, which is how UDF output is
normally treated. This PR closes this gap between CRDB and Postgres.

For example:

```
CREATE FUNCTION f() RETURNS RECORD AS
$$
  SELECT 1, 2, 3;
$$ LANGUAGE SQL

SELECT f()
    f
--------
 (1,2,3)

SELECT * FROM f() as foo(a int, b int, c int);
 a | b | c
---+---+---
 1 | 2 | 3
```

The behavior is the same for implicit record return types.

Epic: CRDB-19496
Fixes: #97059

Release note (sql change): UDFs with record-returning types now return a
row instead of a tuple.
  • Loading branch information
rharding6373 committed Mar 10, 2023
1 parent a95ffcd commit 0fb39a5
Show file tree
Hide file tree
Showing 11 changed files with 249 additions and 42 deletions.
143 changes: 138 additions & 5 deletions pkg/sql/logictest/testdata/logic_test/udf_record
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,13 @@ SELECT f_one();
----
(1)

# TODO(97059): The following query should require a column definition list.
statement ok
statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_one();

# TODO(97059): The following query should produce a row, not a tuple.
query T
query I
SELECT * FROM f_one() AS foo (a INT);
----
(1)
1

statement ok
CREATE FUNCTION f_const() RETURNS RECORD AS
Expand Down Expand Up @@ -130,3 +128,138 @@ query T
SELECT f_table();
----
(1,5)

# subtest datasource

statement ok
CREATE FUNCTION f_tup() RETURNS RECORD AS
$$
SELECT ROW(1, 2, 3);
$$ LANGUAGE SQL;

query T
SELECT f_tup();
----
(1,2,3)

statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_tup();

query III colnames
SELECT * FROM f_tup() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE FUNCTION f_col() RETURNS RECORD AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query T
SELECT f_col();
----
(1,2,3)

query III colnames
SELECT * FROM f_col() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE TABLE t_imp (a INT PRIMARY KEY, b INT);
INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32);

statement ok
CREATE FUNCTION f_imp() RETURNS t_imp AS
$$
SELECT * FROM t_imp ORDER BY a LIMIT 1;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_imp();
----
a b
1 10

statement ok
CREATE TYPE udt AS ENUM ('a', 'b', 'c');

statement ok
CREATE FUNCTION f_udt() RETURNS udt AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt();
----
a

statement ok
CREATE FUNCTION f_udt_record() RETURNS RECORD AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt() AS foo(u udt);
----
a

query II rowsort
SELECT * FROM f_setof() AS foo(a INT, b INT);
----
1 5
2 6
3 7

statement ok
CREATE FUNCTION f_setof_imp() RETURNS SETOF t_imp AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_imp()
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict() RETURNS RECORD STRICT AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query III
SELECT * FROM f_strict() AS foo(a INT, b INT, c INT);
----
1 2 3

statement ok
CREATE FUNCTION f_setof_strict() RETURNS SETOF RECORD STRICT AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_strict() AS foo(a INT, b INT);
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_arg(IN a INT8, IN b INT8) RETURNS RECORD AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II
SELECT * FROM f_arg(1,2) AS foo(a INT, b INT);
----
1 2
35 changes: 23 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/udf_setof
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT (a, b) FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab_tuple()
----
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

query II rowsort
SELECT * FROM all_ab_tuple()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40
4 changes: 4 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ func (b *Builder) buildExistsSubquery(
types.Bool,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
),
tree.DBoolFalse,
}, types.Bool), nil
Expand Down Expand Up @@ -753,6 +754,7 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
), nil
}

Expand Down Expand Up @@ -809,6 +811,7 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
), nil
}

Expand Down Expand Up @@ -891,6 +894,7 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
udf.Typ,
enableStepping,
udf.CalledOnNullInput,
udf.MultiColOutput,
), nil
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,9 @@ define UDFPrivate {
# inputs are NULL. If false, the function will not be evaluated in the
# presence of NULL inputs, and will instead evaluate directly to NULL.
CalledOnNullInput bool

# MultiColOutput is true if the function may return multiple columns.
MultiColOutput bool
}

# KVOptions is a set of KVOptionItems that specify arbitrary keys and values
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/optbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ type Builder struct {
// are disabled and only statements whitelisted are allowed.
insideFuncDef bool

// If set, we are processing a data source.
insideDataSource bool

// If set, we are collecting view dependencies in schemaDeps. This can only
// happen inside view/function definitions.
//
Expand Down
70 changes: 50 additions & 20 deletions pkg/sql/opt/optbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ func (b *Builder) buildUDF(
// Build an expression for each statement in the function body.
rels := make(memo.RelListExpr, len(stmts))
isSetReturning := o.Class == tree.GeneratorClass
isMultiColOutput := false
for i := range stmts {
stmtScope := b.buildStmt(stmts[i].AST, nil /* desiredTypes */, bodyScope)
expr := stmtScope.expr
Expand All @@ -711,30 +712,51 @@ func (b *Builder) buildUDF(
// result columns of the last statement. If the result column is a tuple,
// then use its tuple contents for the return instead.
isSingleTupleResult := len(stmtScope.cols) == 1 && stmtScope.cols[0].typ.Family() == types.TupleFamily
if types.IsRecordType(f.ResolvedType()) {
if types.IsRecordType(rtyp) {
if isSingleTupleResult {
f.ResolvedType().InternalType.TupleContents = stmtScope.cols[0].typ.TupleContents()
rtyp.InternalType.TupleContents = stmtScope.cols[0].typ.TupleContents()
rtyp.InternalType.TupleLabels = stmtScope.cols[0].typ.TupleLabels()
} else {
tc := make([]*types.T, len(stmtScope.cols))
tl := make([]string, len(stmtScope.cols))
for i, col := range stmtScope.cols {
tc[i] = col.typ
tl[i] = col.name.MetadataName()
}
f.ResolvedType().InternalType.TupleContents = tc
rtyp.InternalType.TupleContents = tc
rtyp.InternalType.TupleLabels = tl
}
}
if b.insideDataSource && rtyp.Family() == types.TupleFamily {
isMultiColOutput = true
if isSingleTupleResult {
// When used as a data source, we need to expand the tuple into
// individual columns.
stmtScope = bodyScope.push()
cols := physProps.Presentation
elems := make([]scopeColumn, len(rtyp.TupleContents()))
for i := range rtyp.TupleContents() {
e := b.factory.ConstructColumnAccess(b.factory.ConstructVariable(cols[0].ID), memo.TupleOrdinal(i))
col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp.TupleContents()[i], nil, e)
elems[i] = *col
}
expr = b.constructProject(expr, elems)
physProps = stmtScope.makePhysicalProps()
}
}

// If there are multiple output columns or the output type is a record and
// the output column is not a tuple, we must combine them into a tuple -
// only a single column can be returned from a UDF.
cols := physProps.Presentation
if len(cols) > 1 || (types.IsRecordType(f.ResolvedType()) && !isSingleTupleResult) {
if !isMultiColOutput && (len(cols) > 1 || (types.IsRecordType(rtyp) && !isSingleTupleResult)) {
elems := make(memo.ScalarListExpr, len(cols))
for i := range cols {
elems[i] = b.factory.ConstructVariable(cols[i].ID)
}
tup := b.factory.ConstructTuple(elems, f.ResolvedType())
tup := b.factory.ConstructTuple(elems, rtyp)
stmtScope = bodyScope.push()
col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, tup)
col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, tup)
expr = b.constructProject(expr, []scopeColumn{*col})
physProps = stmtScope.makePhysicalProps()
}
Expand All @@ -747,17 +769,17 @@ func (b *Builder) buildUDF(
// column is already a tuple.
returnCol := physProps.Presentation[0].ID
returnColMeta := b.factory.Metadata().ColumnMeta(returnCol)
if !types.IsRecordType(f.ResolvedType()) && !returnColMeta.Type.Identical(f.ResolvedType()) {
if !cast.ValidCast(returnColMeta.Type, f.ResolvedType(), cast.ContextAssignment) {
if !types.IsRecordType(rtyp) && !isMultiColOutput && !returnColMeta.Type.Identical(rtyp) {
if !cast.ValidCast(returnColMeta.Type, rtyp, cast.ContextAssignment) {
panic(sqlerrors.NewInvalidAssignmentCastError(
returnColMeta.Type, f.ResolvedType(), returnColMeta.Alias))
returnColMeta.Type, rtyp, returnColMeta.Alias))
}
cast := b.factory.ConstructAssignmentCast(
b.factory.ConstructVariable(physProps.Presentation[0].ID),
f.ResolvedType(),
rtyp,
)
stmtScope = bodyScope.push()
col := b.synthesizeColumn(stmtScope, scopeColName(""), f.ResolvedType(), nil /* expr */, cast)
col := b.synthesizeColumn(stmtScope, scopeColName(""), rtyp, nil /* expr */, cast)
expr = b.constructProject(expr, []scopeColumn{*col})
physProps = stmtScope.makePhysicalProps()
}
Expand All @@ -772,12 +794,13 @@ func (b *Builder) buildUDF(
out = b.factory.ConstructUDF(
args,
&memo.UDFPrivate{
Name: def.Name,
Params: params,
Body: rels,
Typ: f.ResolvedType(),
SetReturning: isSetReturning,
Volatility: o.Volatility,
Name: def.Name,
Params: params,
Body: rels,
Typ: f.ResolvedType(),
SetReturning: isSetReturning,
Volatility: o.Volatility,
MultiColOutput: isMultiColOutput,
},
)

Expand Down Expand Up @@ -819,10 +842,17 @@ func (b *Builder) buildUDF(
)
}

// Synthesize an output column for set-returning UDFs.
if isSetReturning && outCol == nil {
outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out)
// Synthesize an output columns if necessary.
if outCol == nil {
if isMultiColOutput {
f.ResolvedOverload().ReturnsRecordType = types.IsRecordType(rtyp)
return b.finishBuildGeneratorFunction(f, f.ResolvedOverload(), out, inScope, outScope, outCol)
}
if outScope != nil {
outCol = b.synthesizeColumn(outScope, scopeColName(""), f.ResolvedType(), nil /* expr */, out)
}
}

return b.finishBuildScalar(f, out, inScope, outScope, outCol)
}

Expand Down
Loading

0 comments on commit 0fb39a5

Please sign in to comment.