Skip to content

Commit

Permalink
sql: fix record-returning udfs when used as data source
Browse files Browse the repository at this point in the history
When record-returning UDFs (both implicit and `RECORD` return types) are
used as a data source in a query, the result should be treated as a row
with separate columns instead of a tuple, which is how UDF output is
normally treated. This PR closes this gap between CRDB and Postgres.

For example:

```
CREATE FUNCTION f() RETURNS RECORD AS
$$
  SELECT 1, 2, 3;
$$ LANGUAGE SQL

SELECT f()
    f
--------
 (1,2,3)

SELECT * FROM f() as foo(a int, b int, c int);
 a | b | c
---+---+---
 1 | 2 | 3
```

The behavior is the same for implicit record return types.

Epic: CRDB-19496
Fixes: #97059

Release note: None
  • Loading branch information
rharding6373 committed Apr 7, 2023
1 parent 8e6f530 commit 1c35278
Show file tree
Hide file tree
Showing 17 changed files with 434 additions and 46 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/enums
Original file line number Diff line number Diff line change
Expand Up @@ -1768,7 +1768,7 @@ $$;
query TT
SELECT "🙏"('😊'), "🙏"(NULL:::"Emoji 😉")
----
NULL NULL
NULL mixed

statement ok
CREATE DATABASE "DB➕➕";
Expand Down
240 changes: 235 additions & 5 deletions pkg/sql/logictest/testdata/logic_test/udf_record
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,13 @@ SELECT f_one();
----
(1)

# TODO(97059): The following query should require a column definition list.
statement ok
statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_one();

# TODO(97059): The following query should produce a row, not a tuple.
query T
query I
SELECT * FROM f_one() AS foo (a INT);
----
(1)
1

statement ok
CREATE FUNCTION f_const() RETURNS RECORD AS
Expand Down Expand Up @@ -130,3 +128,235 @@ query T
SELECT f_table();
----
(1,5)

# subtest datasource

statement ok
CREATE FUNCTION f_tup() RETURNS RECORD AS
$$
SELECT ROW(1, 2, 3);
$$ LANGUAGE SQL;

query T
SELECT f_tup();
----
(1,2,3)

statement error pq: a column definition list is required for functions returning \"record\"
SELECT * FROM f_tup();

query III colnames
SELECT * FROM f_tup() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE FUNCTION f_col() RETURNS RECORD AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query T
SELECT f_col();
----
(1,2,3)

query III colnames
SELECT * FROM f_col() as foo(a int, b int, c int);
----
a b c
1 2 3

statement ok
CREATE TABLE t_imp (a INT PRIMARY KEY, b INT);
INSERT INTO t_imp VALUES (1, 10), (2, 4), (3, 32);

statement ok
CREATE FUNCTION f_imp() RETURNS t_imp AS
$$
SELECT * FROM t_imp ORDER BY a LIMIT 1;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_imp();
----
a b
1 10

statement ok
CREATE TYPE udt AS ENUM ('a', 'b', 'c');

statement ok
CREATE FUNCTION f_udt() RETURNS udt AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt();
----
a

statement ok
CREATE FUNCTION f_udt_record() RETURNS RECORD AS
$$
SELECT 'a'::udt;
$$ LANGUAGE SQL;

query T
SELECT * FROM f_udt() AS foo(u udt);
----
a

query II rowsort
SELECT * FROM f_setof() AS foo(a INT, b INT);
----
1 5
2 6
3 7

statement ok
CREATE FUNCTION f_setof_imp() RETURNS SETOF t_imp AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_imp()
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict() RETURNS RECORD STRICT AS
$$
SELECT 1, 2, 3;
$$ LANGUAGE SQL;

query III
SELECT * FROM f_strict() AS foo(a INT, b INT, c INT);
----
1 2 3

statement ok
CREATE FUNCTION f_setof_strict() RETURNS SETOF RECORD STRICT AS
$$
SELECT * FROM t_imp;
$$ LANGUAGE SQL;

query II rowsort
SELECT * FROM f_setof_strict() AS foo(a INT, b INT);
----
1 10
2 4
3 32

statement ok
CREATE FUNCTION f_strict_arg(IN a INT, IN b INT) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_strict_arg(1,2) AS foo(a INT, b INT);
----
a b
1 2

query II colnames
SELECT * FROM f_strict_arg(NULL, 2) AS foo(a INT, b INT);
----
a b
NULL NULL

statement ok
CREATE FUNCTION f_strict_arg_setof(IN a INT, IN b INT) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b FROM generate_series(1,3);
$$ LANGUAGE SQL;

query II colnames
SELECT * FROM f_strict_arg_setof(1,2) AS foo(a INT, b INT);
----
a b
1 2
1 2
1 2

# Strict SETOF UDF with NULL input returns 0 rows.
query II colnames
SELECT * FROM f_strict_arg_setof(NULL,2) AS foo(a INT, b INT);
----
a b

statement ok
CREATE TABLE n (a INT PRIMARY KEY, b INT);
INSERT INTO n VALUES (1, 5), (2, NULL);

query III colnames
WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg(narg.input, 2) AS foo(a INT, b INT);
----
input a b
NULL NULL NULL

query III colnames
WITH narg AS (SELECT b AS input FROM n WHERE a = 2) SELECT * FROM narg, f_strict_arg_SETOF(narg.input, 2) AS foo(a INT, b INT);
----
input a b

statement ok
CREATE FUNCTION f_arg(IN a INT8, IN b INT8) RETURNS RECORD AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

query II
SELECT * FROM f_arg(1,2) AS foo(a INT, b INT);
----
1 2

# Test ambiguous function signatures with records
subtest ambiguity

# setof
statement ok
CREATE FUNCTION f_amb_setof(a INT8, b INT8) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

statement ok
CREATE FUNCTION f_amb_setof(a INT8, b STRING) RETURNS SETOF RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

# TODO(harding): In postgres, calls to f_amb_setof should succeed and return 0 rows.
statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
SELECT f_amb_setof(1, NULL);

statement error pq: ambiguous call: f_amb_setof\(int, unknown\), candidates are
SELECT * FROM f_amb_setof(1, NULL) as foo(a int, b int);

statement ok
CREATE FUNCTION f_amb(a INT, b INT) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

statement ok
CREATE FUNCTION f_amb(a INT, b STRING) RETURNS RECORD STRICT AS
$$
SELECT a, b;
$$ LANGUAGE SQL;

# TODO(harding): In postgres, calls to f_amb should succeed and return NULL.
statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
SELECT f_amb(1, NULL);

# TODO(harding): In postgres, calls to f_amb as a data source should succeed
# and return NULL NULL.
statement error pq: ambiguous call: f_amb\(int, unknown\), candidates are
SELECT * FROM f_amb(1, NULL) as foo(a int, b int);
35 changes: 23 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/udf_setof
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,36 @@ CREATE FUNCTION all_ab() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_tuple() RETURNS SETOF ab LANGUAGE SQL AS $$
SELECT (a, b) FROM ab
$$

# TODO(mgartner): This should return separate columns, not a tuple. See #97059.
query T rowsort
query II rowsort
SELECT * FROM all_ab_tuple()
----
1 10
2 20
3 30
4 40

statement ok
CREATE FUNCTION all_ab_record() RETURNS SETOF RECORD LANGUAGE SQL AS $$
SELECT a, b FROM ab
$$

query II rowsort
SELECT * FROM all_ab_tuple()
----
(1,10)
(2,20)
(3,30)
(4,40)
1 10
2 20
3 30
4 40
8 changes: 8 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,8 @@ func (b *Builder) buildExistsSubquery(
types.Bool,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
),
tree.DBoolFalse,
}, types.Bool), nil
Expand Down Expand Up @@ -779,6 +781,8 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
), nil
}

Expand Down Expand Up @@ -831,6 +835,8 @@ func (b *Builder) buildSubquery(
subquery.Typ,
false, /* enableStepping */
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
), nil
}

Expand Down Expand Up @@ -913,6 +919,8 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
udf.Typ,
enableStepping,
udf.CalledOnNullInput,
udf.MultiColOutput,
udf.Generator,
), nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,12 @@ define UDFPrivate {
# operators. Non-scalar UDFs are always in a project-set operator, while
# scalar UDFs can be if used as a data source (e.g. SELECT * FROM udf()).
CalledOnNullInput bool

# MultiColOutput is true if the function may return multiple columns.
MultiColOutput bool

# Generator is true if the function may output a set of rows.
Generator bool
}

# KVOptions is a set of KVOptionItems that specify arbitrary keys and values
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/optbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ type Builder struct {
// within.
insideUDF bool

// insideDataSource is true when we are processing a data source.
insideDataSource bool

// If set, we are collecting view dependencies in schemaDeps. This can only
// happen inside view/function definitions.
//
Expand Down
Loading

0 comments on commit 1c35278

Please sign in to comment.