Skip to content

Commit

Permalink
Refactor the IN operator to use '= ANY()' syntax
Browse files Browse the repository at this point in the history
 This change reduces use of internal function calls from the
IN implementation to optimize performance. This also changes
IN behavior to correctly return NULL upon NULL elements
included in the list that IN checks against.

Added and corrected regression tests.

Co-authored by: Josh Innis <[email protected]>
  • Loading branch information
dehowef committed Sep 19, 2023
1 parent dc82449 commit 7ca8ef0
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 19 deletions.
75 changes: 70 additions & 5 deletions regress/expected/expr.out
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,28 @@ $$RETURN {bool: true, int: 1} IN ['str', 1, 1.0, true, null, {bool: true, int: 1
t
(1 row)

SELECT * FROM cypher('expr',
$$RETURN 1 IN [1.0, [NULL]]$$) AS r(c boolean);
c
---
t
(1 row)

SELECT * FROM cypher('expr',
$$RETURN [NULL] IN [1.0, [NULL]]$$) AS r(c boolean);
c
---
t
(1 row)

-- should return SQL null, nothing
SELECT * FROM cypher('expr',
$$RETURN true IN NULL $$) AS r(c boolean);
c
---

(1 row)

SELECT * FROM cypher('expr',
$$RETURN null IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
c
Expand All @@ -220,49 +241,93 @@ $$RETURN 'str' IN null $$) AS r(c boolean);

(1 row)

-- should all return false
SELECT * FROM cypher('expr',
$$RETURN 0 IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
c
---
f

(1 row)

SELECT * FROM cypher('expr',
$$RETURN 1.1 IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
c
---
f

(1 row)

SELECT * FROM cypher('expr',
$$RETURN 'Str' IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
c
---
f

(1 row)

SELECT * FROM cypher('expr',
$$RETURN [1,3,5,[2,4,5]] IN ['str', 1, 1.0, true, null, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
c
---
f

(1 row)

SELECT * FROM cypher('expr',
$$RETURN {bool: true, int: 2} IN ['str', 1, 1.0, true, null, {bool: true, int: 1}, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
c
---

(1 row)

-- should return false
SELECT * FROM cypher('expr',
$$RETURN 'str' IN ['StR', 1, true]$$) AS r(c boolean);
c
---
f
(1 row)

SELECT * FROM cypher('expr',
$$RETURN 2 IN ['StR', 1. true]$$) AS r(c boolean);
ERROR: syntax error at or near "true"
LINE 2: $$RETURN 2 IN ['StR', 1. true]$$) AS r(c boolean);
^
SELECT * FROM cypher('expr',
$$RETURN false IN ['StR', 1, true]$$) AS r(c boolean);
c
---
f
(1 row)

SELECT * FROM cypher('expr',
$$RETURN [1,2] IN ['StR', 1, 2, true]$$) AS r(c boolean);
c
---
f
(1 row)

SELECT * FROM cypher('expr',
$$RETURN 1 in [[1]]$$) AS r(c boolean);
c
---
f
(1 row)

SELECT * FROM cypher('expr',
$$RETURN 1 IN [[null]]$$) AS r(c boolean);
c
---
f
(1 row)

-- should error - ERROR: object of IN must be a list
SELECT * FROM cypher('expr',
$$RETURN null IN 'str' $$) AS r(c boolean);
ERROR: object of IN must be a list
LINE 2: $$RETURN null IN 'str' $$) AS r(c boolean);
^
SELECT * FROM cypher('expr',
$$RETURN 'str' IN 'str' $$) AS r(c boolean);
ERROR: object of IN must be a list
LINE 2: $$RETURN 'str' IN 'str' $$) AS r(c boolean);
^
-- list access
SELECT * FROM cypher('expr',
$$RETURN [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10][0]$$) AS r(c agtype);
Expand Down
20 changes: 19 additions & 1 deletion regress/sql/expr.sql
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,19 @@ SELECT * FROM cypher('expr',
$$RETURN [1,3,5,[2,4,6]] IN ['str', 1, 1.0, true, null, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN {bool: true, int: 1} IN ['str', 1, 1.0, true, null, {bool: true, int: 1}, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN 1 IN [1.0, [NULL]]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN [NULL] IN [1.0, [NULL]]$$) AS r(c boolean);
-- should return SQL null, nothing
SELECT * FROM cypher('expr',
$$RETURN true IN NULL $$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN null IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN null IN ['str', 1, 1.0, true]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN 'str' IN null $$) AS r(c boolean);
-- should all return false
SELECT * FROM cypher('expr',
$$RETURN 0 IN ['str', 1, 1.0, true, null]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
Expand All @@ -139,6 +144,19 @@ SELECT * FROM cypher('expr',
$$RETURN [1,3,5,[2,4,5]] IN ['str', 1, 1.0, true, null, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN {bool: true, int: 2} IN ['str', 1, 1.0, true, null, {bool: true, int: 1}, [1,3,5,[2,4,6]]]$$) AS r(c boolean);
-- should return false
SELECT * FROM cypher('expr',
$$RETURN 'str' IN ['StR', 1, true]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN 2 IN ['StR', 1. true]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN false IN ['StR', 1, true]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN [1,2] IN ['StR', 1, 2, true]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN 1 in [[1]]$$) AS r(c boolean);
SELECT * FROM cypher('expr',
$$RETURN 1 IN [[null]]$$) AS r(c boolean);
-- should error - ERROR: object of IN must be a list
SELECT * FROM cypher('expr',
$$RETURN null IN 'str' $$) AS r(c boolean);
Expand Down
142 changes: 130 additions & 12 deletions src/backend/parser/cypher_expr.c
Original file line number Diff line number Diff line change
Expand Up @@ -489,25 +489,143 @@ static Node *transform_AEXPR_OP(cypher_parsestate *cpstate, A_Expr *a)
a->location);
}

//static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a)
//{
// Oid func_in_oid;
// FuncExpr *result;
// List *args = NIL;

// args = lappend(args, transform_cypher_expr_recurse(cpstate, a->rexpr));
// args = lappend(args, transform_cypher_expr_recurse(cpstate, a->lexpr));

/* get the agtype_access_slice function */
// func_in_oid = get_ag_func_oid("agtype_in_operator", 2, AGTYPEOID,
// AGTYPEOID);

// result = makeFuncExpr(func_in_oid, AGTYPEOID, args, InvalidOid, InvalidOid,
// COERCE_EXPLICIT_CALL);

// result->location = exprLocation(a->lexpr);

// return (Node *)result;
//}

static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a)
{
Oid func_in_oid;
FuncExpr *result;
List *args = NIL;
ParseState *pstate = (ParseState *)cpstate;
cypher_list *rexpr;
Node *result = NULL;
Node *lexpr;
List *rexprs;
List *rvars;
List *rnonvars;
bool useOr;
ListCell *l;

/* Check for null arguments in the list to return NULL*/
if (!is_ag_node(a->rexpr, cypher_list))
{
if (nodeTag(a->rexpr) == T_A_Const)
{
A_Const *r_a_const = (A_Const*)a->rexpr;
if (r_a_const->isnull == true)
{
return (Node *)makeConst(AGTYPEOID, -1, InvalidOid, -1, (Datum)NULL, true, false);
}
}

ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("object of IN must be a list")));
}

args = lappend(args, transform_cypher_expr_recurse(cpstate, a->rexpr));
args = lappend(args, transform_cypher_expr_recurse(cpstate, a->lexpr));
Assert(is_ag_node(a->rexpr, cypher_list));

/* get the agtype_access_slice function */
func_in_oid = get_ag_func_oid("agtype_in_operator", 2, AGTYPEOID,
AGTYPEOID);
// If the operator is <>, combine with AND not OR.
if (strcmp(strVal(linitial(a->name)), "<>") == 0)
useOr = false;
else
useOr = true;

lexpr = transform_cypher_expr_recurse(cpstate, a->lexpr);

rexprs = rvars = rnonvars = NIL;

rexpr = (cypher_list *)a->rexpr;

foreach(l, (List *) rexpr->elems)
{
Node *rexpr = transform_cypher_expr_recurse(cpstate, lfirst(l));

result = makeFuncExpr(func_in_oid, AGTYPEOID, args, InvalidOid, InvalidOid,
COERCE_EXPLICIT_CALL);
rexprs = lappend(rexprs, rexpr);
if (contain_vars_of_level(rexpr, 0))
rvars = lappend(rvars, rexpr);
else
rnonvars = lappend(rnonvars, rexpr);
}


/*
* ScalarArrayOpExpr is only going to be useful if there's more than one
* non-Var righthand item.
*/
if (list_length(rnonvars) > 1)
{
List *allexprs;
Oid scalar_type;
List *aexprs;
ArrayExpr *newa;

allexprs = list_concat(list_make1(lexpr), rnonvars);

scalar_type = AGTYPEOID;

result->location = exprLocation(a->lexpr);
Assert (verify_common_type(scalar_type, allexprs));
/*
* coerce all the right-hand non-Var inputs to the common type
* and build an ArrayExpr for them.
*/

return (Node *)result;
aexprs = NIL;
foreach(l, rnonvars)
{
Node *rexpr = (Node *) lfirst(l);

rexpr = coerce_to_common_type(pstate, rexpr, AGTYPEOID, "IN");
aexprs = lappend(aexprs, rexpr);
}
newa = makeNode(ArrayExpr);
newa->array_typeid = get_array_type(AGTYPEOID);
/* array_collid will be set by parse_collate.c */
newa->element_typeid = AGTYPEOID;
newa->elements = aexprs;
newa->multidims = false;
result = (Node *) make_scalar_array_op(pstate, a->name, useOr,
lexpr, (Node *) newa, a->location);

/* Consider only the Vars (if any) in the loop below */
rexprs = rvars;
}

// Must do it the hard way, with a boolean expression tree.
foreach(l, rexprs)
{
Node *rexpr = (Node *) lfirst(l);
Node *cmp;

// Ordinary scalar operator
cmp = (Node *) make_op(pstate, a->name, copyObject(lexpr), rexpr,
pstate->p_last_srf, a->location);

cmp = coerce_to_boolean(pstate, cmp, "IN");
if (result == NULL)
result = cmp;
else
result = (Node *) makeBoolExpr(useOr ? OR_EXPR : AND_EXPR,
list_make2(result, cmp),
a->location);
}

return result;
}

static Node *transform_BoolExpr(cypher_parsestate *cpstate, BoolExpr *expr)
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/adt/agtype_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ Datum agtype_exists_all_agtype(PG_FUNCTION_ARGS)

PG_FUNCTION_INFO_V1(agtype_contains);
/*
* <@ operator for agtype. Returns true if the right agtype path/value entries
* @> operator for agtype. Returns true if the right agtype path/value entries
* contained at the top level within the left agtype value
*/
Datum agtype_contains(PG_FUNCTION_ARGS)
Expand Down

0 comments on commit 7ca8ef0

Please sign in to comment.