Skip to content

Commit

Permalink
Add Python bindings for string literal support in AST (rapidsai#13073)
Browse files Browse the repository at this point in the history
Depends on rapidsai#13061
Add Python bindings for string scalar support in AST
Add unit tests for string comparisons: column vs. column and column vs. literal.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ashwin Srinath (https://github.com/shwina)

URL: rapidsai#13073
  • Loading branch information
karthikeyann authored May 8, 2023
1 parent 0a5065f commit 3a45d2c
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 32 deletions.
16 changes: 3 additions & 13 deletions python/cudf/cudf/_lib/expressions.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from libc.stdint cimport int32_t, int64_t
from libcpp.memory cimport unique_ptr
Expand All @@ -9,25 +9,15 @@ from cudf._lib.cpp.expressions cimport (
literal,
operation,
)
from cudf._lib.cpp.scalar.scalar cimport numeric_scalar

ctypedef enum scalar_type_t:
INT
DOUBLE


ctypedef union int_or_double_scalar_ptr:
unique_ptr[numeric_scalar[int64_t]] int_ptr
unique_ptr[numeric_scalar[double]] double_ptr
from cudf._lib.cpp.scalar.scalar cimport numeric_scalar, scalar, string_scalar


cdef class Expression:
cdef unique_ptr[expression] c_obj


cdef class Literal(Expression):
cdef scalar_type_t c_scalar_type
cdef int_or_double_scalar_ptr c_scalar
cdef unique_ptr[scalar] c_scalar


cdef class ColumnReference(Expression):
Expand Down
25 changes: 9 additions & 16 deletions python/cudf/cudf/_lib/expressions.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from enum import Enum

Expand Down Expand Up @@ -77,27 +77,20 @@ class TableReference(Enum):
# restrictive at the moment.
cdef class Literal(Expression):
def __cinit__(self, value):
# TODO: Would love to find a better solution than unions for literals.
cdef int intval
cdef double doubleval

if isinstance(value, int):
self.c_scalar_type = scalar_type_t.INT
intval = value
self.c_scalar.int_ptr = make_unique[numeric_scalar[int64_t]](
intval, True
)
self.c_scalar.reset(new numeric_scalar[int64_t](value, True))
self.c_obj = <expression_ptr> make_unique[libcudf_exp.literal](
<numeric_scalar[int64_t] &>dereference(self.c_scalar.int_ptr)
<numeric_scalar[int64_t] &>dereference(self.c_scalar)
)
elif isinstance(value, float):
self.c_scalar_type = scalar_type_t.DOUBLE
doubleval = value
self.c_scalar.double_ptr = make_unique[numeric_scalar[double]](
doubleval, True
self.c_scalar.reset(new numeric_scalar[double](value, True))
self.c_obj = <expression_ptr> make_unique[libcudf_exp.literal](
<numeric_scalar[double] &>dereference(self.c_scalar)
)
elif isinstance(value, str):
self.c_scalar.reset(new string_scalar(value.encode(), True))
self.c_obj = <expression_ptr> make_unique[libcudf_exp.literal](
<numeric_scalar[double] &>dereference(self.c_scalar.double_ptr)
<string_scalar &>dereference(self.c_scalar)
)


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/_internals/expressions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

import ast
import functools
Expand Down Expand Up @@ -115,7 +115,7 @@ def visit_Name(self, node):
self.stack.append(ColumnReference(col_id))

def visit_Constant(self, node):
if not isinstance(node, ast.Num):
if not isinstance(node, (ast.Num, ast.Str)):
raise ValueError(
f"Unsupported literal {repr(node.value)} of type "
"{type(node.value).__name__}"
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7063,7 +7063,8 @@ def eval(self, expr: str, inplace: bool = False, **kwargs):
Specifically, `&` must be used for bitwise operators on integers,
not `and`, which is specifically for the logical and between
booleans.
* Only numerical types are currently supported.
* Only numerical types currently support all operators.
* String types currently support comparison operators.
* Operators generally will not cast automatically. Users are
responsible for casting columns to suitable types before
evaluating a function.
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9820,6 +9820,9 @@ def df_eval(request):
float,
),
("a_b_are_equal = (a == b)", int),
("a > b", str),
("a < '1'", str),
('a == "1"', str),
],
)
def test_dataframe_eval(df_eval, expr, dtype):
Expand Down

0 comments on commit 3a45d2c

Please sign in to comment.