diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst index ebf5fab3052..558268ea495 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst @@ -1,6 +1,6 @@ -======= -copying -======= +======== +datetime +======== .. automodule:: cudf._lib.pylibcudf.datetime :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst new file mode 100644 index 00000000000..03f769ee861 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst @@ -0,0 +1,6 @@ +=========== +expressions +=========== + +.. automodule:: cudf._lib.pylibcudf.expressions + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 5899d272160..505765bba0f 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -15,6 +15,7 @@ This page provides API documentation for pylibcudf. concatenate copying datetime + expressions filling gpumemoryview groupby diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 5a067e84f56..38b7e9ebe04 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -21,7 +21,6 @@ set(cython_sources copying.pyx csv.pyx datetime.pyx - expressions.pyx filling.pyx groupby.pyx hash.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 18b95f5f2e1..34c0e29d0b1 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import numpy as np from . 
import ( @@ -8,7 +8,6 @@ copying, csv, datetime, - expressions, filling, groupby, hash, diff --git a/python/cudf/cudf/_lib/expressions.pyx b/python/cudf/cudf/_lib/expressions.pyx deleted file mode 100644 index 3fb29279ed7..00000000000 --- a/python/cudf/cudf/_lib/expressions.pyx +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. - -from enum import Enum - -import numpy as np - -from cython.operator cimport dereference -from libc.stdint cimport int64_t -from libcpp.memory cimport make_unique, unique_ptr -from libcpp.string cimport string -from libcpp.utility cimport move - -from cudf._lib.pylibcudf.libcudf cimport expressions as libcudf_exp -from cudf._lib.pylibcudf.libcudf.types cimport size_type -from cudf._lib.pylibcudf.libcudf.wrappers.timestamps cimport ( - timestamp_ms, - timestamp_us, -) - -# Necessary for proper casting, see below. -ctypedef int32_t underlying_type_ast_operator - - -# Aliases for simplicity -ctypedef unique_ptr[libcudf_exp.expression] expression_ptr - - -class ASTOperator(Enum): - ADD = libcudf_exp.ast_operator.ADD - SUB = libcudf_exp.ast_operator.SUB - MUL = libcudf_exp.ast_operator.MUL - DIV = libcudf_exp.ast_operator.DIV - TRUE_DIV = libcudf_exp.ast_operator.TRUE_DIV - FLOOR_DIV = libcudf_exp.ast_operator.FLOOR_DIV - MOD = libcudf_exp.ast_operator.MOD - PYMOD = libcudf_exp.ast_operator.PYMOD - POW = libcudf_exp.ast_operator.POW - EQUAL = libcudf_exp.ast_operator.EQUAL - NULL_EQUAL = libcudf_exp.ast_operator.NULL_EQUAL - NOT_EQUAL = libcudf_exp.ast_operator.NOT_EQUAL - LESS = libcudf_exp.ast_operator.LESS - GREATER = libcudf_exp.ast_operator.GREATER - LESS_EQUAL = libcudf_exp.ast_operator.LESS_EQUAL - GREATER_EQUAL = libcudf_exp.ast_operator.GREATER_EQUAL - BITWISE_AND = libcudf_exp.ast_operator.BITWISE_AND - BITWISE_OR = libcudf_exp.ast_operator.BITWISE_OR - BITWISE_XOR = libcudf_exp.ast_operator.BITWISE_XOR - LOGICAL_AND = libcudf_exp.ast_operator.LOGICAL_AND - NULL_LOGICAL_AND = 
libcudf_exp.ast_operator.NULL_LOGICAL_AND - LOGICAL_OR = libcudf_exp.ast_operator.LOGICAL_OR - NULL_LOGICAL_OR = libcudf_exp.ast_operator.NULL_LOGICAL_OR - # Unary operators - IDENTITY = libcudf_exp.ast_operator.IDENTITY - IS_NULL = libcudf_exp.ast_operator.IS_NULL - SIN = libcudf_exp.ast_operator.SIN - COS = libcudf_exp.ast_operator.COS - TAN = libcudf_exp.ast_operator.TAN - ARCSIN = libcudf_exp.ast_operator.ARCSIN - ARCCOS = libcudf_exp.ast_operator.ARCCOS - ARCTAN = libcudf_exp.ast_operator.ARCTAN - SINH = libcudf_exp.ast_operator.SINH - COSH = libcudf_exp.ast_operator.COSH - TANH = libcudf_exp.ast_operator.TANH - ARCSINH = libcudf_exp.ast_operator.ARCSINH - ARCCOSH = libcudf_exp.ast_operator.ARCCOSH - ARCTANH = libcudf_exp.ast_operator.ARCTANH - EXP = libcudf_exp.ast_operator.EXP - LOG = libcudf_exp.ast_operator.LOG - SQRT = libcudf_exp.ast_operator.SQRT - CBRT = libcudf_exp.ast_operator.CBRT - CEIL = libcudf_exp.ast_operator.CEIL - FLOOR = libcudf_exp.ast_operator.FLOOR - ABS = libcudf_exp.ast_operator.ABS - RINT = libcudf_exp.ast_operator.RINT - BIT_INVERT = libcudf_exp.ast_operator.BIT_INVERT - NOT = libcudf_exp.ast_operator.NOT - - -class TableReference(Enum): - LEFT = libcudf_exp.table_reference.LEFT - RIGHT = libcudf_exp.table_reference.RIGHT - - -# Note that this function only currently supports numeric literals. libcudf -# expressions don't really support other types yet though, so this isn't -# restrictive at the moment. 
-cdef class Literal(Expression): - def __cinit__(self, value): - if isinstance(value, int): - self.c_scalar.reset(new numeric_scalar[int64_t](value, True)) - self.c_obj = move(make_unique[libcudf_exp.literal]( - dereference(self.c_scalar) - )) - elif isinstance(value, float): - self.c_scalar.reset(new numeric_scalar[double](value, True)) - self.c_obj = move(make_unique[libcudf_exp.literal]( - dereference(self.c_scalar) - )) - elif isinstance(value, str): - self.c_scalar.reset(new string_scalar(value.encode(), True)) - self.c_obj = move(make_unique[libcudf_exp.literal]( - dereference(self.c_scalar) - )) - elif isinstance(value, np.datetime64): - scale, _ = np.datetime_data(value.dtype) - int_value = value.astype(np.int64) - if scale == "ms": - self.c_scalar.reset(new timestamp_scalar[timestamp_ms]( - int_value, True) - ) - self.c_obj = move(make_unique[libcudf_exp.literal]( - dereference(self.c_scalar) - )) - elif scale == "us": - self.c_scalar.reset(new timestamp_scalar[timestamp_us]( - int_value, True) - ) - self.c_obj = move(make_unique[libcudf_exp.literal]( - dereference(self.c_scalar) - )) - else: - raise NotImplementedError( - f"Unhandled datetime scale {scale=}" - ) - else: - raise NotImplementedError( - f"Don't know how to make literal with type {type(value)}" - ) - - -cdef class ColumnReference(Expression): - def __cinit__(self, size_type index): - self.c_obj = move(make_unique[libcudf_exp.column_reference]( - index - )) - - -cdef class Operation(Expression): - def __cinit__(self, op, Expression left, Expression right=None): - cdef libcudf_exp.ast_operator op_value = ( - op.value - ) - - if right is None: - self.c_obj = move(make_unique[libcudf_exp.operation]( - op_value, dereference(left.c_obj) - )) - else: - self.c_obj = move(make_unique[libcudf_exp.operation]( - op_value, dereference(left.c_obj), dereference(right.c_obj) - )) - -cdef class ColumnNameReference(Expression): - def __cinit__(self, string name): - self.c_obj = \ - 
move(make_unique[libcudf_exp.column_name_reference](name)) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 158fb6051c3..e7959d21e01 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -37,12 +37,12 @@ cimport cudf._lib.pylibcudf.libcudf.io.data_sink as cudf_io_data_sink cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types cimport cudf._lib.pylibcudf.libcudf.types as cudf_types from cudf._lib.column cimport Column -from cudf._lib.expressions cimport Expression from cudf._lib.io.utils cimport ( make_sinks_info, make_source_info, update_struct_field_names, ) +from cudf._lib.pylibcudf.expressions cimport Expression from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource from cudf._lib.pylibcudf.libcudf.expressions cimport expression from cudf._lib.pylibcudf.libcudf.io.parquet cimport ( diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index a2d11bbea6e..0800fa18e94 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -20,6 +20,7 @@ set(cython_sources concatenate.pyx copying.pyx datetime.pyx + expressions.pyx filling.pyx gpumemoryview.pyx groupby.pyx diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index da2b7806203..26e89b818d3 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -8,6 +8,7 @@ from . 
cimport ( concatenate, copying, datetime, + expressions, filling, groupby, join, diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py index acbc84d7177..e89a5ed9f96 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py @@ -7,6 +7,7 @@ concatenate, copying, datetime, + expressions, filling, groupby, interop, diff --git a/python/cudf/cudf/_lib/expressions.pxd b/python/cudf/cudf/_lib/pylibcudf/expressions.pxd similarity index 50% rename from python/cudf/cudf/_lib/expressions.pxd rename to python/cudf/cudf/_lib/pylibcudf/expressions.pxd index 4a20c5fc545..64825b89d9f 100644 --- a/python/cudf/cudf/_lib/expressions.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/expressions.pxd @@ -1,36 +1,31 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. - -from libc.stdint cimport int32_t, int64_t +# Copyright (c) 2024, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr +from libcpp.string cimport string from cudf._lib.pylibcudf.libcudf.expressions cimport ( - column_reference, + ast_operator, expression, - literal, - operation, -) -from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport ( - numeric_scalar, - scalar, - string_scalar, - timestamp_scalar, + table_reference, ) +from .scalar cimport Scalar + cdef class Expression: cdef unique_ptr[expression] c_obj - cdef class Literal(Expression): - cdef unique_ptr[scalar] c_scalar - + # Hold on to input scalar so it doesn't get gc'ed + cdef Scalar scalar cdef class ColumnReference(Expression): pass - cdef class Operation(Expression): - pass + # Hold on to the input expressions so + # they don't get gc'ed + cdef Expression right + cdef Expression left cdef class ColumnNameReference(Expression): pass diff --git a/python/cudf/cudf/_lib/pylibcudf/expressions.pyx b/python/cudf/cudf/_lib/pylibcudf/expressions.pyx new file mode 100644 index 00000000000..38de11406ad --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/expressions.pyx @@ -0,0 
+1,188 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from cudf._lib.pylibcudf.libcudf.expressions import \ + ast_operator as ASTOperator # no-cython-lint +from cudf._lib.pylibcudf.libcudf.expressions import \ + table_reference as TableReference # no-cython-lint + +from cython.operator cimport dereference +from libc.stdint cimport int32_t, int64_t +from libcpp.memory cimport make_unique, unique_ptr +from libcpp.string cimport string +from libcpp.utility cimport move + +from cudf._lib.pylibcudf.libcudf cimport expressions as libcudf_exp +from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport ( + duration_scalar, + numeric_scalar, + string_scalar, + timestamp_scalar, +) +from cudf._lib.pylibcudf.libcudf.types cimport size_type, type_id +from cudf._lib.pylibcudf.libcudf.wrappers.durations cimport ( + duration_ms, + duration_ns, + duration_s, + duration_us, +) +from cudf._lib.pylibcudf.libcudf.wrappers.timestamps cimport ( + timestamp_ms, + timestamp_ns, + timestamp_s, + timestamp_us, +) + +from .scalar cimport Scalar +from .traits cimport is_chrono, is_numeric +from .types cimport DataType + +# Aliases for simplicity +ctypedef unique_ptr[libcudf_exp.expression] expression_ptr + +cdef class Literal(Expression): + """ + A literal value used in an abstract syntax tree. + + For details, see :cpp:class:`cudf::ast::literal`. + + Parameters + ---------- + value : Scalar + The Scalar value of the Literal. + Must be either numeric, string, or a timestamp/duration scalar. 
+ """ + def __cinit__(self, Scalar value): + self.scalar = value + cdef DataType typ = value.type() + cdef type_id tid = typ.id() + if not (is_numeric(typ) or is_chrono(typ) or tid == type_id.STRING): + raise ValueError( + "Only numeric, string, or timestamp/duration scalars are accepted" + ) + # TODO: Accept type-erased scalar in AST C++ code + # Then a lot of this code can be deleted + if tid == type_id.INT64: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.INT32: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.FLOAT64: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.FLOAT32: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.STRING: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.TIMESTAMP_NANOSECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.TIMESTAMP_MICROSECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.TIMESTAMP_MILLISECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.TIMESTAMP_SECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.DURATION_NANOSECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.DURATION_MICROSECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == 
type_id.DURATION_MILLISECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.DURATION_SECONDS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + else: + raise NotImplementedError( + f"Don't know how to make literal with type id {tid}" + ) + +cdef class ColumnReference(Expression): + """ + An expression referring to data from a column in a table. + + For details, see :cpp:class:`cudf::ast::column_reference`. + + Parameters + ---------- + index : size_type + The index of this column in the table + (provided when the expression is evaluated). + table_source : TableReference, default TableReference.LEFT + Which table to use in cases with two tables (e.g. joins) + """ + def __cinit__( + self, + size_type index, + table_reference table_source=table_reference.LEFT + ): + self.c_obj = move(make_unique[libcudf_exp.column_reference]( + index, table_source + )) + + +cdef class Operation(Expression): + """ + An operation expression holds an operator and zero or more operands. + + For details, see :cpp:class:`cudf::ast::operation`. + + Parameters + ---------- + op : ASTOperator + The operator to apply to the operand(s). + left : Expression + Left input expression (left operand) + right : Expression, default None + Right input expression (right operand). + You should only pass this if the input expression is a binary operation. 
+ """ + def __cinit__(self, ast_operator op, Expression left, Expression right=None): + self.left = left + self.right = right + if right is None: + self.c_obj = move(make_unique[libcudf_exp.operation]( + op, dereference(left.c_obj) + )) + else: + self.c_obj = move(make_unique[libcudf_exp.operation]( + op, dereference(left.c_obj), dereference(right.c_obj) + )) + +cdef class ColumnNameReference(Expression): + """ + An expression referring to data from a column in a table. + + For details, see :cpp:class:`cudf::ast::column_name_reference`. + + Parameters + ---------- + name : str + Name of this column in the table metadata + (provided when the expression is evaluated). + """ + def __cinit__(self, str name): + self.c_obj = \ + move(make_unique[libcudf_exp.column_name_reference]( + (name.encode("utf-8")) + )) diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt index 699e85ce567..b04e94f1546 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt @@ -12,8 +12,8 @@ # the License. # ============================================================================= -set(cython_sources aggregation.pyx binaryop.pyx copying.pyx reduce.pyx replace.pyx round.pyx - stream_compaction.pyx types.pyx unary.pyx +set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx reduce.pyx replace.pyx + round.pyx stream_compaction.pyx types.pyx unary.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd index 279d969db50..427e16d4ff8 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -14,63 +15,63 @@ from cudf._lib.pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil: - ctypedef enum ast_operator: + cpdef enum class ast_operator(int32_t): # Binary operators - ADD "cudf::ast::ast_operator::ADD" - SUB "cudf::ast::ast_operator::SUB" - MUL "cudf::ast::ast_operator::MUL" - DIV "cudf::ast::ast_operator::DIV" - TRUE_DIV "cudf::ast::ast_operator::TRUE_DIV" - FLOOR_DIV "cudf::ast::ast_operator::FLOOR_DIV" - MOD "cudf::ast::ast_operator::MOD" - PYMOD "cudf::ast::ast_operator::PYMOD" - POW "cudf::ast::ast_operator::POW" - EQUAL "cudf::ast::ast_operator::EQUAL" - NULL_EQUAL "cudf::ast::ast_operator::NULL_EQUAL" - NOT_EQUAL "cudf::ast::ast_operator::NOT_EQUAL" - LESS "cudf::ast::ast_operator::LESS" - GREATER "cudf::ast::ast_operator::GREATER" - LESS_EQUAL "cudf::ast::ast_operator::LESS_EQUAL" - GREATER_EQUAL "cudf::ast::ast_operator::GREATER_EQUAL" - BITWISE_AND "cudf::ast::ast_operator::BITWISE_AND" - BITWISE_OR "cudf::ast::ast_operator::BITWISE_OR" - BITWISE_XOR "cudf::ast::ast_operator::BITWISE_XOR" - NULL_LOGICAL_AND "cudf::ast::ast_operator::NULL_LOGICAL_AND" - LOGICAL_AND "cudf::ast::ast_operator::LOGICAL_AND" - NULL_LOGICAL_OR "cudf::ast::ast_operator::NULL_LOGICAL_OR" - LOGICAL_OR "cudf::ast::ast_operator::LOGICAL_OR" + ADD + SUB + MUL + DIV + TRUE_DIV + FLOOR_DIV + MOD + PYMOD + POW + EQUAL + NULL_EQUAL + NOT_EQUAL + LESS + GREATER + LESS_EQUAL + GREATER_EQUAL + BITWISE_AND + BITWISE_OR + BITWISE_XOR + NULL_LOGICAL_AND + LOGICAL_AND + NULL_LOGICAL_OR + LOGICAL_OR # Unary operators - IDENTITY "cudf::ast::ast_operator::IDENTITY" - IS_NULL "cudf::ast::ast_operator::IS_NULL" - SIN "cudf::ast::ast_operator::SIN" - COS "cudf::ast::ast_operator::COS" - TAN "cudf::ast::ast_operator::TAN" - ARCSIN "cudf::ast::ast_operator::ARCSIN" - ARCCOS "cudf::ast::ast_operator::ARCCOS" - ARCTAN 
"cudf::ast::ast_operator::ARCTAN" - SINH "cudf::ast::ast_operator::SINH" - COSH "cudf::ast::ast_operator::COSH" - TANH "cudf::ast::ast_operator::TANH" - ARCSINH "cudf::ast::ast_operator::ARCSINH" - ARCCOSH "cudf::ast::ast_operator::ARCCOSH" - ARCTANH "cudf::ast::ast_operator::ARCTANH" - EXP "cudf::ast::ast_operator::EXP" - LOG "cudf::ast::ast_operator::LOG" - SQRT "cudf::ast::ast_operator::SQRT" - CBRT "cudf::ast::ast_operator::CBRT" - CEIL "cudf::ast::ast_operator::CEIL" - FLOOR "cudf::ast::ast_operator::FLOOR" - ABS "cudf::ast::ast_operator::ABS" - RINT "cudf::ast::ast_operator::RINT" - BIT_INVERT "cudf::ast::ast_operator::BIT_INVERT" - NOT "cudf::ast::ast_operator::NOT" + IDENTITY + IS_NULL + SIN + COS + TAN + ARCSIN + ARCCOS + ARCTAN + SINH + COSH + TANH + ARCSINH + ARCCOSH + ARCTANH + EXP + LOG + SQRT + CBRT + CEIL + FLOOR + ABS + RINT + BIT_INVERT + NOT cdef cppclass expression: pass - ctypedef enum table_reference: - LEFT "cudf::ast::table_reference::LEFT" - RIGHT "cudf::ast::table_reference::RIGHT" + cpdef enum class table_reference(int32_t): + LEFT + RIGHT cdef cppclass literal(expression): # Due to https://github.com/cython/cython/issues/3198, we need to diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pyx b/python/cudf/cudf/_lib/pylibcudf/libcudf/expressions.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 86a4a60eef1..622725e06a3 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -19,8 +19,8 @@ from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer cimport cudf._lib.pylibcudf.libcudf.transform as libcudf_transform from cudf._lib.column cimport Column -from cudf._lib.expressions cimport Expression from cudf._lib.pylibcudf cimport transform as plc_transform +from cudf._lib.pylibcudf.expressions cimport Expression from cudf._lib.pylibcudf.libcudf.column.column cimport column from 
cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view from cudf._lib.pylibcudf.libcudf.expressions cimport expression diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py index 393a68dd844..63714a78572 100644 --- a/python/cudf/cudf/core/_internals/expressions.py +++ b/python/cudf/cudf/core/_internals/expressions.py @@ -4,7 +4,10 @@ import ast import functools -from cudf._lib.expressions import ( +import pyarrow as pa + +import cudf._lib.pylibcudf as plc +from cudf._lib.pylibcudf.expressions import ( ASTOperator, ColumnReference, Expression, @@ -122,7 +125,9 @@ def visit_Constant(self, node): f"Unsupported literal {repr(node.value)} of type " "{type(node.value).__name__}" ) - self.stack.append(Literal(node.value)) + self.stack.append( + Literal(plc.interop.from_arrow(pa.scalar(node.value))) + ) def visit_UnaryOp(self, node): self.visit(node.operand) @@ -132,7 +137,7 @@ def visit_UnaryOp(self, node): # operand, so there's no way to know whether this should be a float # or an int. We should maybe see what Spark does, and this will # probably require casting. - self.nodes.append(Literal(-1)) + self.nodes.append(Literal(plc.interop.from_arrow(pa.scalar(-1)))) op = ASTOperator.MUL self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2])) elif isinstance(node.op, ast.UAdd): diff --git a/python/cudf/cudf/pylibcudf_tests/test_expressions.py b/python/cudf/cudf/pylibcudf_tests/test_expressions.py new file mode 100644 index 00000000000..f661512caad --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_expressions.py @@ -0,0 +1,50 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+import pyarrow as pa +import pytest + +import cudf._lib.pylibcudf as plc + +# We can't really evaluate these expressions, so just make sure +# construction works properly + + +def test_literal_construction_invalid(): + with pytest.raises(ValueError): + plc.expressions.Literal( + plc.interop.from_arrow(pa.scalar(None, type=pa.list_(pa.int64()))) + ) + + +@pytest.mark.parametrize( + "tableref", + [ + plc.expressions.TableReference.LEFT, + plc.expressions.TableReference.RIGHT, + ], +) +def test_columnref_construction(tableref): + plc.expressions.ColumnReference(1, tableref) + + +def test_columnnameref_construction(): + plc.expressions.ColumnNameReference("abc") + + +@pytest.mark.parametrize( + "kwargs", + [ + # Unary op + { + "op": plc.expressions.ASTOperator.IDENTITY, + "left": plc.expressions.ColumnReference(1), + }, + # Binop + { + "op": plc.expressions.ASTOperator.ADD, + "left": plc.expressions.ColumnReference(1), + "right": plc.expressions.ColumnReference(2), + }, + ], +) +def test_astoperation_construction(kwargs): + plc.expressions.Operation(**kwargs)