rapidsai · rapids-bot · Jul 19, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 4, 2024
@@ -290,6 +290,17 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op,
 
 namespace binops {
 
+/**
+ * @brief Returns true if the binary operator is supported for the given input and output types.
+ *
+ * Only data types are inspected; no column or scalar data is required to make the query.
+ *
+ * @param out The output data type
+ * @param lhs The left-hand cudf::data_type
+ * @param rhs The right-hand cudf::data_type
+ * @param op The binary operator
+ * @return true if `op` is supported for the given combination of `lhs`, `rhs` and `out` types
+ */
+bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op);
+
 /**
  * @brief Computes output valid mask for op between a column and a scalar
  *

@@ -50,6 +50,11 @@
 namespace cudf {
 namespace binops {
 
+// Thin forwarding wrapper: the actual support check is performed by
+// cudf::binops::compiled::is_supported_operation, called with the same arguments.
+bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op)
+{
+  return cudf::binops::compiled::is_supported_operation(out, lhs, rhs, op);
+}
+
 /**
  * @brief Computes output valid mask for op between a column and a scalar
  */
@@ -194,7 +199,7 @@ std::unique_ptr<column> binary_operation(LhsType const& lhs,
                                          rmm::device_async_resource_ref mr)
 {
   if constexpr (std::is_same_v<LhsType, column_view> and std::is_same_v<RhsType, column_view>)
-    CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match");
+    CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match", std::invalid_argument);
 
   if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING and
       output_type.id() == type_id::STRING and

@@ -84,3 +84,17 @@ cpdef Column binary_operation(
         raise ValueError(f"Invalid arguments {lhs} and {rhs}")
 
     return Column.from_libcudf(move(result))
+
+
+def is_supported_operation(
+    DataType out,
+    DataType lhs,
+    DataType rhs,
+    binary_operator op
+):
+    """Check whether a binary operation is supported for the given types.
+
+    Forwards the underlying C++ data types of the arguments to
+    ``cpp_binaryop.is_supported_operation``.
+
+    Parameters
+    ----------
+    out : DataType
+        The output data type.
+    lhs : DataType
+        The data type of the left-hand operand.
+    rhs : DataType
+        The data type of the right-hand operand.
+    op : binary_operator
+        The binary operator to query.
+
+    Returns
+    -------
+    bool
+        The result of the underlying support query.
+    """
+    return cpp_binaryop.is_supported_operation(
+        out.c_obj,
+        lhs.c_obj,
+        rhs.c_obj,
+        op
+    )
@@ -1,9 +1,11 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
-from libc.stdint cimport int32_t
+from libc.stdint cimport bool, int32_t
+from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 
+from cudf._lib.exception_handler cimport cudf_exception_handler
 from cudf._lib.pylibcudf.libcudf.column.column cimport column
 from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
 from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
@@ -19,48 +21,66 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         TRUE_DIV
         FLOOR_DIV
         MOD
+        PMOD
         PYMOD
         POW
         INT_POW
+        LOG_BASE
+        ATAN2
+        SHIFT_LEFT
+        SHIFT_RIGHT
+        SHIFT_RIGHT_UNSIGNED
+        BITWISE_AND
+        BITWISE_OR
+        BITWISE_XOR
+        LOGICAL_AND
+        LOGICAL_OR
         EQUAL
         NOT_EQUAL
         LESS
         GREATER
         LESS_EQUAL
         GREATER_EQUAL
         NULL_EQUALS
-        NULL_NOT_EQUALS
-        BITWISE_AND
-        BITWISE_OR
-        BITWISE_XOR
-        LOGICAL_AND
-        LOGICAL_OR
+        NULL_MAX
+        NULL_MIN
         GENERIC_BINARY
+        NULL_LOGICAL_AND
+        NULL_LOGICAL_OR
+        INVALID_BINARY
 
+    # Overloads of binary_operation declared from "cudf/binaryop.hpp". Each
+    # returns an owning pointer to the result column, and C++ exceptions are
+    # translated to Python exceptions by cudf_exception_handler.
+
+    # scalar (lhs) op column (rhs)
     cdef unique_ptr[column] binary_operation (
         const scalar& lhs,
         const column_view& rhs,
         binary_operator op,
         data_type output_type
-    ) except +
+    ) except +cudf_exception_handler
 
+    # column (lhs) op scalar (rhs)
     cdef unique_ptr[column] binary_operation (
         const column_view& lhs,
         const scalar& rhs,
         binary_operator op,
         data_type output_type
-    ) except +
+    ) except +cudf_exception_handler
 
+    # column (lhs) op column (rhs)
     cdef unique_ptr[column] binary_operation (
         const column_view& lhs,
         const column_view& rhs,
         binary_operator op,
         data_type output_type
-    ) except +
+    ) except +cudf_exception_handler
 
+    # column (lhs) op column (rhs), with the operator supplied as a string
+    # NOTE(review): presumably user-defined operator source for the
+    # GENERIC_BINARY path - confirm against cudf/binaryop.hpp.
     cdef unique_ptr[column] binary_operation (
         const column_view& lhs,
         const column_view& rhs,
         const string& op,
         data_type output_type
-    ) except +
+    ) except +cudf_exception_handler
+
+# Declared in its own extern block because the function is taken from the
+# nested "cudf::binops" namespace rather than "cudf".
+cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil:
+    # Type-only query: takes the output, lhs and rhs data types plus the
+    # operator and returns a bool. C++ exceptions are translated by
+    # cudf_exception_handler.
+    cdef bool is_supported_operation(
+        data_type output_type,
+        data_type lhs_type,
+        data_type rhs_type,
+        binary_operator op
+    ) except +cudf_exception_handler
@@ -0,0 +1,247 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+
-
-
+import numpy as np
+import pyarrow as pa
+import pytest
+from utils import assert_column_eq
+
+from cudf._lib import pylibcudf as plc
+
+
+def idfn(param):
+    ltype, rtype, outtype = param
+    return f"{ltype}-{rtype}-{outtype}"
+
+
+@pytest.fixture(params=[True, False], ids=["nulls", "no_nulls"])
+def nulls(request):
+    """Parametrized flag: True for the "nulls" case, False for "no_nulls"."""
+    return request.param
+
+
+@pytest.fixture
+def pa_data(request, nulls):
+    ltype, rtype, outtype = request.param
+    values = make_col(ltype, nulls), make_col(rtype, nulls), outtype
+    return values
+
+
+@pytest.fixture
+def plc_data(pa_data):
+    lhs, rhs, outtype = pa_data
+    return (
+        plc.interop.from_arrow(lhs),
+        plc.interop.from_arrow(rhs),
+        plc.interop.from_arrow(pa.from_numpy_dtype(np.dtype(outtype))),
+    )
+
+
+def make_col(dtype, nulls):
+    if dtype == "int64":
+        data = [1, 2, 3, 4, 5]
+        pa_type = pa.int32()
+    elif dtype == "uint64":
+        data = [1, 2, 3, 4, 5]
+        pa_type = pa.uint32()
+    elif dtype == "float64":
+        data = [1.0, 2.0, 3.0, 4.0, 5.0]
+        pa_type = pa.float32()
+    elif dtype == "bool":
+        data = [True, False, True, False, True]
+        pa_type = pa.bool_()
+    elif dtype == "timestamp64[ns]":
+        data = [
+            np.datetime64("2022-01-01"),
+            np.datetime64("2022-01-02"),
+            np.datetime64("2022-01-03"),
+            np.datetime64("2022-01-04"),
+            np.datetime64("2022-01-05"),
+        ]
+        pa_type = pa.timestamp("ns")
+    elif dtype == "timedelta64[ns]":
+        data = [
+            np.timedelta64(1, "ns"),
+            np.timedelta64(2, "ns"),
+            np.timedelta64(3, "ns"),
+            np.timedelta64(4, "ns"),
+            np.timedelta64(5, "ns"),
+        ]
+        pa_type = pa.duration("ns")
+    else:
+        raise ValueError("Unsupported dtype")
+
+    if nulls:
+        data[3] = None
+
+    return pa.array(data, type=pa_type)
+
+
+def _test_binaryop_inner(pa_data, plc_data, pyop, plc_op):
+    lhs_py, rhs_py, outty_py = pa_data
+    lhs_plc, rhs_plc, outty_plc = plc_data
+
+    def get_result():
+        return plc.binaryop.binary_operation(
+            lhs_plc,
+            rhs_plc,
+            plc_op,
+            outty_plc,
+        )
+
+    if not plc.binaryop.is_supported_operation(
+        outty_plc, lhs_plc.type(), rhs_plc.type(), plc_op
+    ):
+        with pytest.raises(TypeError):
+            get_result()
+        return
+
+    expect = [
+        pyop(x, y) for x, y in zip(lhs_py.to_pylist(), rhs_py.to_pylist())
+    ]
+    expect = pa.array(expect, type=outty_py)
+    got = get_result()
+    assert_column_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [
+        ("int64", "int64", "int64"),
+        ("int64", "float64", "float64"),
+        ("int64", "int64", "datetime64[ns]"),
+    ],
+    indirect=True,
+    ids=idfn,
+)
+def test_add(pa_data, plc_data):
+    def add(x, y):
+        if x is None or y is None:
+            return None
+        return x + y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        add,
+        plc.binaryop.BinaryOperator.ADD,
+    )
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
+    indirect=True,
+    ids=idfn,
+)
+def test_sub(pa_data, plc_data):
+    def sub(x, y):
+        if x is None or y is None:
+            return None
+        return x - y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        sub,
+        plc.binaryop.BinaryOperator.SUB,
+    )
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
+    indirect=True,
+    ids=idfn,
+)
+def test_mul(pa_data, plc_data):
+    def mul(x, y):
+        if x is None or y is None:
+            return None
+        return x * y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        mul,
+        plc.binaryop.BinaryOperator.MUL,
+    )
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
+    indirect=True,
+    ids=idfn,
+)
+def test_div(pa_data, plc_data):
+    def div(x, y):
+        if x is None or y is None:
+            return None
+        return x / y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        div,
+        plc.binaryop.BinaryOperator.DIV,
+    )
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
+    indirect=True,
+    ids=idfn,
+)
+def test_floordiv(pa_data, plc_data):
+    def floordiv(x, y):
+        if x is None or y is None:
+            return None
+        return x // y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        floordiv,
+        plc.binaryop.BinaryOperator.FLOOR_DIV,
+    )
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
+    indirect=True,
+    ids=idfn,
+)
+def test_truediv(pa_data, plc_data):
+    def truediv(x, y):
+        if x is None or y is None:
+            return None
+        return x / y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        truediv,
+        plc.binaryop.BinaryOperator.TRUE_DIV,
+    )
+
+
+@pytest.mark.parametrize(
+    "pa_data",
+    [("int64", "int64", "int64"), ("int64", "float64", "float64")],
+    indirect=True,
+    ids=idfn,
+)
+def test_mod(pa_data, plc_data):
+    def mod(x, y):
+        if x is None or y is None:
+            return None
+        return x % y
+
+    _test_binaryop_inner(
+        pa_data,
+        plc_data,
+        mod,
+        plc.binaryop.BinaryOperator.MOD,
-        plc.binaryop.BinaryOperator.MOD,
+        plc.binaryop.BinaryOperator.PYMOD,
-        plc.binaryop.BinaryOperator.MOD,
+        plc.binaryop.BinaryOperator.PYMOD,
+    )