From 1b482bf2cba9b47e656e42c717ede677f480537e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 4 Apr 2024 08:59:43 -0500 Subject: [PATCH 01/21] patch changes from original PR --- cpp/include/cudf/binaryop.hpp | 13 +- cpp/src/binaryop/binaryop.cpp | 10 +- python/cudf/cudf/_lib/cpp/binaryop.pxd | 41 ++++-- python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 14 ++ .../cudf/pylibcudf_tests/test_binaryops.py | 131 ++++++++++++++++++ 5 files changed, 197 insertions(+), 12 deletions(-) create mode 100644 python/cudf/cudf/pylibcudf_tests/test_binaryops.py diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 9df4b4eb00f..bfca3a5d090 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -237,6 +237,17 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, namespace binops { +/** + * @brief Returns true if the binary operator is supported for the given input types. 
+ * + * @param out The output data type + * @param lhs The left-hand cudf::data_type + * @param rhs The right-hand cudf::data_type + * @param op The binary operator + * @return true if the binary operator is supported for the given input types + */ +bool is_supported_binaryop(data_type out, data_type lhs, data_type rhs, binary_operator op); + /** * @brief Computes output valid mask for op between a column and a scalar * diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index be91c3b4d08..c150f1d5e6e 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -49,6 +49,14 @@ namespace cudf { namespace binops { +/** + * @brief Returns true if the binary operator is supported for the given input types + */ +bool is_supported_binaryop(data_type out, data_type lhs, data_type rhs, binary_operator op) +{ + return cudf::binops::compiled::is_supported_operation(out, lhs, rhs, op); +} + /** * @brief Computes output valid mask for op between a column and a scalar */ @@ -192,7 +200,7 @@ std::unique_ptr binary_operation(LhsType const& lhs, rmm::mr::device_memory_resource* mr) { if constexpr (std::is_same_v and std::is_same_v) - CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match"); + CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match", std::invalid_argument); if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING and output_type.id() == type_id::STRING and diff --git a/python/cudf/cudf/_lib/cpp/binaryop.pxd b/python/cudf/cudf/_lib/cpp/binaryop.pxd index 735216e656a..a6bbae203c7 100644 --- a/python/cudf/cudf/_lib/cpp/binaryop.pxd +++ b/python/cudf/cudf/_lib/cpp/binaryop.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from libc.stdint cimport int32_t +from libc.stdint cimport bool, int32_t +from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -8,6 +9,7 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.types cimport data_type +from cudf._lib.exception_handler cimport cudf_exception_handler cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: @@ -19,9 +21,20 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: TRUE_DIV FLOOR_DIV MOD + PMOD PYMOD POW INT_POW + LOG_BASE + ATAN2 + SHIFT_LEFT + SHIFT_RIGHT + SHIFT_RIGHT_UNSIGNED + BITWISE_AND + BITWISE_OR + BITWISE_XOR + LOGICAL_AND + LOGICAL_OR EQUAL NOT_EQUAL LESS @@ -29,37 +42,45 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: LESS_EQUAL GREATER_EQUAL NULL_EQUALS - BITWISE_AND - BITWISE_OR - BITWISE_XOR - LOGICAL_AND - LOGICAL_OR + NULL_MAX + NULL_MIN GENERIC_BINARY + NULL_LOGICAL_AND + NULL_LOGICAL_OR + INVALID_BINARY cdef unique_ptr[column] binary_operation ( const scalar& lhs, const column_view& rhs, binary_operator op, data_type output_type - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const scalar& rhs, binary_operator op, data_type output_type - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const column_view& rhs, binary_operator op, data_type output_type - ) except + + ) except +cudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const column_view& rhs, const string& op, data_type output_type - ) except + + ) except +cudf_exception_handler + +cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil: + cdef bool is_supported_binaryop( + data_type output_type, + data_type lhs_type, + data_type rhs_type, + binary_operator op + ) except 
+cudf_exception_handler diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index 16de7757469..656b51787cf 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -84,3 +84,17 @@ cpdef Column binary_operation( raise ValueError(f"Invalid arguments {lhs} and {rhs}") return Column.from_libcudf(move(result)) + + +def _is_supported_binaryop( + DataType out, + DataType lhs, + DataType rhs, + binary_operator op +): + return cpp_binaryop.is_supported_binaryop( + out.c_obj, + lhs.c_obj, + rhs.c_obj, + op + ) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py new file mode 100644 index 00000000000..541c6215c8d --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import itertools + +import numpy as np +import pyarrow as pa +import pytest +from utils import assert_array_eq, column_from_arrow + +from cudf._lib import pylibcudf as plc +from cudf._lib.types import dtype_to_pylibcudf_type + + +@pytest.fixture(scope="module") +def columns(): + return { + "int8": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int8())), + "int16": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int16())), + "int32": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int32())), + "int64": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int64())), + "uint8": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint8())), + "uint16": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint16())), + "uint32": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint32())), + "uint64": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint64())), + "float32": column_from_arrow( + pa.array([1.0, 2.0, 3.0, 4.0], type=pa.float32()) + ), + "float64": column_from_arrow( + pa.array([1.0, 2.0, 3.0, 4.0], type=pa.float64()) + ), + "object": column_from_arrow( + 
pa.array(["a", "b", "c", "d"], type=pa.string()) + ), + "bool": column_from_arrow( + pa.array([True, False, True, False], type=pa.bool_()) + ), + "datetime64[ns]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.timestamp("ns")) + ), + "datetime64[ms]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.timestamp("ms")) + ), + "datetime64[us]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.timestamp("us")) + ), + "datetime64[s]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.timestamp("s")) + ), + "timedelta64[ns]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.duration("ns")) + ), + "timedelta64[ms]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.duration("ms")) + ), + "timedelta64[us]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.duration("us")) + ), + "timedelta64[s]": column_from_arrow( + pa.array([1, 2, 3, 4], type=pa.duration("s")) + ), + } + + +LIBCUDF_SUPPORTED_TYPES = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + "object", + "bool", + "datetime64[ns]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[s]", + "timedelta64[ns]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[s]", +] + + +def generate_binaryops_tests(): + tests = [] + for op in plc.binaryop.BinaryOperator.__members__.values(): + for combination in itertools.combinations_with_replacement( + LIBCUDF_SUPPORTED_TYPES, 3 + ): + tests.append((*combination, op)) + return tests + + +@pytest.mark.parametrize("lty, rty, outty, op", generate_binaryops_tests()) +def test_binaryops(columns, lty, rty, outty, op): + lhs = columns[lty] + rhs = columns[rty] + pylibcudf_outty = dtype_to_pylibcudf_type(outty) + + if plc.binaryop._is_supported_operation( + pylibcudf_outty, + dtype_to_pylibcudf_type(lty), + dtype_to_pylibcudf_type(rty), + op, + ): + expect_data = np.array([2, 4, 6, 8]).astype(outty) + + expect = pa.array(expect_data, type=pa.from_numpy_dtype(outty)) + got = 
plc.binaryop.binary_operation(lhs, rhs, op, pylibcudf_outty) + breakpoint() + assert_array_eq(got, expect) + else: + with pytest.raises(TypeError): + plc.binaryop.binary_operation(lhs, rhs, op, pylibcudf_outty) + + +def test_mismatched_sizes(): + lhs = column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int32())) + rhs = column_from_arrow(pa.array([1, 2, 3], type=pa.int32())) + + with pytest.raises(ValueError, match="Column sizes don't match"): + plc.binaryop.binary_operation( + lhs, + rhs, + plc.binaryop.BinaryOperator.ADD, + dtype_to_pylibcudf_type("int32"), + ) From 0a46a0f063c314121d76145890b4672c1613d7b6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 4 Apr 2024 10:46:19 -0500 Subject: [PATCH 02/21] first halfway decent testing strategy --- .../cudf/pylibcudf_tests/test_binaryops.py | 188 ++++++++++-------- 1 file changed, 106 insertions(+), 82 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 541c6215c8d..bb8ae20adc5 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,131 +1,155 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-import itertools -import numpy as np import pyarrow as pa import pytest -from utils import assert_array_eq, column_from_arrow +from utils import assert_column_eq from cudf._lib import pylibcudf as plc from cudf._lib.types import dtype_to_pylibcudf_type +LIBCUDF_SUPPORTED_TYPES = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + "object", + "bool", + "datetime64[ns]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[s]", + "timedelta64[ns]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[s]", +] + @pytest.fixture(scope="module") def columns(): return { - "int8": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int8())), - "int16": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int16())), - "int32": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int32())), - "int64": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int64())), - "uint8": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint8())), - "uint16": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint16())), - "uint32": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint32())), - "uint64": column_from_arrow(pa.array([1, 2, 3, 4], type=pa.uint64())), - "float32": column_from_arrow( + "int8": plc.interop.from_arrow(pa.array([1, 2, 3, 4], type=pa.int8())), + "int16": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.int16()) + ), + "int32": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.int32()) + ), + "int64": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.int64()) + ), + "uint8": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.uint8()) + ), + "uint16": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.uint16()) + ), + "uint32": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.uint32()) + ), + "uint64": plc.interop.from_arrow( + pa.array([1, 2, 3, 4], type=pa.uint64()) + ), + "float32": plc.interop.from_arrow( pa.array([1.0, 2.0, 3.0, 4.0], type=pa.float32()) ), - 
"float64": column_from_arrow( + "float64": plc.interop.from_arrow( pa.array([1.0, 2.0, 3.0, 4.0], type=pa.float64()) ), - "object": column_from_arrow( + "object": plc.interop.from_arrow( pa.array(["a", "b", "c", "d"], type=pa.string()) ), - "bool": column_from_arrow( + "bool": plc.interop.from_arrow( pa.array([True, False, True, False], type=pa.bool_()) ), - "datetime64[ns]": column_from_arrow( + "datetime64[ns]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.timestamp("ns")) ), - "datetime64[ms]": column_from_arrow( + "datetime64[ms]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.timestamp("ms")) ), - "datetime64[us]": column_from_arrow( + "datetime64[us]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.timestamp("us")) ), - "datetime64[s]": column_from_arrow( + "datetime64[s]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.timestamp("s")) ), - "timedelta64[ns]": column_from_arrow( + "timedelta64[ns]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.duration("ns")) ), - "timedelta64[ms]": column_from_arrow( + "timedelta64[ms]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.duration("ms")) ), - "timedelta64[us]": column_from_arrow( + "timedelta64[us]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.duration("us")) ), - "timedelta64[s]": column_from_arrow( + "timedelta64[s]": plc.interop.from_arrow( pa.array([1, 2, 3, 4], type=pa.duration("s")) ), } -LIBCUDF_SUPPORTED_TYPES = [ - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - "float32", - "float64", - "object", - "bool", - "datetime64[ns]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[s]", - "timedelta64[ns]", - "timedelta64[ms]", - "timedelta64[us]", - "timedelta64[s]", -] +@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) +def binop_lhs_ty(request): + return request.param -def generate_binaryops_tests(): - tests = [] - for op in plc.binaryop.BinaryOperator.__members__.values(): - for 
combination in itertools.combinations_with_replacement( - LIBCUDF_SUPPORTED_TYPES, 3 - ): - tests.append((*combination, op)) - return tests +@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) +def binop_rhs_ty(request): + return request.param -@pytest.mark.parametrize("lty, rty, outty, op", generate_binaryops_tests()) -def test_binaryops(columns, lty, rty, outty, op): - lhs = columns[lty] - rhs = columns[rty] - pylibcudf_outty = dtype_to_pylibcudf_type(outty) +@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) +def binop_out_ty(request): + return request.param - if plc.binaryop._is_supported_operation( - pylibcudf_outty, - dtype_to_pylibcudf_type(lty), - dtype_to_pylibcudf_type(rty), - op, - ): - expect_data = np.array([2, 4, 6, 8]).astype(outty) - expect = pa.array(expect_data, type=pa.from_numpy_dtype(outty)) - got = plc.binaryop.binary_operation(lhs, rhs, op, pylibcudf_outty) - breakpoint() - assert_array_eq(got, expect) - else: - with pytest.raises(TypeError): - plc.binaryop.binary_operation(lhs, rhs, op, pylibcudf_outty) +@pytest.fixture( + scope="module", + params=list(plc.binaryop.BinaryOperator.__members__.values()), +) +def binary_operators(request): + return request.param -def test_mismatched_sizes(): - lhs = column_from_arrow(pa.array([1, 2, 3, 4], type=pa.int32())) - rhs = column_from_arrow(pa.array([1, 2, 3], type=pa.int32())) - - with pytest.raises(ValueError, match="Column sizes don't match"): - plc.binaryop.binary_operation( - lhs, - rhs, - plc.binaryop.BinaryOperator.ADD, - dtype_to_pylibcudf_type("int32"), +@pytest.fixture(scope="module") +def add_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + fail = False + if not plc.binaryop._is_supported_binaryop( + dtype_to_pylibcudf_type(binop_out_ty), + dtype_to_pylibcudf_type(binop_lhs_ty), + dtype_to_pylibcudf_type(binop_rhs_ty), + plc.binaryop.BinaryOperator.ADD, + ): + fail = True + return (binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail) + + +def test_add(add_tests, 
columns): + binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail = add_tests + lhs = columns[binop_lhs_ty] + rhs = columns[binop_rhs_ty] + pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) + + if not fail: + expect_data = ( + plc.interop.to_arrow(lhs).to_numpy() + + plc.interop.to_arrow(rhs).to_numpy() + ).astype(binop_out_ty) + expect = pa.array(expect_data) + got = plc.binaryop.binary_operation( + lhs, rhs, plc.binaryop.BinaryOperator.ADD, pylibcudf_outty ) + assert_column_eq(got, expect) + else: + with pytest.raises(TypeError): + plc.binaryop.binary_operation( + lhs, rhs, plc.binaryop.BinaryOperator.ADD, pylibcudf_outty + ) From f5f33e6573723c1d3336232db6eda368c708dcf6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 4 Apr 2024 15:23:43 -0500 Subject: [PATCH 03/21] refactor out a lot of repeated code --- .../cudf/pylibcudf_tests/test_binaryops.py | 540 +++++++++++++++++- 1 file changed, 511 insertions(+), 29 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index bb8ae20adc5..d41f014fae0 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+import numpy as np import pyarrow as pa import pytest from utils import assert_column_eq @@ -9,28 +10,30 @@ from cudf._lib.types import dtype_to_pylibcudf_type LIBCUDF_SUPPORTED_TYPES = [ - "int8", - "int16", - "int32", + # "int8", + # "int16", + # "int32", "int64", - "uint8", - "uint16", - "uint32", + # "uint8", + # "uint16", + # "uint32", "uint64", - "float32", + # "float32", "float64", "object", "bool", "datetime64[ns]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[s]", + # "datetime64[ms]", + # "datetime64[us]", + # "datetime64[s]", "timedelta64[ns]", - "timedelta64[ms]", - "timedelta64[us]", - "timedelta64[s]", + # "timedelta64[ms]", + # "timedelta64[us]", + # "timedelta64[s]", ] +BINARY_OPS = list(plc.binaryop.BinaryOperator.__members__.values()) + @pytest.fixture(scope="module") def columns(): @@ -113,43 +116,522 @@ def binop_out_ty(request): @pytest.fixture( scope="module", - params=list(plc.binaryop.BinaryOperator.__members__.values()), + params=BINARY_OPS, ) -def binary_operators(request): +def binary_operator(request): return request.param -@pytest.fixture(scope="module") -def add_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): +def make_test(binop_lhs_ty, binop_rhs_ty, binop_out_ty, binary_operator): fail = False if not plc.binaryop._is_supported_binaryop( dtype_to_pylibcudf_type(binop_out_ty), dtype_to_pylibcudf_type(binop_lhs_ty), dtype_to_pylibcudf_type(binop_rhs_ty), - plc.binaryop.BinaryOperator.ADD, + binary_operator, ): fail = True return (binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail) -def test_add(add_tests, columns): - binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail = add_tests +@pytest.fixture(scope="module") +def add_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.ADD, + ) + + +@pytest.fixture(scope="module") +def sub_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, 
+ plc.binaryop.BinaryOperator.SUB, + ) + + +@pytest.fixture(scope="module") +def mul_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.MUL, + ) + + +@pytest.fixture(scope="module") +def div_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.DIV, + ) + + +@pytest.fixture(scope="module") +def true_div_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.TRUE_DIV, + ) + + +@pytest.fixture(scope="module") +def floor_div_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.FLOOR_DIV, + ) + + +@pytest.fixture(scope="module") +def mod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.MOD, + ) + + +@pytest.fixture(scope="module") +def pmod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + # TODO + pass + + +@pytest.fixture(scope="module") +def pymod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + # TODO + pass + + +@pytest.fixture(scope="module") +def pow_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.POW, + ) + + +@pytest.fixture(scope="module") +def int_pow_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.INT_POW, + ) + + +@pytest.fixture(scope="module") +def log_base_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.LOG_BASE, + ) + + +@pytest.fixture(scope="module") +def atan2_tests(binop_lhs_ty, 
binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.ATAN2, + ) + + +@pytest.fixture(scope="module") +def shift_left_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.SHIFT_LEFT, + ) + + +@pytest.fixture(scope="module") +def shift_right_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + ) + + +@pytest.fixture(scope="module") +def shift_right_unsigned_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + ) + + +@pytest.fixture(scope="module") +def bitwise_and_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.BITWISE_AND, + ) + + +@pytest.fixture(scope="module") +def bitwise_or_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.BITWISE_OR, + ) + + +@pytest.fixture(scope="module") +def logical_and_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.LOGICAL_AND, + ) + + +@pytest.fixture(scope="module") +def logical_or_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.LOGICAL_OR, + ) + + +@pytest.fixture(scope="module") +def equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.EQUAL, + ) + + +@pytest.fixture(scope="module") +def not_equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return 
make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.NOT_EQUAL, + ) + + +@pytest.fixture(scope="module") +def less_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.LESS, + ) + + +@pytest.fixture(scope="module") +def greater_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.GREATER, + ) + + +@pytest.fixture(scope="module") +def less_equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.LESS_EQUAL, + ) + + +@pytest.fixture(scope="module") +def greater_equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.GREATER_EQUAL, + ) + + +@pytest.fixture(scope="module") +def null_equals_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.NULL_EQUALS, + ) + + +@pytest.fixture(scope="module") +def null_max_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.NULL_MAX, + ) + + +@pytest.fixture(scope="module") +def null_min_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.NULL_MIN, + ) + + +@pytest.fixture(scope="module") +def generic_binary_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.GENERIC_BINARY, + ) + + +@pytest.fixture(scope="module") +def null_logical_and_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + 
plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, + ) + + +@pytest.fixture(scope="module") +def null_logical_or_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, + ) + + +@pytest.fixture(scope="module") +def invalid_binary_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.INVALID_BINARY, + ) + + +def _test_binaryop_inner(test, columns, pyop, cuop): + binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail = test lhs = columns[binop_lhs_ty] rhs = columns[binop_rhs_ty] pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) if not fail: - expect_data = ( - plc.interop.to_arrow(lhs).to_numpy() - + plc.interop.to_arrow(rhs).to_numpy() + expect_data = pyop( + plc.interop.to_arrow(lhs).to_numpy(), + plc.interop.to_arrow(rhs).to_numpy(), ).astype(binop_out_ty) expect = pa.array(expect_data) - got = plc.binaryop.binary_operation( - lhs, rhs, plc.binaryop.BinaryOperator.ADD, pylibcudf_outty - ) + got = plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) assert_column_eq(got, expect) else: with pytest.raises(TypeError): - plc.binaryop.binary_operation( - lhs, rhs, plc.binaryop.BinaryOperator.ADD, pylibcudf_outty - ) + plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) + + +def test_add(add_tests, columns): + _test_binaryop_inner( + add_tests, columns, np.add, plc.binaryop.BinaryOperator.ADD + ) + + +def test_sub(sub_tests, columns): + _test_binaryop_inner( + sub_tests, columns, np.subtract, plc.binaryop.BinaryOperator.SUB + ) + + +def test_mul(mul_tests, columns): + _test_binaryop_inner( + mul_tests, columns, np.multiply, plc.binaryop.BinaryOperator.MUL + ) + + +def test_div(div_tests, columns): + _test_binaryop_inner( + div_tests, columns, np.divide, plc.binaryop.BinaryOperator.DIV + ) + + +def test_true_div(true_div_tests, columns): + _test_binaryop_inner( 
+ true_div_tests, + columns, + np.true_divide, + plc.binaryop.BinaryOperator.TRUE_DIV, + ) + + +def test_floor_div(floor_div_tests, columns): + _test_binaryop_inner( + floor_div_tests, + columns, + np.floor_divide, + plc.binaryop.BinaryOperator.FLOOR_DIV, + ) + + +def test_mod(mod_tests, columns): + _test_binaryop_inner( + mod_tests, columns, np.mod, plc.binaryop.BinaryOperator.MOD + ) + + +def test_pow(pow_tests, columns): + _test_binaryop_inner( + pow_tests, columns, np.power, plc.binaryop.BinaryOperator.POW + ) + + +def test_shift_left(shift_left_tests, columns): + _test_binaryop_inner( + shift_left_tests, + columns, + np.left_shift, + plc.binaryop.BinaryOperator.SHIFT_LEFT, + ) + + +def test_shift_right(shift_right_tests, columns): + _test_binaryop_inner( + shift_right_tests, + columns, + np.right_shift, + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + ) + + +def test_bitwise_and(bitwise_and_tests, columns): + _test_binaryop_inner( + bitwise_and_tests, + columns, + np.bitwise_and, + plc.binaryop.BinaryOperator.BITWISE_AND, + ) + + +def test_bitwise_or(bitwise_or_tests, columns): + _test_binaryop_inner( + bitwise_or_tests, + columns, + np.bitwise_or, + plc.binaryop.BinaryOperator.BITWISE_OR, + ) + + +def test_bitwise_xor(bitwise_xor_tests, columns): + _test_binaryop_inner( + bitwise_xor_tests, + columns, + np.bitwise_xor, + plc.binaryop.BinaryOperator.BITWISE_XOR, + ) + + +def test_logical_and(logical_and_tests, columns): + _test_binaryop_inner( + logical_and_tests, + columns, + np.logical_and, + plc.binaryop.BinaryOperator.LOGICAL_AND, + ) + + +def test_logical_or(logical_or_tests, columns): + _test_binaryop_inner( + logical_or_tests, + columns, + np.logical_or, + plc.binaryop.BinaryOperator.LOGICAL_OR, + ) + + +def test_equal(equal_tests, columns): + _test_binaryop_inner( + equal_tests, columns, np.equal, plc.binaryop.BinaryOperator.EQUAL + ) + + +def test_not_equal(not_equal_tests, columns): + _test_binaryop_inner( + not_equal_tests, + columns, + np.not_equal, + 
plc.binaryop.BinaryOperator.NOT_EQUAL, + ) + + +def test_less(less_tests, columns): + _test_binaryop_inner( + less_tests, columns, np.less, plc.binaryop.BinaryOperator.LESS + ) + + +def test_greater(greater_tests, columns): + _test_binaryop_inner( + greater_tests, columns, np.greater, plc.binaryop.BinaryOperator.GREATER + ) + + +def test_less_equal(less_equal_tests, columns): + _test_binaryop_inner( + less_equal_tests, + columns, + np.less_equal, + plc.binaryop.BinaryOperator.LESS_EQUAL, + ) + + +def test_greater_equal(greater_equal_tests, columns): + _test_binaryop_inner( + greater_equal_tests, + columns, + np.greater_equal, + plc.binaryop.BinaryOperator.GREATER_EQUAL, + ) From 61ab85b3aee5f4d552575503f5ffb9d1952b640f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 4 Apr 2024 15:36:12 -0500 Subject: [PATCH 04/21] cleanup --- .../cudf/pylibcudf_tests/test_binaryops.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index d41f014fae0..3374e0a1f75 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -134,6 +134,25 @@ def make_test(binop_lhs_ty, binop_rhs_ty, binop_out_ty, binary_operator): return (binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail) +def _test_binaryop_inner(test, columns, pyop, cuop): + binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail = test + lhs = columns[binop_lhs_ty] + rhs = columns[binop_rhs_ty] + pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) + + if not fail: + expect_data = pyop( + plc.interop.to_arrow(lhs).to_numpy(), + plc.interop.to_arrow(rhs).to_numpy(), + ).astype(binop_out_ty) + expect = pa.array(expect_data) + got = plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) + assert_column_eq(got, expect) + else: + with pytest.raises(TypeError): + plc.binaryop.binary_operation(lhs, rhs, cuop, 
pylibcudf_outty) + + @pytest.fixture(scope="module") def add_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): return make_test( @@ -456,25 +475,6 @@ def invalid_binary_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): ) -def _test_binaryop_inner(test, columns, pyop, cuop): - binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail = test - lhs = columns[binop_lhs_ty] - rhs = columns[binop_rhs_ty] - pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) - - if not fail: - expect_data = pyop( - plc.interop.to_arrow(lhs).to_numpy(), - plc.interop.to_arrow(rhs).to_numpy(), - ).astype(binop_out_ty) - expect = pa.array(expect_data) - got = plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) - assert_column_eq(got, expect) - else: - with pytest.raises(TypeError): - plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) - - def test_add(add_tests, columns): _test_binaryop_inner( add_tests, columns, np.add, plc.binaryop.BinaryOperator.ADD From ba03539ee0571cc70fdcdfa6cbf05c9b12d6a047 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 4 Apr 2024 18:30:34 -0500 Subject: [PATCH 05/21] add some more tests --- .../cudf/pylibcudf_tests/test_binaryops.py | 65 ++++++++++++++++--- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 3374e0a1f75..2a4df812499 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,6 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+import warnings + import numpy as np import pyarrow as pa import pytest @@ -225,8 +227,12 @@ def mod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): @pytest.fixture(scope="module") def pmod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - # TODO - pass + return make_test( + binop_lhs_ty, + binop_rhs_ty, + binop_out_ty, + plc.binaryop.BinaryOperator.PMOD, + ) @pytest.fixture(scope="module") @@ -509,12 +515,14 @@ def test_true_div(true_div_tests, columns): def test_floor_div(floor_div_tests, columns): - _test_binaryop_inner( - floor_div_tests, - columns, - np.floor_divide, - plc.binaryop.BinaryOperator.FLOOR_DIV, - ) + with warnings.catch_warnings(record=True): + warnings.filterwarnings("ignore", message="divide by zero") + _test_binaryop_inner( + floor_div_tests, + columns, + np.floor_divide, + plc.binaryop.BinaryOperator.FLOOR_DIV, + ) def test_mod(mod_tests, columns): @@ -523,12 +531,42 @@ def test_mod(mod_tests, columns): ) +def test_pmod(pmod_tests, columns): + with warnings.catch_warnings(record=True): + warnings.filterwarnings("ignore", message="divide by zero") + warnings.filterwarnings("ignore", message="invalid value") + _test_binaryop_inner( + pmod_tests, + columns, + lambda x, y: (x % y + y) % x, + plc.binaryop.BinaryOperator.PMOD, + ) + + def test_pow(pow_tests, columns): _test_binaryop_inner( pow_tests, columns, np.power, plc.binaryop.BinaryOperator.POW ) +def test_int_pow(int_pow_tests, columns): + _test_binaryop_inner( + int_pow_tests, columns, np.power, plc.binaryop.BinaryOperator.INT_POW + ) + + +def test_log_base(log_base_tests, columns): + with warnings.catch_warnings(record=True): + warnings.filterwarnings("ignore", message="divide by zero") + warnings.filterwarnings("ignore", message="invalid value") + _test_binaryop_inner( + log_base_tests, + columns, + lambda x, y: np.log(y) / np.log(x), + plc.binaryop.BinaryOperator.LOG_BASE, + ) + + def test_shift_left(shift_left_tests, columns): _test_binaryop_inner( shift_left_tests, @@ -547,6 +585,17 
@@ def test_shift_right(shift_right_tests, columns): ) +def test_shift_right_unsigned(shift_right_unsigned_tests, columns): + _test_binaryop_inner( + shift_right_unsigned_tests, + columns, + lambda x, y: np.right_shift( + x.astype(np.int64), y.astype(np.int64) + ).astype(x.dtype), + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + ) + + def test_bitwise_and(bitwise_and_tests, columns): _test_binaryop_inner( bitwise_and_tests, From 4911f33b69f2becb22067e7eb63e224dafde725f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 5 Apr 2024 08:14:23 -0500 Subject: [PATCH 06/21] start to work nulls in --- .../cudf/pylibcudf_tests/test_binaryops.py | 97 +++++++++++++++---- 1 file changed, 76 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 2a4df812499..cc906e411c6 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -40,67 +40,74 @@ @pytest.fixture(scope="module") def columns(): return { - "int8": plc.interop.from_arrow(pa.array([1, 2, 3, 4], type=pa.int8())), + "int8": plc.interop.from_arrow( + pa.array([1, None, 3, None], type=pa.int8()) + ), "int16": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.int16()) + pa.array([1, 2, None, None], type=pa.int16()) ), "int32": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.int32()) + pa.array([1, None, 3, None], type=pa.int32()) ), "int64": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.int64()) + pa.array([1, 2, None, None], type=pa.int64()) ), "uint8": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.uint8()) + pa.array([1, None, 3, None], type=pa.uint8()) ), "uint16": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.uint16()) + pa.array([1, 2, None, None], type=pa.uint16()) ), "uint32": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.uint32()) + pa.array([1, None, 3, None], type=pa.uint32()) ), 
"uint64": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.uint64()) + pa.array([1, 2, None, None], type=pa.uint64()) ), "float32": plc.interop.from_arrow( - pa.array([1.0, 2.0, 3.0, 4.0], type=pa.float32()) + pa.array([1.0, None, 3.0, None], type=pa.float32()) ), "float64": plc.interop.from_arrow( - pa.array([1.0, 2.0, 3.0, 4.0], type=pa.float64()) + pa.array([1.0, 2.0, None, None], type=pa.float64()) ), "object": plc.interop.from_arrow( - pa.array(["a", "b", "c", "d"], type=pa.string()) + pa.array(["a", None, "c", None], type=pa.string()) ), "bool": plc.interop.from_arrow( - pa.array([True, False, True, False], type=pa.bool_()) + pa.array([True, False, None, None], type=pa.bool_()) ), "datetime64[ns]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.timestamp("ns")) + pa.array([1, None, 3, None], type=pa.timestamp("ns")) ), "datetime64[ms]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.timestamp("ms")) + pa.array([1, 2, None, None], type=pa.timestamp("ms")) ), "datetime64[us]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.timestamp("us")) + pa.array([1, None, 3, None], type=pa.timestamp("us")) ), "datetime64[s]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.timestamp("s")) + pa.array([1, 2, None, None], type=pa.timestamp("s")) ), "timedelta64[ns]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.duration("ns")) + pa.array([1, None, 3, None], type=pa.duration("ns")) ), "timedelta64[ms]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.duration("ms")) + pa.array([1, 2, None, None], type=pa.duration("ms")) ), "timedelta64[us]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.duration("us")) + pa.array([1, None, 3, None], type=pa.duration("us")) ), "timedelta64[s]": plc.interop.from_arrow( - pa.array([1, 2, 3, 4], type=pa.duration("s")) + pa.array([1, 2, None, None], type=pa.duration("s")) ), } +@pytest.fixture(scope="module") +def null_columns(columns): + breakpoint() + + 
@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) def binop_lhs_ty(request): return request.param @@ -141,7 +148,6 @@ def _test_binaryop_inner(test, columns, pyop, cuop): lhs = columns[binop_lhs_ty] rhs = columns[binop_rhs_ty] pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) - if not fail: expect_data = pyop( plc.interop.to_arrow(lhs).to_numpy(), @@ -567,6 +573,12 @@ def test_log_base(log_base_tests, columns): ) +def test_atan2(atan2_tests, columns): + _test_binaryop_inner( + atan2_tests, columns, np.arctan2, plc.binaryop.BinaryOperator.ATAN2 + ) + + def test_shift_left(shift_left_tests, columns): _test_binaryop_inner( shift_left_tests, @@ -684,3 +696,46 @@ def test_greater_equal(greater_equal_tests, columns): np.greater_equal, plc.binaryop.BinaryOperator.GREATER_EQUAL, ) + + +def test_null_equals(null_equals_tests, columns): + def numpy_null_equals(x, y): + # TODO + pass + + _test_binaryop_inner( + null_equals_tests, + columns, + numpy_null_equals, + plc.binaryop.BinaryOperator.GREATER_EQUAL, + ) + + +def test_null_max(null_max_tests, columns): + # TODO + pass + + +def test_null_min(null_min_tests, columns): + # TODO + pass + + +def test_generic_binary(generic_binary_tests, columns): + # TODO + pass + + +def test_null_logical_and(null_logical_and_tests, columns): + # TODO + pass + + +def test_null_logical_or(null_logical_or_tests, columns): + # TODO + pass + + +def test_invalid_binary(invalid_binary_tests, columns): + # TODO + pass From 562c765d68b7dcd2f44a3d4df736422ff0eecc4a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 1 Jul 2024 16:04:43 -0700 Subject: [PATCH 07/21] start to refactor, pass add, almost sub --- .../cudf/cudf/pylibcudf_tests/common/utils.py | 1 - .../cudf/pylibcudf_tests/test_binaryops.py | 52 +++++++++++++++---- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py index bf927e661fe..c955de93230 100644 
--- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py @@ -49,7 +49,6 @@ def assert_column_eq( lhs = lhs.combine_chunks() if isinstance(rhs, pa.ChunkedArray): rhs = rhs.combine_chunks() - assert lhs.equals(rhs) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index cc906e411c6..40c3fe97d54 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -149,11 +149,25 @@ def _test_binaryop_inner(test, columns, pyop, cuop): rhs = columns[binop_rhs_ty] pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) if not fail: - expect_data = pyop( - plc.interop.to_arrow(lhs).to_numpy(), - plc.interop.to_arrow(rhs).to_numpy(), - ).astype(binop_out_ty) - expect = pa.array(expect_data) + + def op(x, y, dtype): + result = pyop(x, y) + if result is None: + return result + else: + if hasattr(result, "to_numpy"): + return result.to_numpy().astype(dtype) + else: + return np.dtype(dtype).type(result).astype(dtype) + + expect_data = [ + op(x, y, binop_out_ty) + for x, y in zip( + plc.interop.to_arrow(lhs).to_pylist(), + plc.interop.to_arrow(rhs).to_pylist(), + ) + ] + expect = pa.array(expect_data, type=pa.from_numpy_dtype(binop_out_ty)) got = plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) assert_column_eq(got, expect) else: @@ -488,26 +502,46 @@ def invalid_binary_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): def test_add(add_tests, columns): + def add(x, y): + if x is None or y is None: + return None + return x + y + _test_binaryop_inner( - add_tests, columns, np.add, plc.binaryop.BinaryOperator.ADD + add_tests, columns, add, plc.binaryop.BinaryOperator.ADD ) def test_sub(sub_tests, columns): + def sub(x, y): + if x is None or y is None: + return None + return x - y + _test_binaryop_inner( - sub_tests, columns, np.subtract, plc.binaryop.BinaryOperator.SUB + sub_tests, columns, sub, 
plc.binaryop.BinaryOperator.SUB ) def test_mul(mul_tests, columns): + def mul(x, y): + if x is None or y is None: + return None + return x * y + _test_binaryop_inner( - mul_tests, columns, np.multiply, plc.binaryop.BinaryOperator.MUL + mul_tests, columns, mul, plc.binaryop.BinaryOperator.MUL ) def test_div(div_tests, columns): + def div(x, y): + if x is None or y is None: + return None + return x / y + _test_binaryop_inner( - div_tests, columns, np.divide, plc.binaryop.BinaryOperator.DIV + div_tests, columns, div, plc.binaryop.BinaryOperator.DIV ) From e533469450fca06fcb11530a4c8071d2c95a14b6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 8 Jul 2024 06:56:01 -0700 Subject: [PATCH 08/21] simpler approach --- .../cudf/pylibcudf_tests/test_binaryops.py | 843 +++--------------- 1 file changed, 137 insertions(+), 706 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 40c3fe97d54..52616e4bd97 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-import warnings +from decimal import Decimal import numpy as np import pyarrow as pa @@ -9,767 +9,198 @@ from utils import assert_column_eq from cudf._lib import pylibcudf as plc -from cudf._lib.types import dtype_to_pylibcudf_type - -LIBCUDF_SUPPORTED_TYPES = [ - # "int8", - # "int16", - # "int32", - "int64", - # "uint8", - # "uint16", - # "uint32", - "uint64", - # "float32", - "float64", - "object", - "bool", - "datetime64[ns]", - # "datetime64[ms]", - # "datetime64[us]", - # "datetime64[s]", - "timedelta64[ns]", - # "timedelta64[ms]", - # "timedelta64[us]", - # "timedelta64[s]", -] - -BINARY_OPS = list(plc.binaryop.BinaryOperator.__members__.values()) - - -@pytest.fixture(scope="module") -def columns(): - return { - "int8": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.int8()) - ), - "int16": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.int16()) - ), - "int32": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.int32()) - ), - "int64": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.int64()) - ), - "uint8": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.uint8()) - ), - "uint16": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.uint16()) - ), - "uint32": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.uint32()) - ), - "uint64": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.uint64()) - ), - "float32": plc.interop.from_arrow( - pa.array([1.0, None, 3.0, None], type=pa.float32()) - ), - "float64": plc.interop.from_arrow( - pa.array([1.0, 2.0, None, None], type=pa.float64()) - ), - "object": plc.interop.from_arrow( - pa.array(["a", None, "c", None], type=pa.string()) - ), - "bool": plc.interop.from_arrow( - pa.array([True, False, None, None], type=pa.bool_()) - ), - "datetime64[ns]": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.timestamp("ns")) - ), - "datetime64[ms]": plc.interop.from_arrow( - pa.array([1, 2, None, None], 
type=pa.timestamp("ms")) - ), - "datetime64[us]": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.timestamp("us")) - ), - "datetime64[s]": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.timestamp("s")) - ), - "timedelta64[ns]": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.duration("ns")) - ), - "timedelta64[ms]": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.duration("ms")) - ), - "timedelta64[us]": plc.interop.from_arrow( - pa.array([1, None, 3, None], type=pa.duration("us")) - ), - "timedelta64[s]": plc.interop.from_arrow( - pa.array([1, 2, None, None], type=pa.duration("s")) - ), - } - - -@pytest.fixture(scope="module") -def null_columns(columns): - breakpoint() - - -@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) -def binop_lhs_ty(request): - return request.param - - -@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) -def binop_rhs_ty(request): - return request.param - - -@pytest.fixture(scope="module", params=LIBCUDF_SUPPORTED_TYPES) -def binop_out_ty(request): - return request.param -@pytest.fixture( - scope="module", - params=BINARY_OPS, -) -def binary_operator(request): +@pytest.fixture(params=[True, False]) +def nulls(request): return request.param -def make_test(binop_lhs_ty, binop_rhs_ty, binop_out_ty, binary_operator): - fail = False - if not plc.binaryop._is_supported_binaryop( - dtype_to_pylibcudf_type(binop_out_ty), - dtype_to_pylibcudf_type(binop_lhs_ty), - dtype_to_pylibcudf_type(binop_rhs_ty), - binary_operator, - ): - fail = True - return (binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail) - - -def _test_binaryop_inner(test, columns, pyop, cuop): - binop_lhs_ty, binop_rhs_ty, binop_out_ty, fail = test - lhs = columns[binop_lhs_ty] - rhs = columns[binop_rhs_ty] - pylibcudf_outty = dtype_to_pylibcudf_type(binop_out_ty) - if not fail: - - def op(x, y, dtype): - result = pyop(x, y) - if result is None: - return result - else: - if hasattr(result, "to_numpy"): 
- return result.to_numpy().astype(dtype) - else: - return np.dtype(dtype).type(result).astype(dtype) - - expect_data = [ - op(x, y, binop_out_ty) - for x, y in zip( - plc.interop.to_arrow(lhs).to_pylist(), - plc.interop.to_arrow(rhs).to_pylist(), - ) - ] - expect = pa.array(expect_data, type=pa.from_numpy_dtype(binop_out_ty)) - got = plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) - assert_column_eq(got, expect) - else: - with pytest.raises(TypeError): - plc.binaryop.binary_operation(lhs, rhs, cuop, pylibcudf_outty) - - -@pytest.fixture(scope="module") -def add_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.ADD, - ) - - -@pytest.fixture(scope="module") -def sub_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.SUB, - ) - - -@pytest.fixture(scope="module") -def mul_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.MUL, - ) - - -@pytest.fixture(scope="module") -def div_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.DIV, - ) - - -@pytest.fixture(scope="module") -def true_div_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.TRUE_DIV, - ) - - -@pytest.fixture(scope="module") -def floor_div_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.FLOOR_DIV, - ) - - -@pytest.fixture(scope="module") -def mod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.MOD, - ) - - 
-@pytest.fixture(scope="module") -def pmod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.PMOD, - ) - - -@pytest.fixture(scope="module") -def pymod_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - # TODO - pass - - -@pytest.fixture(scope="module") -def pow_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.POW, - ) - - -@pytest.fixture(scope="module") -def int_pow_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.INT_POW, - ) - - -@pytest.fixture(scope="module") -def log_base_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.LOG_BASE, - ) - - -@pytest.fixture(scope="module") -def atan2_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.ATAN2, - ) - - -@pytest.fixture(scope="module") -def shift_left_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.SHIFT_LEFT, - ) - - -@pytest.fixture(scope="module") -def shift_right_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.SHIFT_RIGHT, - ) - - -@pytest.fixture(scope="module") -def shift_right_unsigned_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, - ) - - -@pytest.fixture(scope="module") -def bitwise_and_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - 
plc.binaryop.BinaryOperator.BITWISE_AND, - ) - - -@pytest.fixture(scope="module") -def bitwise_or_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.BITWISE_OR, - ) - - -@pytest.fixture(scope="module") -def logical_and_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.LOGICAL_AND, - ) - - -@pytest.fixture(scope="module") -def logical_or_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.LOGICAL_OR, - ) - - -@pytest.fixture(scope="module") -def equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.EQUAL, - ) - - -@pytest.fixture(scope="module") -def not_equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.NOT_EQUAL, - ) - - -@pytest.fixture(scope="module") -def less_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.LESS, - ) - - -@pytest.fixture(scope="module") -def greater_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.GREATER, - ) +@pytest.fixture +def pa_int_col(nulls): + return pa.array([1, 2, 3 if not nulls else None, 4, 5], type=pa.int32()) -@pytest.fixture(scope="module") -def less_equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.LESS_EQUAL, - ) +@pytest.fixture +def plc_int_col(pa_int_col): + return plc.interop.from_arrow(pa_int_col) -@pytest.fixture(scope="module") -def 
greater_equal_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.GREATER_EQUAL, - ) +@pytest.fixture +def pa_uint_col(nulls): + return pa.array([1, 2, 3 if not nulls else None, 4, 5], type=pa.uint32()) -@pytest.fixture(scope="module") -def null_equals_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.NULL_EQUALS, - ) +@pytest.fixture +def plc_uint_col(pa_uint_col): + return plc.interop.from_arrow(pa_uint_col) -@pytest.fixture(scope="module") -def null_max_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.NULL_MAX, +@pytest.fixture +def pa_float_col(nulls): + return pa.array( + [1.0, 2.0, 3.0 if not nulls else None, 4.0, 5.0], type=pa.float32() ) -@pytest.fixture(scope="module") -def null_min_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.NULL_MIN, - ) +@pytest.fixture +def plc_float_col(pa_float_col): + return plc.interop.from_arrow(pa_float_col) -@pytest.fixture(scope="module") -def generic_binary_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.GENERIC_BINARY, +@pytest.fixture +def pa_bool_col(nulls): + return pa.array( + [True, False, True if not nulls else None, False, True], + type=pa.bool_(), ) -@pytest.fixture(scope="module") -def null_logical_and_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, - ) +@pytest.fixture +def plc_bool_col(pa_bool_col): + return plc.interop.from_arrow(pa_bool_col) -@pytest.fixture(scope="module") -def null_logical_or_tests(binop_lhs_ty, 
binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, +@pytest.fixture +def pa_timestamp_col(nulls): + return pa.array( + [ + np.datetime64("2022-01-01"), + np.datetime64("2022-01-02"), + np.datetime64("2022-01-03") if not nulls else None, + np.datetime64("2022-01-04"), + np.datetime64("2022-01-05"), + ], + type=pa.timestamp("ns"), ) -@pytest.fixture(scope="module") -def invalid_binary_tests(binop_lhs_ty, binop_rhs_ty, binop_out_ty): - return make_test( - binop_lhs_ty, - binop_rhs_ty, - binop_out_ty, - plc.binaryop.BinaryOperator.INVALID_BINARY, - ) +@pytest.fixture +def plc_timestamp_col(pa_timestamp_col): + return plc.interop.from_arrow(pa_timestamp_col) -def test_add(add_tests, columns): - def add(x, y): - if x is None or y is None: - return None - return x + y - - _test_binaryop_inner( - add_tests, columns, add, plc.binaryop.BinaryOperator.ADD +@pytest.fixture +def pa_duration_col(nulls): + return pa.array( + [ + np.timedelta64(1, "ns"), + np.timedelta64(2, "ns"), + np.timedelta64(3, "ns") if not nulls else None, + np.timedelta64(4, "ns"), + np.timedelta64(5, "ns"), + ], + type=pa.duration("ns"), ) -def test_sub(sub_tests, columns): - def sub(x, y): - if x is None or y is None: - return None - return x - y - - _test_binaryop_inner( - sub_tests, columns, sub, plc.binaryop.BinaryOperator.SUB - ) - +@pytest.fixture +def plc_duration_col(pa_duration_col): + return plc.interop.from_arrow(pa_duration_col) -def test_mul(mul_tests, columns): - def mul(x, y): - if x is None or y is None: - return None - return x * y - _test_binaryop_inner( - mul_tests, columns, mul, plc.binaryop.BinaryOperator.MUL +@pytest.fixture +def pa_decimal_col(nulls): + return pa.array( + [ + Decimal("1.23"), + Decimal("4.56"), + Decimal("7.89") if not nulls else None, + Decimal("0.12"), + Decimal("3.45"), + ], + type=pa.decimal128(9, 2), ) -def test_div(div_tests, columns): - def div(x, y): - if x is None 
or y is None: - return None - return x / y +@pytest.fixture +def plc_decimal_col(pa_decimal_col): + return plc.interop.from_arrow(pa_decimal_col) - _test_binaryop_inner( - div_tests, columns, div, plc.binaryop.BinaryOperator.DIV - ) - -def test_true_div(true_div_tests, columns): - _test_binaryop_inner( - true_div_tests, - columns, - np.true_divide, - plc.binaryop.BinaryOperator.TRUE_DIV, +def test_add( + pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col +): + expect = pa.compute.add(pa_int_col, pa_float_col).cast(pa.int32()) + got = plc.binaryop.binary_operation( + plc_int_col, + plc_float_col, + plc.binaryop.BinaryOperator.ADD, + plc.DataType(plc.TypeId.INT32), ) + assert_column_eq(expect, got) -def test_floor_div(floor_div_tests, columns): - with warnings.catch_warnings(record=True): - warnings.filterwarnings("ignore", message="divide by zero") - _test_binaryop_inner( - floor_div_tests, - columns, - np.floor_divide, - plc.binaryop.BinaryOperator.FLOOR_DIV, + with pytest.raises(TypeError): + plc.binaryop.binary_operation( + plc_duration_col, + plc_duration_col, + plc.binaryop.BinaryOperator.ADD, + plc.DataType(plc.TypeId.INT32), ) -def test_mod(mod_tests, columns): - _test_binaryop_inner( - mod_tests, columns, np.mod, plc.binaryop.BinaryOperator.MOD +def test_sub( + pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col +): + expect = pa.compute.subtract(pa_int_col, pa_float_col).cast(pa.int32()) + got = plc.binaryop.binary_operation( + plc_int_col, + plc_float_col, + plc.binaryop.BinaryOperator.SUB, + plc.DataType(plc.TypeId.INT32), ) + assert_column_eq(expect, got) -def test_pmod(pmod_tests, columns): - with warnings.catch_warnings(record=True): - warnings.filterwarnings("ignore", message="divide by zero") - warnings.filterwarnings("ignore", message="invalid value") - _test_binaryop_inner( - pmod_tests, - columns, - lambda x, y: (x % y + y) % x, - plc.binaryop.BinaryOperator.PMOD, + with pytest.raises(TypeError): + 
plc.binaryop.binary_operation( + plc_duration_col, + plc_duration_col, + plc.binaryop.BinaryOperator.SUB, + plc.DataType(plc.TypeId.INT32), ) -def test_pow(pow_tests, columns): - _test_binaryop_inner( - pow_tests, columns, np.power, plc.binaryop.BinaryOperator.POW - ) - - -def test_int_pow(int_pow_tests, columns): - _test_binaryop_inner( - int_pow_tests, columns, np.power, plc.binaryop.BinaryOperator.INT_POW +def test_mul( + pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col +): + expect = pa.compute.multiply(pa_int_col, pa_float_col).cast(pa.int32()) + got = plc.binaryop.binary_operation( + plc_int_col, + plc_float_col, + plc.binaryop.BinaryOperator.MUL, + plc.DataType(plc.TypeId.INT32), ) + assert_column_eq(expect, got) -def test_log_base(log_base_tests, columns): - with warnings.catch_warnings(record=True): - warnings.filterwarnings("ignore", message="divide by zero") - warnings.filterwarnings("ignore", message="invalid value") - _test_binaryop_inner( - log_base_tests, - columns, - lambda x, y: np.log(y) / np.log(x), - plc.binaryop.BinaryOperator.LOG_BASE, + with pytest.raises(TypeError): + plc.binaryop.binary_operation( + plc_duration_col, + plc_duration_col, + plc.binaryop.BinaryOperator.MUL, + plc.DataType(plc.TypeId.INT32), ) -def test_atan2(atan2_tests, columns): - _test_binaryop_inner( - atan2_tests, columns, np.arctan2, plc.binaryop.BinaryOperator.ATAN2 - ) - - -def test_shift_left(shift_left_tests, columns): - _test_binaryop_inner( - shift_left_tests, - columns, - np.left_shift, - plc.binaryop.BinaryOperator.SHIFT_LEFT, - ) - - -def test_shift_right(shift_right_tests, columns): - _test_binaryop_inner( - shift_right_tests, - columns, - np.right_shift, - plc.binaryop.BinaryOperator.SHIFT_RIGHT, - ) - - -def test_shift_right_unsigned(shift_right_unsigned_tests, columns): - _test_binaryop_inner( - shift_right_unsigned_tests, - columns, - lambda x, y: np.right_shift( - x.astype(np.int64), y.astype(np.int64) - ).astype(x.dtype), - 
plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, - ) - - -def test_bitwise_and(bitwise_and_tests, columns): - _test_binaryop_inner( - bitwise_and_tests, - columns, - np.bitwise_and, - plc.binaryop.BinaryOperator.BITWISE_AND, - ) - - -def test_bitwise_or(bitwise_or_tests, columns): - _test_binaryop_inner( - bitwise_or_tests, - columns, - np.bitwise_or, - plc.binaryop.BinaryOperator.BITWISE_OR, - ) - - -def test_bitwise_xor(bitwise_xor_tests, columns): - _test_binaryop_inner( - bitwise_xor_tests, - columns, - np.bitwise_xor, - plc.binaryop.BinaryOperator.BITWISE_XOR, - ) - - -def test_logical_and(logical_and_tests, columns): - _test_binaryop_inner( - logical_and_tests, - columns, - np.logical_and, - plc.binaryop.BinaryOperator.LOGICAL_AND, - ) - - -def test_logical_or(logical_or_tests, columns): - _test_binaryop_inner( - logical_or_tests, - columns, - np.logical_or, - plc.binaryop.BinaryOperator.LOGICAL_OR, - ) - - -def test_equal(equal_tests, columns): - _test_binaryop_inner( - equal_tests, columns, np.equal, plc.binaryop.BinaryOperator.EQUAL - ) - - -def test_not_equal(not_equal_tests, columns): - _test_binaryop_inner( - not_equal_tests, - columns, - np.not_equal, - plc.binaryop.BinaryOperator.NOT_EQUAL, - ) - - -def test_less(less_tests, columns): - _test_binaryop_inner( - less_tests, columns, np.less, plc.binaryop.BinaryOperator.LESS - ) - - -def test_greater(greater_tests, columns): - _test_binaryop_inner( - greater_tests, columns, np.greater, plc.binaryop.BinaryOperator.GREATER - ) - - -def test_less_equal(less_equal_tests, columns): - _test_binaryop_inner( - less_equal_tests, - columns, - np.less_equal, - plc.binaryop.BinaryOperator.LESS_EQUAL, - ) - - -def test_greater_equal(greater_equal_tests, columns): - _test_binaryop_inner( - greater_equal_tests, - columns, - np.greater_equal, - plc.binaryop.BinaryOperator.GREATER_EQUAL, - ) - - -def test_null_equals(null_equals_tests, columns): - def numpy_null_equals(x, y): - # TODO - pass - - _test_binaryop_inner( - 
null_equals_tests, - columns, - numpy_null_equals, - plc.binaryop.BinaryOperator.GREATER_EQUAL, +def test_div( + pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col +): + expect = pa.compute.divide(pa_int_col, pa_float_col).cast(pa.int32()) + got = plc.binaryop.binary_operation( + plc_int_col, + plc_float_col, + plc.binaryop.BinaryOperator.DIV, + plc.DataType(plc.TypeId.INT32), ) + assert_column_eq(expect, got) -def test_null_max(null_max_tests, columns): - # TODO - pass - - -def test_null_min(null_min_tests, columns): - # TODO - pass - - -def test_generic_binary(generic_binary_tests, columns): - # TODO - pass - - -def test_null_logical_and(null_logical_and_tests, columns): - # TODO - pass - - -def test_null_logical_or(null_logical_or_tests, columns): - # TODO - pass - - -def test_invalid_binary(invalid_binary_tests, columns): - # TODO - pass + with pytest.raises(TypeError): + plc.binaryop.binary_operation( + plc_float_col, + plc_duration_col, + plc.binaryop.BinaryOperator.DIV, + plc.DataType(plc.TypeId.INT32), + ) From 322a7de3584ffcb920d5f0afb9aee7bed8b976d8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 9 Jul 2024 09:47:59 -0700 Subject: [PATCH 09/21] address reviews --- cpp/include/cudf/binaryop.hpp | 2 +- cpp/src/binaryop/binaryop.cpp | 5 +---- python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 4 ++-- python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 366356d7562..c74c91e39c2 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -299,7 +299,7 @@ namespace binops { * @param op The binary operator * @return true if the binary operator is supported for the given input types */ -bool is_supported_binaryop(data_type out, data_type lhs, data_type rhs, binary_operator op); +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op); /** * 
@brief Computes output valid mask for op between a column and a scalar diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index f27716c2654..3ac8547baad 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -50,10 +50,7 @@ namespace cudf { namespace binops { -/** - * @brief Returns true if the binary operator is supported for the given input types - */ -bool is_supported_binaryop(data_type out, data_type lhs, data_type rhs, binary_operator op) +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op) { return cudf::binops::compiled::is_supported_operation(out, lhs, rhs, op); } diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index 50485197e2c..aae44259bbf 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -86,13 +86,13 @@ cpdef Column binary_operation( return Column.from_libcudf(move(result)) -def _is_supported_binaryop( +def is_supported_operation( DataType out, DataType lhs, DataType rhs, binary_operator op ): - return cpp_binaryop.is_supported_binaryop( + return cpp_binaryop.is_supported_operation( out.c_obj, lhs.c_obj, rhs.c_obj, diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd index 867571ecbd0..da487d231fc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd @@ -78,7 +78,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: ) except +cudf_exception_handler cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil: - cdef bool is_supported_binaryop( + cdef bool is_supported_operation( data_type output_type, data_type lhs_type, data_type rhs_type, From ecbb895a46f278ea22683c431b7107b2adc0a2eb Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 10 Jul 2024 06:21:21 -0700 Subject: [PATCH 
10/21] refactor again --- .../cudf/pylibcudf_tests/test_binaryops.py | 341 ++++++++++-------- 1 file changed, 191 insertions(+), 150 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 52616e4bd97..1f6d6f80e0b 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,8 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from decimal import Decimal - import numpy as np import pyarrow as pa import pytest @@ -11,196 +9,239 @@ from cudf._lib import pylibcudf as plc -@pytest.fixture(params=[True, False]) -def nulls(request): - return request.param - - -@pytest.fixture -def pa_int_col(nulls): - return pa.array([1, 2, 3 if not nulls else None, 4, 5], type=pa.int32()) - - -@pytest.fixture -def plc_int_col(pa_int_col): - return plc.interop.from_arrow(pa_int_col) - +def idfn(param): + ltype, rtype, outtype = param + return f"{ltype}-{rtype}-{outtype}" -@pytest.fixture -def pa_uint_col(nulls): - return pa.array([1, 2, 3 if not nulls else None, 4, 5], type=pa.uint32()) - - -@pytest.fixture -def plc_uint_col(pa_uint_col): - return plc.interop.from_arrow(pa_uint_col) - -@pytest.fixture -def pa_float_col(nulls): - return pa.array( - [1.0, 2.0, 3.0 if not nulls else None, 4.0, 5.0], type=pa.float32() - ) +@pytest.fixture(params=[True, False], ids=["nulls", "no_nulls"]) +def nulls(request): + return request.param @pytest.fixture -def plc_float_col(pa_float_col): - return plc.interop.from_arrow(pa_float_col) +def pa_data(request, nulls): + ltype, rtype, outtype = request.param + values = make_col(ltype, nulls), make_col(rtype, nulls), outtype + return values @pytest.fixture -def pa_bool_col(nulls): - return pa.array( - [True, False, True if not nulls else None, False, True], - type=pa.bool_(), +def plc_data(pa_data): + lhs, rhs, outtype = pa_data + return ( + plc.interop.from_arrow(lhs), + plc.interop.from_arrow(rhs), + 
plc.interop.from_arrow(pa.from_numpy_dtype(np.dtype(outtype))), ) -@pytest.fixture -def plc_bool_col(pa_bool_col): - return plc.interop.from_arrow(pa_bool_col) - - -@pytest.fixture -def pa_timestamp_col(nulls): - return pa.array( - [ +def make_col(dtype, nulls): + if dtype == "int64": + data = [1, 2, 3, 4, 5] + pa_type = pa.int32() + elif dtype == "uint64": + data = [1, 2, 3, 4, 5] + pa_type = pa.uint32() + elif dtype == "float64": + data = [1.0, 2.0, 3.0, 4.0, 5.0] + pa_type = pa.float32() + elif dtype == "bool": + data = [True, False, True, False, True] + pa_type = pa.bool_() + elif dtype == "timestamp64[ns]": + data = [ np.datetime64("2022-01-01"), np.datetime64("2022-01-02"), - np.datetime64("2022-01-03") if not nulls else None, + np.datetime64("2022-01-03"), np.datetime64("2022-01-04"), np.datetime64("2022-01-05"), - ], - type=pa.timestamp("ns"), - ) - - -@pytest.fixture -def plc_timestamp_col(pa_timestamp_col): - return plc.interop.from_arrow(pa_timestamp_col) - - -@pytest.fixture -def pa_duration_col(nulls): - return pa.array( - [ + ] + pa_type = pa.timestamp("ns") + elif dtype == "timedelta64[ns]": + data = [ np.timedelta64(1, "ns"), np.timedelta64(2, "ns"), - np.timedelta64(3, "ns") if not nulls else None, + np.timedelta64(3, "ns"), np.timedelta64(4, "ns"), np.timedelta64(5, "ns"), - ], - type=pa.duration("ns"), - ) + ] + pa_type = pa.duration("ns") + else: + raise ValueError("Unsupported dtype") + if nulls: + data[3] = None -@pytest.fixture -def plc_duration_col(pa_duration_col): - return plc.interop.from_arrow(pa_duration_col) + return pa.array(data, type=pa_type) -@pytest.fixture -def pa_decimal_col(nulls): - return pa.array( - [ - Decimal("1.23"), - Decimal("4.56"), - Decimal("7.89") if not nulls else None, - Decimal("0.12"), - Decimal("3.45"), - ], - type=pa.decimal128(9, 2), - ) +def _test_binaryop_inner(pa_data, plc_data, pyop, plc_op): + lhs_py, rhs_py, outty_py = pa_data + lhs_plc, rhs_plc, outty_plc = plc_data + def get_result(): + return 
plc.binaryop.binary_operation( + lhs_plc, + rhs_plc, + plc_op, + outty_plc, + ) -@pytest.fixture -def plc_decimal_col(pa_decimal_col): - return plc.interop.from_arrow(pa_decimal_col) + if not plc.binaryop.is_supported_operation( + outty_plc, lhs_plc.type(), rhs_plc.type(), plc_op + ): + with pytest.raises(TypeError): + get_result() + return + + expect = [ + pyop(x, y) for x, y in zip(lhs_py.to_pylist(), rhs_py.to_pylist()) + ] + expect = pa.array(expect, type=outty_py) + got = get_result() + assert_column_eq(expect, got) -def test_add( - pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col -): - expect = pa.compute.add(pa_int_col, pa_float_col).cast(pa.int32()) - got = plc.binaryop.binary_operation( - plc_int_col, - plc_float_col, +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_add(pa_data, plc_data): + def add(x, y): + if x is None or y is None: + return None + return x + y + + _test_binaryop_inner( + pa_data, + plc_data, + add, plc.binaryop.BinaryOperator.ADD, - plc.DataType(plc.TypeId.INT32), ) - assert_column_eq(expect, got) - - with pytest.raises(TypeError): - plc.binaryop.binary_operation( - plc_duration_col, - plc_duration_col, - plc.binaryop.BinaryOperator.ADD, - plc.DataType(plc.TypeId.INT32), - ) - -def test_sub( - pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col -): - expect = pa.compute.subtract(pa_int_col, pa_float_col).cast(pa.int32()) - got = plc.binaryop.binary_operation( - plc_int_col, - plc_float_col, +@pytest.mark.parametrize( + "pa_data", + [("int64", "int64", "int64"), ("int64", "float64", "float64")], + indirect=True, + ids=idfn, +) +def test_sub(pa_data, plc_data): + def sub(x, y): + if x is None or y is None: + return None + return x - y + + _test_binaryop_inner( + pa_data, + plc_data, + sub, plc.binaryop.BinaryOperator.SUB, - 
plc.DataType(plc.TypeId.INT32), ) - assert_column_eq(expect, got) - with pytest.raises(TypeError): - plc.binaryop.binary_operation( - plc_duration_col, - plc_duration_col, - plc.binaryop.BinaryOperator.SUB, - plc.DataType(plc.TypeId.INT32), - ) +@pytest.mark.parametrize( + "pa_data", + [("int64", "int64", "int64"), ("int64", "float64", "float64")], + indirect=True, + ids=idfn, +) +def test_mul(pa_data, plc_data): + def mul(x, y): + if x is None or y is None: + return None + return x * y + + _test_binaryop_inner( + pa_data, + plc_data, + mul, + plc.binaryop.BinaryOperator.MUL, + ) -def test_mul( - pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col -): - expect = pa.compute.multiply(pa_int_col, pa_float_col).cast(pa.int32()) - got = plc.binaryop.binary_operation( - plc_int_col, - plc_float_col, - plc.binaryop.BinaryOperator.MUL, - plc.DataType(plc.TypeId.INT32), +@pytest.mark.parametrize( + "pa_data", + [("int64", "int64", "int64"), ("int64", "float64", "float64")], + indirect=True, + ids=idfn, +) +def test_div(pa_data, plc_data): + def div(x, y): + if x is None or y is None: + return None + return x / y + + _test_binaryop_inner( + pa_data, + plc_data, + div, + plc.binaryop.BinaryOperator.DIV, ) - assert_column_eq(expect, got) - with pytest.raises(TypeError): - plc.binaryop.binary_operation( - plc_duration_col, - plc_duration_col, - plc.binaryop.BinaryOperator.MUL, - plc.DataType(plc.TypeId.INT32), - ) +@pytest.mark.parametrize( + "pa_data", + [("int64", "int64", "int64"), ("int64", "float64", "float64")], + indirect=True, + ids=idfn, +) +def test_floordiv(pa_data, plc_data): + def floordiv(x, y): + if x is None or y is None: + return None + return x // y + + _test_binaryop_inner( + pa_data, + plc_data, + floordiv, + plc.binaryop.BinaryOperator.FLOOR_DIV, + ) -def test_div( - pa_int_col, pa_float_col, plc_int_col, plc_float_col, plc_duration_col -): - expect = pa.compute.divide(pa_int_col, pa_float_col).cast(pa.int32()) - got = 
plc.binaryop.binary_operation( - plc_int_col, - plc_float_col, - plc.binaryop.BinaryOperator.DIV, - plc.DataType(plc.TypeId.INT32), +@pytest.mark.parametrize( + "pa_data", + [("int64", "int64", "int64"), ("int64", "float64", "float64")], + indirect=True, + ids=idfn, +) +def test_truediv(pa_data, plc_data): + def truediv(x, y): + if x is None or y is None: + return None + return x / y + + _test_binaryop_inner( + pa_data, + plc_data, + truediv, + plc.binaryop.BinaryOperator.TRUE_DIV, ) - assert_column_eq(expect, got) - with pytest.raises(TypeError): - plc.binaryop.binary_operation( - plc_float_col, - plc_duration_col, - plc.binaryop.BinaryOperator.DIV, - plc.DataType(plc.TypeId.INT32), - ) +@pytest.mark.parametrize( + "pa_data", + [("int64", "int64", "int64"), ("int64", "float64", "float64")], + indirect=True, + ids=idfn, +) +def test_mod(pa_data, plc_data): + def mod(x, y): + if x is None or y is None: + return None + return x % y + + _test_binaryop_inner( + pa_data, + plc_data, + mod, + plc.binaryop.BinaryOperator.MOD, + ) From 076b83d50e6eb5d701c73742f31176e0772ce446 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 10 Jul 2024 12:03:34 -0700 Subject: [PATCH 11/21] address reviews, add tests --- python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 25 +- .../cudf/_lib/pylibcudf/libcudf/binaryop.pxd | 3 +- .../cudf/pylibcudf_tests/test_binaryops.py | 660 ++++++++++++++++-- 3 files changed, 634 insertions(+), 54 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index aae44259bbf..6a223df5799 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -2,6 +2,7 @@ from cython.operator import dereference +from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -86,12 +87,34 @@ cpdef Column binary_operation( return Column.from_libcudf(move(result)) -def is_supported_operation( +cpdef bool 
is_supported_operation( DataType out, DataType lhs, DataType rhs, binary_operator op ): + """Returns true if the binary operator is supported for the given input types. + + For details, see :cpp:func:`cudf::binops::is_supported_operation`. + + Parameters + ---------- + out : DataType + The output data type. + lhs : DataType + The left hand side data type. + rhs : DataType + The right hand side data type. + op : BinaryOperator + The operation to check. + + Returns + ------- + bool + True if the operation is supported, False otherwise + + """ + return cpp_binaryop.is_supported_operation( out.c_obj, lhs.c_obj, diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd index da487d231fc..b34fea6a775 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport bool, int32_t +from libc.stdint cimport int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -44,6 +44,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: NULL_EQUALS NULL_MAX NULL_MIN + NULL_NOT_EQUALS GENERIC_BINARY NULL_LOGICAL_AND NULL_LOGICAL_OR diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 1f6d6f80e0b..cd75e07e921 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -11,7 +11,7 @@ def idfn(param): ltype, rtype, outtype = param - return f"{ltype}-{rtype}-{outtype}" + return "-".join(map(str, param)) @pytest.fixture(params=[True, False], ids=["nulls", "no_nulls"]) @@ -95,10 +95,7 @@ def get_result(): get_result() return - expect = [ - pyop(x, y) for x, y in zip(lhs_py.to_pylist(), rhs_py.to_pylist()) - ] - expect = pa.array(expect, type=outty_py) + expect = pyop(lhs_py, 
rhs_py).cast(outty_py) got = get_result() assert_column_eq(expect, got) @@ -114,90 +111,111 @@ def get_result(): ids=idfn, ) def test_add(pa_data, plc_data): - def add(x, y): - if x is None or y is None: - return None - return x + y - _test_binaryop_inner( pa_data, plc_data, - add, + pa.compute.add, plc.binaryop.BinaryOperator.ADD, ) @pytest.mark.parametrize( "pa_data", - [("int64", "int64", "int64"), ("int64", "float64", "float64")], + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], indirect=True, ids=idfn, ) def test_sub(pa_data, plc_data): - def sub(x, y): - if x is None or y is None: - return None - return x - y - _test_binaryop_inner( pa_data, plc_data, - sub, + pa.compute.subtract, plc.binaryop.BinaryOperator.SUB, ) @pytest.mark.parametrize( "pa_data", - [("int64", "int64", "int64"), ("int64", "float64", "float64")], + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], indirect=True, ids=idfn, ) def test_mul(pa_data, plc_data): - def mul(x, y): - if x is None or y is None: - return None - return x * y - _test_binaryop_inner( pa_data, plc_data, - mul, + pa.compute.multiply, plc.binaryop.BinaryOperator.MUL, ) @pytest.mark.parametrize( "pa_data", - [("int64", "int64", "int64"), ("int64", "float64", "float64")], + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], indirect=True, ids=idfn, ) def test_div(pa_data, plc_data): - def div(x, y): - if x is None or y is None: - return None - return x / y - _test_binaryop_inner( pa_data, plc_data, - div, + pa.compute.divide, plc.binaryop.BinaryOperator.DIV, ) @pytest.mark.parametrize( "pa_data", - [("int64", "int64", "int64"), ("int64", "float64", "float64")], + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "timedelta64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_truediv(pa_data, 
plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.divide, + plc.binaryop.BinaryOperator.TRUE_DIV, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], indirect=True, ids=idfn, ) def test_floordiv(pa_data, plc_data): def floordiv(x, y): - if x is None or y is None: - return None - return x // y + x = x.to_pylist() + y = y.to_pylist() + + def slr_func(x, y): + if x is None or y is None: + return None + return x // y + + return pa.array([slr_func(x, y) for x, y in zip(x, y)]) _test_binaryop_inner( pa_data, @@ -209,39 +227,577 @@ def floordiv(x, y): @pytest.mark.parametrize( "pa_data", - [("int64", "int64", "int64"), ("int64", "float64", "float64")], + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], indirect=True, ids=idfn, ) -def test_truediv(pa_data, plc_data): - def truediv(x, y): - if x is None or y is None: - return None - return x / y +def test_mod(pa_data, plc_data): + def mod(x, y): + x = x.to_pylist() + y = y.to_pylist() + + def slr_func(x, y): + if x is None or y is None: + return None + return x % y + + return pa.array([slr_func(x, y) for x, y in zip(x, y)]) _test_binaryop_inner( pa_data, plc_data, - truediv, - plc.binaryop.BinaryOperator.TRUE_DIV, + mod, + plc.binaryop.BinaryOperator.MOD, ) @pytest.mark.parametrize( "pa_data", - [("int64", "int64", "int64"), ("int64", "float64", "float64")], + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], indirect=True, ids=idfn, ) -def test_mod(pa_data, plc_data): - def mod(x, y): - if x is None or y is None: - return None - return x % y +def test_pmod(pa_data, plc_data): + def pmod(x, y): + x = x.to_pylist() + y = y.to_pylist() + + def slr_func(x, y): + if x is None or y is None: + return None + return (x % y + y) % y + + return pa.array([slr_func(x, y) for x, y 
in zip(x, y)]) _test_binaryop_inner( pa_data, plc_data, - mod, - plc.binaryop.BinaryOperator.MOD, + pmod, + plc.binaryop.BinaryOperator.PMOD, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_pymod(pa_data, plc_data): + def pymod(x, y): + x = x.to_pylist() + y = y.to_pylist() + + def slr_func(x, y): + if x is None or y is None: + return None + return x % y + + return pa.array([slr_func(x, y) for x, y in zip(x, y)]) + + _test_binaryop_inner( + pa_data, + plc_data, + pymod, + plc.binaryop.BinaryOperator.PYMOD, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "timedelta64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_pow(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.power, + plc.binaryop.BinaryOperator.POW, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_int_pow(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.power, + plc.binaryop.BinaryOperator.INT_POW, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("float64", "float64", "float64"), + ("int64", "float64", "float64"), + ("int64", "int64", "timedelta64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_log_base(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.logb, + plc.binaryop.BinaryOperator.LOG_BASE, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("float64", "float64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "timedelta64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_atan2(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.atan2, + 
plc.binaryop.BinaryOperator.ATAN2, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_shift_left(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.shift_left, + plc.binaryop.BinaryOperator.SHIFT_LEFT, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_shift_right(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.shift_right, + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_shift_right_unsigned(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.shift_right_unsigned, + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_bitwise_and(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.bit_wise_and, + plc.binaryop.BinaryOperator.BITWISE_AND, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + indirect=True, + ids=idfn, +) +def test_bitwise_or(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.bit_wise_or, + plc.binaryop.BinaryOperator.BITWISE_OR, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "datetime64[ns]"), + ], + 
indirect=True, + ids=idfn, +) +def test_bitwise_xor(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.bit_wise_xor, + plc.binaryop.BinaryOperator.BITWISE_XOR, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_logical_and(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.and_, + plc.binaryop.BinaryOperator.LOGICAL_AND, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_logical_or(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.or_, + plc.binaryop.BinaryOperator.LOGICAL_OR, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_equal(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.equal, + plc.binaryop.BinaryOperator.EQUAL, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_not_equal(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.not_equal, + plc.binaryop.BinaryOperator.NOT_EQUAL, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_less(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.less, + plc.binaryop.BinaryOperator.LESS, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + 
], + indirect=True, + ids=idfn, +) +def test_greater(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.greater, + plc.binaryop.BinaryOperator.GREATER, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_less_equal(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.less_equal, + plc.binaryop.BinaryOperator.LESS_EQUAL, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_greater_equal(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.greater_equal, + plc.binaryop.BinaryOperator.GREATER_EQUAL, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + ids=idfn, +) +def test_null_equals(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.equal, + plc.binaryop.BinaryOperator.NULL_EQUALS, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "datetime64[ns]"), + ("int64", "float64", "float64"), + ], + indirect=True, + ids=idfn, +) +def test_null_max(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.max, + plc.binaryop.BinaryOperator.NULL_MAX, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "datetime64[ns]"), + ("int64", "float64", "float64"), + ], + indirect=True, + ids=idfn, +) +def test_null_min(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.min, + plc.binaryop.BinaryOperator.NULL_MIN, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ("int64", "int64", "int64"), + ], + indirect=True, + 
ids=idfn, +) +def test_null_not_equals(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.not_equal, + plc.binaryop.BinaryOperator.NULL_NOT_EQUALS, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ], + indirect=True, + ids=idfn, +) +def test_generic_binary(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.generic_binary, + plc.binaryop.BinaryOperator.GENERIC_BINARY, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ], + indirect=True, + ids=idfn, +) +def test_null_logical_and(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.and_, + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ], + indirect=True, + ids=idfn, +) +def test_null_logical_or(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + pa.compute.or_, + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, + ) + + +@pytest.mark.parametrize( + "pa_data", + [ + ("int64", "int64", "int64"), + ("int64", "float64", "float64"), + ], + indirect=True, + ids=idfn, +) +def test_invalid_binary(pa_data, plc_data): + _test_binaryop_inner( + pa_data, + plc_data, + None, + plc.binaryop.BinaryOperator.INVALID_BINARY, ) From e06d5cd4fd0fd94a3b84eadce48213e04b00f3a5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 Jul 2024 15:16:54 -0700 Subject: [PATCH 12/21] fix a few ops --- .../cudf/cudf/pylibcudf_tests/test_binaryops.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index cd75e07e921..e95c3b8f40d 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ 
-440,10 +440,21 @@ def test_shift_right(pa_data, plc_data): ids=idfn, ) def test_shift_right_unsigned(pa_data, plc_data): + def shift_right_unsigned(x, y): + x = x.to_pylist() + y = y.to_pylist() + + def logical_right_shift(x, y): + unsigned_x = np.uint32(x) + result = unsigned_x >> y + return result + + return pa.array([logical_right_shift(x, y) for x, y in zip(x, y)]) + _test_binaryop_inner( pa_data, plc_data, - pa.compute.shift_right_unsigned, + shift_right_unsigned, plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, ) @@ -689,7 +700,7 @@ def test_null_max(pa_data, plc_data): _test_binaryop_inner( pa_data, plc_data, - pa.compute.max, + pa.compute.max_element_wise, plc.binaryop.BinaryOperator.NULL_MAX, ) @@ -707,7 +718,7 @@ def test_null_min(pa_data, plc_data): _test_binaryop_inner( pa_data, plc_data, - pa.compute.min, + pa.compute.min_element_wise, plc.binaryop.BinaryOperator.NULL_MIN, ) From 473845f2881f9e3fd043e25fe7f56cf843fde9ba Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 Jul 2024 15:55:02 -0700 Subject: [PATCH 13/21] few more fixes --- python/cudf/cudf/pylibcudf_tests/test_binaryops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index e95c3b8f40d..97dcadecada 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -94,7 +94,6 @@ def get_result(): with pytest.raises(TypeError): get_result() return - expect = pyop(lhs_py, rhs_py).cast(outty_py) got = get_result() assert_column_eq(expect, got) @@ -375,7 +374,7 @@ def test_log_base(pa_data, plc_data): @pytest.mark.parametrize( "pa_data", [ - ("float64", "float64", "int64"), + ("float64", "float64", "float64"), ("int64", "float64", "float64"), ("int64", "int64", "timedelta64[ns]"), ], @@ -445,6 +444,8 @@ def shift_right_unsigned(x, y): y = y.to_pylist() def logical_right_shift(x, y): + if x is None 
or y is None: + return None unsigned_x = np.uint32(x) result = unsigned_x >> y return result @@ -755,7 +756,7 @@ def test_generic_binary(pa_data, plc_data): _test_binaryop_inner( pa_data, plc_data, - pa.compute.generic_binary, + None, plc.binaryop.BinaryOperator.GENERIC_BINARY, ) From 6e0c8161cd19af8f4fbc9ac091373f30c7be3179 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 15 Jul 2024 05:52:51 -0700 Subject: [PATCH 14/21] address more reviews --- python/cudf/cudf/_lib/pylibcudf/binaryop.pxd | 9 +++++++ python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 26 +++++++++---------- .../cudf/pylibcudf_tests/test_binaryops.py | 1 - 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd b/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd index 9a8c8e49dcf..2411e28ac66 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from libcpp cimport bool + from cudf._lib.pylibcudf.libcudf.binaryop cimport binary_operator from .column cimport Column @@ -22,3 +24,10 @@ cpdef Column binary_operation( binary_operator op, DataType output_type ) + +cpdef bool is_supported_operation( + DataType out, + DataType lhs, + DataType rhs, + binary_operator op +) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index 6a223df5799..beb5a374501 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -93,26 +93,26 @@ cpdef bool is_supported_operation( DataType rhs, binary_operator op ): - """Returns true if the binary operator is supported for the given input types. - - For details, see :cpp:func:`cudf::binops::is_supported_operation`. + """Check if an operation is supported for the given data types. Parameters ---------- - out : DataType - The output data type. - lhs : DataType - The left hand side data type. 
- rhs : DataType - The right hand side data type. - op : BinaryOperator - The operation to check. + out + Output data type + lhs + Left operand type + rhs + Right operand type + op + Binary operation to check Returns ------- - bool - True if the operation is supported, False otherwise + True if the operation is supported for the requested types. + See Also + -------- + :cpp:func:`is_supported_operation` """ return cpp_binaryop.is_supported_operation( diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 97dcadecada..082d9414498 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,6 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. - import numpy as np import pyarrow as pa import pytest From 77c709fcc7cb0ceff816e9f9526372897d0adda7 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Jul 2024 06:15:02 -0700 Subject: [PATCH 15/21] fix cpp test --- cpp/tests/binaryop/binop-verify-input-test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/binaryop/binop-verify-input-test.cpp b/cpp/tests/binaryop/binop-verify-input-test.cpp index 1346dcd4666..def6e94452e 100644 --- a/cpp/tests/binaryop/binop-verify-input-test.cpp +++ b/cpp/tests/binaryop/binop-verify-input-test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. 
* Copyright 2018 Christian Noboa Mardini @@ -42,5 +42,5 @@ TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize) EXPECT_THROW(cudf::binary_operation( lhs, rhs, cudf::binary_operator::ADD, cudf::data_type(cudf::type_id::INT64)), - cudf::logic_error); + std::invalid_argument); } From 4e653d636ce366a272622673f64339d9b50f86fc Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Jul 2024 06:40:01 -0700 Subject: [PATCH 16/21] fix atan2 --- .../cudf/cudf/pylibcudf_tests/test_binaryops.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index 082d9414498..df5fa1abb88 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import math + import numpy as np import pyarrow as pa import pytest @@ -381,10 +383,21 @@ def test_log_base(pa_data, plc_data): ids=idfn, ) def test_atan2(pa_data, plc_data): + def atan2(x, y): + x = x.to_pylist() + y = y.to_pylist() + + def atan2_none_safe(x, y): + if x is None or y is None: + return None + return math.atan2(x, y) + + return pa.array([atan2_none_safe(x, y) for x, y in zip(x, y)]) + _test_binaryop_inner( pa_data, plc_data, - pa.compute.atan2, + atan2, plc.binaryop.BinaryOperator.ATAN2, ) From 6968f9588aac520ef692a5470b31b00917ed2785 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Jul 2024 07:21:56 -0700 Subject: [PATCH 17/21] update assert_column_eq to handle nans --- python/cudf/cudf/pylibcudf_tests/common/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py index efb192b3251..0150461a4c1 100644 --- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py @@ -109,6 +109,16 
@@ def _make_fields_nullable(typ): lhs_type = _make_fields_nullable(lhs.type) lhs = rhs.cast(lhs_type) + lhs_nans = pa.compute.is_nan(lhs) + rhs_nans = pa.compute.is_nan(rhs) + assert lhs_nans.equals(rhs_nans) + + if any(lhs_nans.to_pylist()) or any(rhs_nans.to_pylist()): + # masks must be equal at this point + mask = pa.compute.fill_null(pa.compute.invert(lhs_nans), True) + lhs = lhs.filter(mask) + rhs = rhs.filter(mask) + assert lhs.equals(rhs) From cc8afe93432c0fd9f5a49f2725d3a73e3e0f0e9d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 Jul 2024 10:35:23 -0700 Subject: [PATCH 18/21] only check nans for floating --- .../cudf/cudf/pylibcudf_tests/common/utils.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py index 0150461a4c1..ad2f1695cc1 100644 --- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py @@ -109,15 +109,16 @@ def _make_fields_nullable(typ): lhs_type = _make_fields_nullable(lhs.type) lhs = rhs.cast(lhs_type) - lhs_nans = pa.compute.is_nan(lhs) - rhs_nans = pa.compute.is_nan(rhs) - assert lhs_nans.equals(rhs_nans) - - if any(lhs_nans.to_pylist()) or any(rhs_nans.to_pylist()): - # masks must be equal at this point - mask = pa.compute.fill_null(pa.compute.invert(lhs_nans), True) - lhs = lhs.filter(mask) - rhs = rhs.filter(mask) + if pa.types.is_floating(lhs.type) and pa.types.is_floating(rhs.type): + lhs_nans = pa.compute.is_nan(lhs) + rhs_nans = pa.compute.is_nan(rhs) + assert lhs_nans.equals(rhs_nans) + + if any(lhs_nans.to_pylist()) or any(rhs_nans.to_pylist()): + # masks must be equal at this point + mask = pa.compute.fill_null(pa.compute.invert(lhs_nans), True) + lhs = lhs.filter(mask) + rhs = rhs.filter(mask) assert lhs.equals(rhs) From 8c441495418dd772404be835e64a61f3415e27dd Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 17 Jul 2024 
18:21:33 -0700 Subject: [PATCH 19/21] address reviews --- python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 26 +++++++++---------- .../cudf/cudf/pylibcudf_tests/common/utils.py | 2 +- .../cudf/pylibcudf_tests/test_binaryops.py | 12 ++++----- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index beb5a374501..ba1c9d1a45a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -95,24 +95,22 @@ cpdef bool is_supported_operation( ): """Check if an operation is supported for the given data types. + For details, see :cpp:func:`cudf::binops::is_supported_operation`. + Parameters ---------- - out - Output data type - lhs - Left operand type - rhs - Right operand type - op - Binary operation to check - + out : DataType + The output data type. + lhs : DataType + The left hand side data type. + rhs : DataType + The right hand side data type. + op : BinaryOperator + The operation to check. Returns ------- - True if the operation is supported for the requested types. 
- - See Also - -------- - :cpp:func:`is_supported_operation` + bool + True if the operation is supported, False otherwise """ return cpp_binaryop.is_supported_operation( diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py index ad2f1695cc1..7f73d042c42 100644 --- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py @@ -114,7 +114,7 @@ def _make_fields_nullable(typ): rhs_nans = pa.compute.is_nan(rhs) assert lhs_nans.equals(rhs_nans) - if any(lhs_nans.to_pylist()) or any(rhs_nans.to_pylist()): + if pa.compute.any(lhs_nans) or pa.compute.any(rhs_nans): # masks must be equal at this point mask = pa.compute.fill_null(pa.compute.invert(lhs_nans), True) lhs = lhs.filter(mask) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index df5fa1abb88..d9694c2aa9e 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -43,10 +43,10 @@ def make_col(dtype, nulls): pa_type = pa.int32() elif dtype == "uint64": data = [1, 2, 3, 4, 5] - pa_type = pa.uint32() + pa_type = pa.uint64() elif dtype == "float64": data = [1.0, 2.0, 3.0, 4.0, 5.0] - pa_type = pa.float32() + pa_type = pa.float64() elif dtype == "bool": data = [True, False, True, False, True] pa_type = pa.bool_() @@ -94,10 +94,10 @@ def get_result(): ): with pytest.raises(TypeError): get_result() - return - expect = pyop(lhs_py, rhs_py).cast(outty_py) - got = get_result() - assert_column_eq(expect, got) + else: + expect = pyop(lhs_py, rhs_py).cast(outty_py) + got = get_result() + assert_column_eq(expect, got) @pytest.mark.parametrize( From 959e9a78b5ca72a09b3d21e00ce3b0c250d5ed55 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 18 Jul 2024 05:49:23 -0700 Subject: [PATCH 20/21] refactor again --- .../cudf/pylibcudf_tests/test_binaryops.py | 1461 ++++++++--------- 1 file 
changed, 710 insertions(+), 751 deletions(-) diff --git a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py index d9694c2aa9e..a83caf39ead 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_binaryops.py +++ b/python/cudf/cudf/pylibcudf_tests/test_binaryops.py @@ -11,8 +11,9 @@ def idfn(param): - ltype, rtype, outtype = param - return "-".join(map(str, param)) + ltype, rtype, outtype, plc_op, _ = param + params = (plc_op.name, ltype, rtype, outtype) + return "-".join(map(str, params)) @pytest.fixture(params=[True, False], ids=["nulls", "no_nulls"]) @@ -20,27 +21,10 @@ def nulls(request): return request.param -@pytest.fixture -def pa_data(request, nulls): - ltype, rtype, outtype = request.param - values = make_col(ltype, nulls), make_col(rtype, nulls), outtype - return values - - -@pytest.fixture -def plc_data(pa_data): - lhs, rhs, outtype = pa_data - return ( - plc.interop.from_arrow(lhs), - plc.interop.from_arrow(rhs), - plc.interop.from_arrow(pa.from_numpy_dtype(np.dtype(outtype))), - ) - - def make_col(dtype, nulls): if dtype == "int64": data = [1, 2, 3, 4, 5] - pa_type = pa.int32() + pa_type = pa.int64() elif dtype == "uint64": data = [1, 2, 3, 4, 5] pa_type = pa.uint64() @@ -77,751 +61,726 @@ def make_col(dtype, nulls): return pa.array(data, type=pa_type) -def _test_binaryop_inner(pa_data, plc_data, pyop, plc_op): - lhs_py, rhs_py, outty_py = pa_data - lhs_plc, rhs_plc, outty_plc = plc_data - - def get_result(): - return plc.binaryop.binary_operation( - lhs_plc, - rhs_plc, - plc_op, - outty_plc, - ) - - if not plc.binaryop.is_supported_operation( - outty_plc, lhs_plc.type(), rhs_plc.type(), plc_op - ): - with pytest.raises(TypeError): - get_result() - else: - expect = pyop(lhs_py, rhs_py).cast(outty_py) - got = get_result() - assert_column_eq(expect, got) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - 
], - indirect=True, - ids=idfn, -) -def test_add(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.add, - plc.binaryop.BinaryOperator.ADD, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_sub(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.subtract, - plc.binaryop.BinaryOperator.SUB, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_mul(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.multiply, - plc.binaryop.BinaryOperator.MUL, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_div(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.divide, - plc.binaryop.BinaryOperator.DIV, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "timedelta64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_truediv(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.divide, - plc.binaryop.BinaryOperator.TRUE_DIV, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_floordiv(pa_data, plc_data): - def floordiv(x, y): - x = x.to_pylist() - y = y.to_pylist() - - def slr_func(x, y): - if x is None or y is None: - return None - return x // y - - return pa.array([slr_func(x, y) for x, y in zip(x, y)]) - - _test_binaryop_inner( - pa_data, - plc_data, 
- floordiv, - plc.binaryop.BinaryOperator.FLOOR_DIV, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_mod(pa_data, plc_data): - def mod(x, y): - x = x.to_pylist() - y = y.to_pylist() - - def slr_func(x, y): - if x is None or y is None: - return None - return x % y - - return pa.array([slr_func(x, y) for x, y in zip(x, y)]) - - _test_binaryop_inner( - pa_data, - plc_data, - mod, - plc.binaryop.BinaryOperator.MOD, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_pmod(pa_data, plc_data): - def pmod(x, y): - x = x.to_pylist() - y = y.to_pylist() - - def slr_func(x, y): - if x is None or y is None: - return None - return (x % y + y) % y - - return pa.array([slr_func(x, y) for x, y in zip(x, y)]) - - _test_binaryop_inner( - pa_data, - plc_data, - pmod, - plc.binaryop.BinaryOperator.PMOD, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_pymod(pa_data, plc_data): - def pymod(x, y): - x = x.to_pylist() - y = y.to_pylist() - - def slr_func(x, y): - if x is None or y is None: - return None - return x % y - - return pa.array([slr_func(x, y) for x, y in zip(x, y)]) - - _test_binaryop_inner( - pa_data, - plc_data, - pymod, - plc.binaryop.BinaryOperator.PYMOD, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "timedelta64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_pow(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.power, - plc.binaryop.BinaryOperator.POW, - ) - - -@pytest.mark.parametrize( 
- "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_int_pow(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.power, - plc.binaryop.BinaryOperator.INT_POW, - ) +@pytest.fixture +def pa_data(request, nulls): + ltype, rtype, outtype = request.param + values = make_col(ltype, nulls), make_col(rtype, nulls), outtype + return values -@pytest.mark.parametrize( - "pa_data", - [ - ("float64", "float64", "float64"), - ("int64", "float64", "float64"), - ("int64", "int64", "timedelta64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_log_base(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.logb, - plc.binaryop.BinaryOperator.LOG_BASE, +@pytest.fixture +def plc_data(pa_data): + lhs, rhs, outtype = pa_data + return ( + plc.interop.from_arrow(lhs), + plc.interop.from_arrow(rhs), + plc.interop.from_arrow(pa.from_numpy_dtype(np.dtype(outtype))), ) -@pytest.mark.parametrize( - "pa_data", - [ - ("float64", "float64", "float64"), - ("int64", "float64", "float64"), - ("int64", "int64", "timedelta64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_atan2(pa_data, plc_data): - def atan2(x, y): - x = x.to_pylist() - y = y.to_pylist() - - def atan2_none_safe(x, y): - if x is None or y is None: - return None - return math.atan2(x, y) - - return pa.array([atan2_none_safe(x, y) for x, y in zip(x, y)]) - - _test_binaryop_inner( - pa_data, - plc_data, - atan2, - plc.binaryop.BinaryOperator.ATAN2, +@pytest.fixture +def tests(request, nulls): + ltype, rtype, py_outtype, plc_op, py_op = request.param + pa_lhs, pa_rhs = make_col(ltype, nulls), make_col(rtype, nulls) + plc_lhs, plc_rhs = ( + plc.interop.from_arrow(pa_lhs), + plc.interop.from_arrow(pa_rhs), ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - 
], - indirect=True, - ids=idfn, -) -def test_shift_left(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.shift_left, - plc.binaryop.BinaryOperator.SHIFT_LEFT, + plc_dtype = plc.interop.from_arrow( + pa.from_numpy_dtype(np.dtype(py_outtype)) ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_shift_right(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.shift_right, - plc.binaryop.BinaryOperator.SHIFT_RIGHT, + return ( + pa_lhs, + pa_rhs, + py_outtype, + plc_lhs, + plc_rhs, + plc_dtype, + py_op, + plc_op, ) -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_shift_right_unsigned(pa_data, plc_data): - def shift_right_unsigned(x, y): +def custom_pyop(func): + def wrapper(x, y): x = x.to_pylist() y = y.to_pylist() - def logical_right_shift(x, y): + def inner(x, y): if x is None or y is None: return None - unsigned_x = np.uint32(x) - result = unsigned_x >> y - return result - - return pa.array([logical_right_shift(x, y) for x, y in zip(x, y)]) - - _test_binaryop_inner( - pa_data, - plc_data, - shift_right_unsigned, - plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_bitwise_and(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.bit_wise_and, - plc.binaryop.BinaryOperator.BITWISE_AND, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def 
test_bitwise_or(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.bit_wise_or, - plc.binaryop.BinaryOperator.BITWISE_OR, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "datetime64[ns]"), - ], - indirect=True, - ids=idfn, -) -def test_bitwise_xor(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.bit_wise_xor, - plc.binaryop.BinaryOperator.BITWISE_XOR, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_logical_and(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.and_, - plc.binaryop.BinaryOperator.LOGICAL_AND, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_logical_or(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.or_, - plc.binaryop.BinaryOperator.LOGICAL_OR, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_equal(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.equal, - plc.binaryop.BinaryOperator.EQUAL, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_not_equal(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.not_equal, - plc.binaryop.BinaryOperator.NOT_EQUAL, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - 
indirect=True, - ids=idfn, -) -def test_less(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.less, - plc.binaryop.BinaryOperator.LESS, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_greater(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.greater, - plc.binaryop.BinaryOperator.GREATER, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_less_equal(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.less_equal, - plc.binaryop.BinaryOperator.LESS_EQUAL, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_greater_equal(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.greater_equal, - plc.binaryop.BinaryOperator.GREATER_EQUAL, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_null_equals(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.equal, - plc.binaryop.BinaryOperator.NULL_EQUALS, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "datetime64[ns]"), - ("int64", "float64", "float64"), - ], - indirect=True, - ids=idfn, -) -def test_null_max(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.max_element_wise, - plc.binaryop.BinaryOperator.NULL_MAX, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "datetime64[ns]"), - ("int64", "float64", "float64"), - ], - indirect=True, - 
ids=idfn, -) -def test_null_min(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.min_element_wise, - plc.binaryop.BinaryOperator.NULL_MIN, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ("int64", "int64", "int64"), - ], - indirect=True, - ids=idfn, -) -def test_null_not_equals(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.not_equal, - plc.binaryop.BinaryOperator.NULL_NOT_EQUALS, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ], - indirect=True, - ids=idfn, -) -def test_generic_binary(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - None, - plc.binaryop.BinaryOperator.GENERIC_BINARY, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ], - indirect=True, - ids=idfn, -) -def test_null_logical_and(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.and_, - plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, - ) - - -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ], - indirect=True, - ids=idfn, -) -def test_null_logical_or(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - pa.compute.or_, - plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, - ) + return func(x, y) + + return pa.array([inner(x, y) for x, y in zip(x, y)]) + + return wrapper + + +@custom_pyop +def py_floordiv(x, y): + return x // y + + +@custom_pyop +def py_pmod(x, y): + return (x % y + y) % y + + +@custom_pyop +def py_mod(x, y): + return x % y + + +@custom_pyop +def py_atan2(x, y): + return math.atan2(x, y) + + +@custom_pyop +def py_shift_right_unsigned(x, y): + unsigned_x = np.uint32(x) + result = unsigned_x >> y + return result + + +@pytest.mark.parametrize( + "tests", + [ + ( + "int64", + "int64", + "int64", + 
plc.binaryop.BinaryOperator.ADD, + pa.compute.add, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.ADD, + pa.compute.add, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.ADD, + pa.compute.add, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SUB, + pa.compute.subtract, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SUB, + pa.compute.subtract, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SUB, + pa.compute.subtract, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.MUL, + pa.compute.multiply, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.MUL, + pa.compute.multiply, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.MUL, + pa.compute.multiply, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.DIV, + pa.compute.divide, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.TRUE_DIV, + pa.compute.divide, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.TRUE_DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.TRUE_DIV, + pa.compute.divide, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.FLOOR_DIV, + py_floordiv, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.FLOOR_DIV, + py_floordiv, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.FLOOR_DIV, + py_floordiv, + ), + ("int64", "int64", "int64", plc.binaryop.BinaryOperator.MOD, py_mod), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.MOD, + py_mod, + ), + ( + "int64", 
+ "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.MOD, + py_mod, + ), + ("int64", "int64", "int64", plc.binaryop.BinaryOperator.PMOD, py_pmod), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.PMOD, + py_pmod, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.PMOD, + py_pmod, + ), + ("int64", "int64", "int64", plc.binaryop.BinaryOperator.PYMOD, py_mod), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.PYMOD, + py_mod, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.PYMOD, + py_mod, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.POW, + pa.compute.power, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.POW, + pa.compute.power, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.POW, + pa.compute.power, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.INT_POW, + pa.compute.power, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.INT_POW, + pa.compute.power, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.INT_POW, + pa.compute.power, + ), + ( + "float64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOG_BASE, + pa.compute.logb, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOG_BASE, + pa.compute.logb, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.LOG_BASE, + pa.compute.logb, + ), + ( + "float64", + "float64", + "float64", + plc.binaryop.BinaryOperator.ATAN2, + py_atan2, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.ATAN2, + py_atan2, + ), + ( + "int64", + "int64", + "timedelta64[ns]", + plc.binaryop.BinaryOperator.ATAN2, + py_atan2, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SHIFT_LEFT, + pa.compute.shift_left, + ), + ( + "int64", + "float64", + "float64", + 
plc.binaryop.BinaryOperator.SHIFT_LEFT, + pa.compute.shift_left, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SHIFT_LEFT, + pa.compute.shift_left, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + pa.compute.shift_right, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + pa.compute.shift_right, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SHIFT_RIGHT, + pa.compute.shift_right, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + py_shift_right_unsigned, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + py_shift_right_unsigned, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.SHIFT_RIGHT_UNSIGNED, + py_shift_right_unsigned, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.BITWISE_AND, + pa.compute.bit_wise_and, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.BITWISE_AND, + pa.compute.bit_wise_and, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.BITWISE_AND, + pa.compute.bit_wise_and, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.BITWISE_OR, + pa.compute.bit_wise_or, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.BITWISE_OR, + pa.compute.bit_wise_or, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.BITWISE_OR, + pa.compute.bit_wise_or, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.BITWISE_XOR, + pa.compute.bit_wise_xor, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.BITWISE_XOR, + pa.compute.bit_wise_xor, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.BITWISE_XOR, + pa.compute.bit_wise_xor, + ), + ( + "int64", + "int64", + "int64", + 
plc.binaryop.BinaryOperator.LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.EQUAL, + pa.compute.equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.EQUAL, + pa.compute.equal, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.NOT_EQUAL, + pa.compute.not_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NOT_EQUAL, + pa.compute.not_equal, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.LESS, + pa.compute.less, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LESS, + pa.compute.less, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.GREATER, + pa.compute.greater, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.GREATER, + pa.compute.greater, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.LESS_EQUAL, + pa.compute.less_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.LESS_EQUAL, + pa.compute.less_equal, + ), + ( + "int64", + "int64", + "bool", + plc.binaryop.BinaryOperator.GREATER_EQUAL, + pa.compute.greater_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.GREATER_EQUAL, + pa.compute.greater_equal, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_EQUALS, + pa.compute.equal, + ), + ( + "int64", + "float64", + 
"float64", + plc.binaryop.BinaryOperator.NULL_EQUALS, + pa.compute.equal, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.NULL_MAX, + pa.compute.max_element_wise, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_MAX, + pa.compute.max_element_wise, + ), + ( + "int64", + "int64", + "datetime64[ns]", + plc.binaryop.BinaryOperator.NULL_MIN, + pa.compute.min_element_wise, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_MIN, + pa.compute.min_element_wise, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_NOT_EQUALS, + pa.compute.not_equal, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_NOT_EQUALS, + pa.compute.not_equal, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND, + pa.compute.and_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "float64", + "float64", + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, + pa.compute.or_, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.GENERIC_BINARY, + None, + ), + ( + "int64", + "int64", + "int64", + plc.binaryop.BinaryOperator.INVALID_BINARY, + None, + ), + ], + indirect=True, + ids=idfn, +) +def test_binaryops(tests): + ( + pa_lhs, + pa_rhs, + py_outtype, + plc_lhs, + plc_rhs, + plc_outtype, + py_op, + plc_op, + ) = tests + def get_result(): + return plc.binaryop.binary_operation( + plc_lhs, + plc_rhs, + plc_op, + plc_outtype, + ) -@pytest.mark.parametrize( - "pa_data", - [ - ("int64", "int64", "int64"), - ("int64", "float64", "float64"), - ], - indirect=True, - ids=idfn, -) -def test_invalid_binary(pa_data, plc_data): - _test_binaryop_inner( - pa_data, - plc_data, - None, - plc.binaryop.BinaryOperator.INVALID_BINARY, - ) + 
if not plc.binaryop.is_supported_operation( + plc_outtype, plc_lhs.type(), plc_rhs.type(), plc_op + ): + with pytest.raises(TypeError): + get_result() + else: + expect = py_op(pa_lhs, pa_rhs).cast(py_outtype) + got = get_result() + assert_column_eq(expect, got) From 849e586f2d150d73fa06db05d2cca57916be65b5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 18 Jul 2024 13:41:22 -0700 Subject: [PATCH 21/21] adjust docstring --- python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index ba1c9d1a45a..44d9f4ad04a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -95,7 +95,7 @@ cpdef bool is_supported_operation( ): """Check if an operation is supported for the given data types. - For details, see :cpp:func:`cudf::binops::is_supported_operation`. + For details, see :cpp:func:`is_supported_operation`. Parameters ----------