Skip to content

Commit

Permalink
Implement null-aware NOT_EQUALS binop (rapidsai#15731)
Browse files Browse the repository at this point in the history
Fill out the table of null-aware comparison binops by also supporting a new NULL_NOT_EQUALS. This is the negation of NULL_EQUALS but implemented in a single pass, rather than binop(NULL_EQUALS) followed by uop(NEGATE).

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)
  - Mark Harris (https://github.com/harrism)

URL: rapidsai#15731
  • Loading branch information
wence- authored May 15, 2024
1 parent 0811523 commit 92b2b12
Show file tree
Hide file tree
Showing 22 changed files with 143 additions and 25 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ add_library(
src/binaryop/compiled/Mod.cu
src/binaryop/compiled/Mul.cu
src/binaryop/compiled/NullEquals.cu
src/binaryop/compiled/NullNotEquals.cu
src/binaryop/compiled/NullLogicalAnd.cu
src/binaryop/compiled/NullLogicalOr.cu
src/binaryop/compiled/NullMax.cu
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -111,5 +111,6 @@ BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool
BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool);
BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool);
BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool);
BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_NOT_EQUALS, bool);
BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32);
BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s);
2 changes: 2 additions & 0 deletions cpp/include/cudf/binaryop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ enum class binary_operator : int32_t {
GREATER_EQUAL, ///< operator >=
NULL_EQUALS, ///< Returns true when both operands are null; false when one is null; the
///< result of equality when both are non-null
NULL_NOT_EQUALS, ///< Returns false when both operands are null; true when one is null; the
///< result of inequality when both are non-null
NULL_MAX, ///< Returns max of operands when both are non-null; returns the non-null
///< operand when one is null; or invalid when both are null
NULL_MIN, ///< Returns min of operands when both are non-null; returns the non-null
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ std::pair<rmm::device_buffer, size_type> scalar_col_valid_mask_and(
*/
inline bool is_null_dependent(binary_operator op)
{
return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_MIN ||
op == binary_operator::NULL_MAX || op == binary_operator::NULL_LOGICAL_AND ||
op == binary_operator::NULL_LOGICAL_OR;
return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_NOT_EQUALS ||
op == binary_operator::NULL_MIN || op == binary_operator::NULL_MAX ||
op == binary_operator::NULL_LOGICAL_AND || op == binary_operator::NULL_LOGICAL_OR;
}

/**
Expand Down Expand Up @@ -109,7 +109,8 @@ bool is_comparison_binop(binary_operator op)
op == binary_operator::GREATER or // operator >
op == binary_operator::LESS_EQUAL or // operator <=
op == binary_operator::GREATER_EQUAL or // operator >=
op == binary_operator::NULL_EQUALS; // 2 null = true; 1 null = false; else ==
op == binary_operator::NULL_EQUALS or // 2 null = true; 1 null = false; else ==
op == binary_operator::NULL_NOT_EQUALS; // 2 null = false; 1 null = true; else !=
}

/**
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/binaryop/compiled/NullNotEquals.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "binary_ops.cuh"

namespace cudf::binops::compiled {
// Explicit instantiation of apply_binary_op for ops::NullNotEquals, so that
// this specialization is compiled in this translation unit.
template void apply_binary_op<ops::NullNotEquals>(mutable_column_view&,
column_view const&,
column_view const&,
bool is_lhs_scalar,
bool is_rhs_scalar,
rmm::cuda_stream_view);
} // namespace cudf::binops::compiled
4 changes: 3 additions & 1 deletion cpp/src/binaryop/compiled/binary_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ case binary_operator::LOG_BASE: apply_binary_op<ops::LogBase>(out, l
case binary_operator::ATAN2: apply_binary_op<ops::ATan2>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::PMOD: apply_binary_op<ops::PMod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_EQUALS: apply_binary_op<ops::NullEquals>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_NOT_EQUALS: apply_binary_op<ops::NullNotEquals>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_MAX: apply_binary_op<ops::NullMax>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_MIN: apply_binary_op<ops::NullMin>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
case binary_operator::NULL_LOGICAL_AND: apply_binary_op<ops::NullLogicalAnd>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
Expand Down Expand Up @@ -412,8 +413,9 @@ void apply_sorting_struct_binary_op(mutable_column_view& out,
// Struct child column type and structure mismatches are caught within the two_table_comparator
switch (op) {
case binary_operator::EQUAL: [[fallthrough]];
case binary_operator::NOT_EQUAL: [[fallthrough]];
case binary_operator::NULL_EQUALS: [[fallthrough]];
case binary_operator::NOT_EQUAL:
case binary_operator::NULL_NOT_EQUALS:
detail::apply_struct_equality_op(
out,
lhs,
Expand Down
1 change: 1 addition & 0 deletions cpp/src/binaryop/compiled/binary_ops.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ struct ops_wrapper {
type_dispatcher(rhs.type(), type_casted_accessor<TypeCommon>{}, i, rhs, is_rhs_scalar);
auto result = [&]() {
if constexpr (std::is_same_v<BinaryOperator, ops::NullEquals> or
std::is_same_v<BinaryOperator, ops::NullNotEquals> or
std::is_same_v<BinaryOperator, ops::NullLogicalAnd> or
std::is_same_v<BinaryOperator, ops::NullLogicalOr> or
std::is_same_v<BinaryOperator, ops::NullMax> or
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/binaryop/compiled/binary_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ void apply_binary_op(mutable_column_view& out,
* @brief Deploys single type or double type dispatcher that runs equality operation on each element
* of @p lhs and @p rhs columns.
*
* Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS.
* Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS, NULL_NOT_EQUALS.
* @p out type is boolean.
*
* This template is instantiated for each binary operator.
Expand Down
17 changes: 14 additions & 3 deletions cpp/src/binaryop/compiled/operation.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -422,15 +422,26 @@ struct NullEquals {
TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x == y)
{
output_valid = true;
if (!lhs_valid && !rhs_valid) return true;
if (lhs_valid && rhs_valid) return x == y;
return false;
return !lhs_valid && !rhs_valid;
}
// To allow std::is_invocable_v = true
template <typename TypeLhs, typename TypeRhs>
__device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y);
};

// Null-aware inequality, defined as the logical negation of NullEquals:
//  - both operands valid: the negation of `x == y`
//  - exactly one operand null: true
//  - both operands null: false
// output_valid is written by the delegated NullEquals call, which sets it to true.
struct NullNotEquals {
template <typename TypeLhs, typename TypeRhs>
__device__ inline auto operator()(
TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x != y)
{
// Delegate to NullEquals so both operators share one definition of null handling.
return !NullEquals{}(x, y, lhs_valid, rhs_valid, output_valid);
}
// To allow std::is_invocable_v = true
// (declaration only; this two-argument overload has no definition here)
template <typename TypeLhs, typename TypeRhs>
__device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x != y);
};

struct NullMax {
template <typename TypeLhs,
typename TypeRhs,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/binaryop/compiled/struct_binary_ops.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -148,7 +148,7 @@ void apply_struct_equality_op(mutable_column_view& out,
rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL ||
op == binary_operator::NULL_EQUALS,
op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_NOT_EQUALS,
"Unsupported operator for these types",
cudf::data_type_error);

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/binaryop/compiled/util.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -182,6 +182,8 @@ struct is_supported_operation_functor {
case binary_operator::LESS_EQUAL: return bool_op<ops::LessEqual, TypeLhs, TypeRhs>(out);
case binary_operator::GREATER_EQUAL: return bool_op<ops::GreaterEqual, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_EQUALS: return bool_op<ops::NullEquals, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_NOT_EQUALS:
return bool_op<ops::NullNotEquals, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_LOGICAL_AND:
return bool_op<ops::NullLogicalAnd, TypeLhs, TypeRhs>(out);
case binary_operator::NULL_LOGICAL_OR:
Expand Down
34 changes: 34 additions & 0 deletions cpp/tests/binaryop/binop-compiled-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,40 @@ TYPED_TEST(BinaryOperationCompiledTest_NullOps, NullEquals_Vector_Vector)
using BinaryOperationCompiledTest_NullOpsString =
BinaryOperationCompiledTest_NullOps<cudf::test::Types<std::string, std::string, std::string>>;
TEST_F(BinaryOperationCompiledTest_NullOpsString, NullEquals_Vector_Vector)
{
using TypeOut = bool;
using TypeLhs = std::string;
using TypeRhs = std::string;
using NULL_NOT_EQUALS = cudf::library::operation::NullNotEquals<TypeOut, TypeLhs, TypeRhs>;

auto lhs = lhs_random_column<TypeLhs>(col_size);
auto rhs = rhs_random_column<TypeRhs>(col_size);
auto const expected = NullOp_Result<TypeOut, TypeLhs, TypeRhs, NULL_NOT_EQUALS>(lhs, rhs);

auto const result = cudf::binary_operation(
lhs, rhs, cudf::binary_operator::NULL_NOT_EQUALS, cudf::data_type(cudf::type_to_id<TypeOut>()));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
}

// Column-vs-column NULL_NOT_EQUALS: compares the output of cudf::binary_operation
// with an expected column derived from the reference functor
// cudf::library::operation::NullNotEquals.
TYPED_TEST(BinaryOperationCompiledTest_NullOps, NullNotEquals_Vector_Vector)
{
using TypeOut = bool;
using TypeLhs = typename TestFixture::TypeLhs;
using TypeRhs = typename TestFixture::TypeRhs;
using NULL_NOT_EQUALS = cudf::library::operation::NullNotEquals<TypeOut, TypeLhs, TypeRhs>;

auto lhs = lhs_random_column<TypeLhs>(col_size);
auto rhs = rhs_random_column<TypeRhs>(col_size);
// NOTE(review): NullOp_Result presumably applies the functor row by row, passing each
// operand's validity, to build the expected column - confirm against its definition.
auto const expected = NullOp_Result<TypeOut, TypeLhs, TypeRhs, NULL_NOT_EQUALS>(lhs, rhs);

auto const result = cudf::binary_operation(
lhs, rhs, cudf::binary_operator::NULL_NOT_EQUALS, cudf::data_type(cudf::type_to_id<TypeOut>()));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
}

using BinaryOperationCompiledTest_NullOpsString =
BinaryOperationCompiledTest_NullOps<cudf::test::Types<std::string, std::string, std::string>>;
TEST_F(BinaryOperationCompiledTest_NullOpsString, NullNotEquals_Vector_Vector)
{
using TypeOut = bool;
using TypeLhs = std::string;
Expand Down
8 changes: 8 additions & 0 deletions cpp/tests/binaryop/util/operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,14 @@ struct NullEquals {
}
};

// Reference functor for NULL_NOT_EQUALS used by the tests: returns the logical
// negation of the NullEquals reference functor invoked with the same arguments.
// output_valid is passed through, so it holds whatever NullEquals writes to it.
template <typename TypeOut, typename TypeLhs, typename TypeRhs>
struct NullNotEquals {
TypeOut operator()(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) const
{
return !NullEquals<TypeOut, TypeLhs, TypeRhs>()(x, y, lhs_valid, rhs_valid, output_valid);
}
};

template <typename TypeOut, typename TypeLhs, typename TypeRhs>
struct NullMax {
TypeOut operator()(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) const
Expand Down
9 changes: 5 additions & 4 deletions java/src/main/java/ai/rapids/cudf/BinaryOp.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,12 @@ public enum BinaryOp {
LESS_EQUAL(25), // <=
GREATER_EQUAL(26), // >=
NULL_EQUALS(27), // like EQUAL but NULL == NULL is TRUE and NULL == not NULL is FALSE
NULL_MAX(28), // MAX but NULL < not NULL
NULL_MIN(29), // MIN but NULL > not NULL
NULL_NOT_EQUALS(28), // negation of NULL_EQUALS
NULL_MAX(29), // MAX but NULL < not NULL
NULL_MIN(30), // MIN but NULL > not NULL
//NOT IMPLEMENTED YET GENERIC_BINARY(31);
NULL_LOGICAL_AND(31),
NULL_LOGICAL_OR(32);
NULL_LOGICAL_AND(32),
NULL_LOGICAL_OR(33);


static final EnumSet<BinaryOp> COMPARISON = EnumSet.of(
Expand Down
14 changes: 14 additions & 0 deletions java/src/main/java/ai/rapids/cudf/BinaryOperable.java
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,20 @@ default ColumnVector equalToNullAware(BinaryOperable rhs) {
return equalToNullAware(rhs, DType.BOOL8);
}

/**
* like notEqualTo but NULL != NULL is FALSE and NULL != not NULL is TRUE
* (the negation of equalToNullAware)
*/
default ColumnVector notEqualToNullAware(BinaryOperable rhs, DType outType) {
return binaryOp(BinaryOp.NULL_NOT_EQUALS, rhs, outType);
}

/**
* like notEqualTo but NULL != NULL is FALSE and NULL != not NULL is TRUE
* (the negation of equalToNullAware). The output type is DType.BOOL8.
*/
default ColumnVector notEqualToNullAware(BinaryOperable rhs) {
return notEqualToNullAware(rhs, DType.BOOL8);
}

/**
* Returns the max non null value.
*/
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/binaryop.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def binaryop(lhs, rhs, op, dtype):
"""
# TODO: Shouldn't have to keep special-casing. We need to define a separate
# pipeline for libcudf binops that don't map to Python binops.
if op not in {"INT_POW", "NULL_EQUALS"}:
if op not in {"INT_POW", "NULL_EQUALS", "NULL_NOT_EQUALS"}:
op = op[2:-2]
op = op.upper()
op = _op_map.get(op, op)
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
LESS_EQUAL
GREATER_EQUAL
NULL_EQUALS
NULL_NOT_EQUALS
BITWISE_AND
BITWISE_OR
BITWISE_XOR
Expand Down
7 changes: 6 additions & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,12 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
if not isinstance(other, CategoricalColumn):
raise ValueError
# Note: at this stage we are guaranteed that the dtypes are equal.
if not self.ordered and op not in {"__eq__", "__ne__", "NULL_EQUALS"}:
if not self.ordered and op not in {
"__eq__",
"__ne__",
"NULL_EQUALS",
"NULL_NOT_EQUALS",
}:
raise TypeError(
"The only binary operations supported by unordered "
"categorical columns are equality and inequality."
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,18 +570,20 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
out_dtype = _resolve_mixed_dtypes(lhs, rhs, "datetime64")
elif op in {
"__eq__",
"NULL_EQUALS",
"__ne__",
"NULL_EQUALS",
"NULL_NOT_EQUALS",
}:
out_dtype = cudf.dtype(np.bool_)
if isinstance(other, ColumnBase) and not isinstance(
other, DatetimeColumn
):
fill_value = op in ("__ne__", "NULL_NOT_EQUALS")
result = _all_bools_with_nulls(
self, other, bool_fill_value=op == "__ne__"
self, other, bool_fill_value=fill_value
)
if cudf.get_option("mode.pandas_compatible"):
result = result.fillna(op == "__ne__")
result = result.fillna(fill_value)
return result

if out_dtype is None:
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
"__eq__",
"__ne__",
"NULL_EQUALS",
"NULL_NOT_EQUALS",
}:
out_dtype = "bool"

Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5957,6 +5957,7 @@ def _binaryop(
"__ge__",
"__le__",
"NULL_EQUALS",
"NULL_NOT_EQUALS",
}:
lhs, rhs = (other, self) if reflect else (self, other)
return libcudf.binaryop.binaryop(
Expand Down
10 changes: 7 additions & 3 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
"__le__",
"__ge__",
"NULL_EQUALS",
"NULL_NOT_EQUALS",
}:
out_dtype = cudf.dtype(np.bool_)
elif op == "__mod__":
Expand All @@ -185,15 +186,18 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
elif other.dtype.kind in {"f", "i", "u"}:
if op in {"__mul__", "__mod__", "__truediv__", "__floordiv__"}:
out_dtype = self.dtype
elif op in {"__eq__", "NULL_EQUALS", "__ne__"}:
elif op in {"__eq__", "__ne__", "NULL_EQUALS", "NULL_NOT_EQUALS"}:
if isinstance(other, ColumnBase) and not isinstance(
other, TimeDeltaColumn
):
fill_value = op in ("__ne__", "NULL_NOT_EQUALS")
result = _all_bools_with_nulls(
self, other, bool_fill_value=op == "__ne__"
self,
other,
bool_fill_value=fill_value,
)
if cudf.get_option("mode.pandas_compatible"):
result = result.fillna(op == "__ne__")
result = result.fillna(fill_value)
return result

if out_dtype is None:
Expand Down

0 comments on commit 92b2b12

Please sign in to comment.