Merge remote-tracking branch 'upstream/branch-24.08' into api/series/alignment
mroeschke committed Jul 19, 2024
2 parents 029d776 + 4c46628 commit e593196
Showing 19 changed files with 994 additions and 73 deletions.
11 changes: 11 additions & 0 deletions cpp/include/cudf/binaryop.hpp
@@ -290,6 +290,17 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op,

namespace binops {

/**
* @brief Returns true if the binary operator is supported for the given input types.
*
* @param out The output data type
* @param lhs The left-hand cudf::data_type
* @param rhs The right-hand cudf::data_type
* @param op The binary operator
* @return true if the binary operator is supported for the given input types
*/
bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op);

/**
* @brief Computes output valid mask for op between a column and a scalar
*
7 changes: 6 additions & 1 deletion cpp/src/binaryop/binaryop.cpp
@@ -50,6 +50,11 @@
namespace cudf {
namespace binops {

bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op)
{
return cudf::binops::compiled::is_supported_operation(out, lhs, rhs, op);
}

/**
* @brief Computes output valid mask for op between a column and a scalar
*/
@@ -194,7 +199,7 @@ std::unique_ptr<column> binary_operation(LhsType const& lhs,
rmm::device_async_resource_ref mr)
{
if constexpr (std::is_same_v<LhsType, column_view> and std::is_same_v<RhsType, column_view>)
CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match");
CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match", std::invalid_argument);

if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING and
output_type.id() == type_id::STRING and
4 changes: 2 additions & 2 deletions cpp/tests/binaryop/binop-verify-input-test.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Copyright 2018-2019 BlazingDB, Inc.
* Copyright 2018 Christian Noboa Mardini <[email protected]>
@@ -42,5 +42,5 @@ TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize)

EXPECT_THROW(cudf::binary_operation(
lhs, rhs, cudf::binary_operator::ADD, cudf::data_type(cudf::type_id::INT64)),
cudf::logic_error);
std::invalid_argument);
}
64 changes: 63 additions & 1 deletion pyproject.toml
@@ -26,7 +26,69 @@ quiet-level = 3
line-length = 79

[tool.ruff.lint]
select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH", "FA", "UP006", "UP007"]
typing-modules = ["cudf._typing"]
select = [
# pycodestyle Error
"E",
# Pyflakes
"F",
# pycodestyle Warning
"W",
# no-blank-line-before-function
"D201",
# one-blank-line-after-class
"D204",
# indent-with-spaces
"D206",
# under-indentation
"D207",
# over-indentation
"D208",
# new-line-after-last-paragraph
"D209",
# surrounding-whitespace
"D210",
# blank-line-before-class
"D211",
# section-not-over-indented
"D214",
# section-underline-not-over-indented
"D215",
# triple-single-quotes
"D300",
# escape-sequence-in-docstring
"D301",
# first-line-capitalized
"D403",
# capitalize-section-name
"D405",
# new-line-after-section-name
"D406",
# dashed-underline-after-section
"D407",
# section-underline-after-name
"D408",
# section-underline-matches-section-length
"D409",
# no-blank-line-after-section
"D410",
# no-blank-line-before-section
"D411",
# blank-lines-between-header-and-content
"D412",
# empty-docstring-section
"D414",
# overload-with-docstring
"D418",
# flake8-type-checking
"TCH",
# flake8-future-annotations
"FA",
# non-pep585-annotation
"UP006",
# non-pep604-annotation
"UP007"
]
ignore = [
# whitespace before :
"E203",
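As a quick orientation for the newly annotated rule list, here is a small illustration (a made-up function, not cudf code) of what a few of the pydocstyle codes above enforce:

def scale(values, factor):
    """Scale ``values`` by ``factor``.

    The docstring satisfies D201 (no blank line between the ``def`` line and
    the docstring), D403 (capitalized first word of the summary line), and
    D209 (closing quotes on their own line for a multi-line docstring).
    """
    return [v * factor for v in values]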
9 changes: 9 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/binaryop.pxd
@@ -1,5 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool

from cudf._lib.pylibcudf.libcudf.binaryop cimport binary_operator

from .column cimport Column
@@ -22,3 +24,10 @@ cpdef Column binary_operation(
binary_operator op,
DataType output_type
)

cpdef bool is_supported_operation(
DataType out,
DataType lhs,
DataType rhs,
binary_operator op
)
35 changes: 35 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/binaryop.pyx
@@ -2,6 +2,7 @@

from cython.operator import dereference

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

@@ -84,3 +85,37 @@ cpdef Column binary_operation(
raise ValueError(f"Invalid arguments {lhs} and {rhs}")

return Column.from_libcudf(move(result))


cpdef bool is_supported_operation(
DataType out,
DataType lhs,
DataType rhs,
binary_operator op
):
"""Check if an operation is supported for the given data types.
For details, see :cpp:func:`is_supported_operation`.

Parameters
----------
out : DataType
The output data type.
lhs : DataType
The left hand side data type.
rhs : DataType
The right hand side data type.
op : BinaryOperator
The operation to check.

Returns
-------
bool
True if the operation is supported, False otherwise
"""

return cpp_binaryop.is_supported_operation(
out.c_obj,
lhs.c_obj,
rhs.c_obj,
op
)
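A minimal usage sketch of the new binding (an example added for illustration, not part of this commit; the module path and the DataType/TypeId/BinaryOperator spellings are assumed from the surrounding pylibcudf code on this branch, and only is_supported_operation itself comes from this diff):

# DataType and TypeId are assumed to be re-exported at the package top level.
from cudf._lib import pylibcudf as plc

int64 = plc.DataType(plc.TypeId.INT64)
string = plc.DataType(plc.TypeId.STRING)

# Integer ADD with an int64 output should be reported as supported.
plc.binaryop.is_supported_operation(int64, int64, int64, plc.binaryop.BinaryOperator.ADD)

# Mixing a string operand into an integer ADD should be reported as unsupported,
# letting callers check a combination before dispatching to binary_operation.
plc.binaryop.is_supported_operation(int64, string, int64, plc.binaryop.BinaryOperator.ADD)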
39 changes: 30 additions & 9 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/binaryop.pxd
@@ -1,9 +1,11 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libc.stdint cimport int32_t
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string

from cudf._lib.exception_handler cimport cudf_exception_handler
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
@@ -19,48 +21,67 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
TRUE_DIV
FLOOR_DIV
MOD
PMOD
PYMOD
POW
INT_POW
LOG_BASE
ATAN2
SHIFT_LEFT
SHIFT_RIGHT
SHIFT_RIGHT_UNSIGNED
BITWISE_AND
BITWISE_OR
BITWISE_XOR
LOGICAL_AND
LOGICAL_OR
EQUAL
NOT_EQUAL
LESS
GREATER
LESS_EQUAL
GREATER_EQUAL
NULL_EQUALS
NULL_MAX
NULL_MIN
NULL_NOT_EQUALS
BITWISE_AND
BITWISE_OR
BITWISE_XOR
LOGICAL_AND
LOGICAL_OR
GENERIC_BINARY
NULL_LOGICAL_AND
NULL_LOGICAL_OR
INVALID_BINARY

cdef unique_ptr[column] binary_operation (
const scalar& lhs,
const column_view& rhs,
binary_operator op,
data_type output_type
) except +
) except +cudf_exception_handler

cdef unique_ptr[column] binary_operation (
const column_view& lhs,
const scalar& rhs,
binary_operator op,
data_type output_type
) except +
) except +cudf_exception_handler

cdef unique_ptr[column] binary_operation (
const column_view& lhs,
const column_view& rhs,
binary_operator op,
data_type output_type
) except +
) except +cudf_exception_handler

cdef unique_ptr[column] binary_operation (
const column_view& lhs,
const column_view& rhs,
const string& op,
data_type output_type
) except +
) except +cudf_exception_handler

cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil:
cdef bool is_supported_operation(
data_type output_type,
data_type lhs_type,
data_type rhs_type,
binary_operator op
) except +cudf_exception_handler
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/_internals/where.py
@@ -47,7 +47,7 @@ def _check_and_cast_columns_with_other(

other_is_scalar = is_scalar(other)
if other_is_scalar:
if isinstance(other, float) and not np.isnan(other):
if isinstance(other, (float, np.floating)) and not np.isnan(other):
try:
is_safe = source_dtype.type(other) == other
except OverflowError:
10 changes: 5 additions & 5 deletions python/cudf/cudf/core/column/column.py
@@ -1458,9 +1458,10 @@ def column_empty_like(
return column_empty(row_count, dtype, masked)


def _has_any_nan(arbitrary):
def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool:
"""Check if an object dtype Series or array contains NaN."""
return any(
((isinstance(x, float) or isinstance(x, np.floating)) and np.isnan(x))
isinstance(x, (float, np.floating)) and np.isnan(x)
for x in np.asarray(arbitrary)
)

@@ -2312,9 +2313,8 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
# Notice, we can always cast pure null columns
not_null_col_dtypes = [o.dtype for o in objs if o.null_count != len(o)]
if len(not_null_col_dtypes) and all(
_is_non_decimal_numeric_dtype(dtyp)
and np.issubdtype(dtyp, np.datetime64)
for dtyp in not_null_col_dtypes
_is_non_decimal_numeric_dtype(dtype) and dtype.kind == "M"
for dtype in not_null_col_dtypes
):
common_dtype = find_common_type(not_null_col_dtypes)
# Cast all columns to the common dtype
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/datetime.py
@@ -639,7 +639,7 @@ def isin(self, values: Sequence) -> ColumnBase:
return cudf.core.tools.datetimes._isin_datetimelike(self, values)

def can_cast_safely(self, to_dtype: Dtype) -> bool:
if np.issubdtype(to_dtype, np.datetime64):
if to_dtype.kind == "M": # type: ignore[union-attr]
to_res, _ = np.datetime_data(to_dtype)
self_res, _ = np.datetime_data(self.dtype)

9 changes: 5 additions & 4 deletions python/cudf/cudf/core/column/lists.py
@@ -564,10 +564,11 @@ def take(self, lists_indices: ColumnLike) -> ParentType:
raise ValueError(
"lists_indices and list column is of different " "size."
)
if not _is_non_decimal_numeric_dtype(
lists_indices_col.children[1].dtype
) or not np.issubdtype(
lists_indices_col.children[1].dtype, np.integer
if (
not _is_non_decimal_numeric_dtype(
lists_indices_col.children[1].dtype
)
or lists_indices_col.children[1].dtype.kind not in "iu"
):
raise TypeError(
"lists_indices should be column of values of index types."
28 changes: 10 additions & 18 deletions python/cudf/cudf/core/column/numerical.py
@@ -225,25 +225,17 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
tmp = self if reflect else other
# Guard against division by zero for integers.
if (
(tmp.dtype.type in int_float_dtype_mapping)
and (tmp.dtype.type != np.bool_)
and (
(
(
np.isscalar(tmp)
or (
isinstance(tmp, cudf.Scalar)
# host to device copy
and tmp.is_valid()
)
)
and (0 == tmp)
)
or ((isinstance(tmp, NumericalColumn)) and (0 in tmp))
)
tmp.dtype.type in int_float_dtype_mapping
and tmp.dtype.kind != "b"
):
out_dtype = cudf.dtype("float64")

if isinstance(tmp, NumericalColumn) and 0 in tmp:
out_dtype = cudf.dtype("float64")
elif isinstance(tmp, cudf.Scalar):
if tmp.is_valid() and tmp == 0:
# tmp == 0 can return NA
out_dtype = cudf.dtype("float64")
elif is_scalar(tmp) and tmp == 0:
out_dtype = cudf.dtype("float64")
if op in {
"__lt__",
"__gt__",
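The reworked guard above is about dtype promotion: when an integer (non-boolean) operand is divided by zero, the output dtype is widened to float64 so the result can represent inf/NaN rather than being forced into an integer dtype. A rough sketch of the user-visible behavior this preserves (hedged: pandas-compatible semantics assumed, not re-run against this exact branch):

import cudf

s = cudf.Series([1, 2, 3], dtype="int64")

# Integer floor division by zero cannot be represented in an int64 result,
# so the guarded path promotes the output to float64 (inf where dividing by 0).
out = s // 0
print(out.dtype)  # expected: float64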