From 6ece34bea17e3445c5f1a64661f54ac7d8d373c0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Mar 2022 17:22:26 -0700 Subject: [PATCH 01/22] Enable all binary operators on Column, use them for binops in Frame, and enable __array_ufunc__ for numpy compatibility. --- python/cudf/cudf/core/column/column.py | 128 +++++++++++++++++++++++-- python/cudf/cudf/core/frame.py | 13 ++- 2 files changed, 128 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 2919b62b49c..8b11eda724f 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -86,6 +86,49 @@ Slice = TypeVar("Slice", bound=slice) +# Mapping from ufuncs to the corresponding binary operators. +_ufunc_binary_operations = { + # Arithmetic binary operations. + "add": "add", + "subtract": "sub", + "multiply": "mul", + "matmul": "matmul", + "divide": "truediv", + "true_divide": "truediv", + "floor_divide": "floordiv", + "power": "pow", + "float_power": "pow", + "remainder": "mod", + "mod": "mod", + "fmod": "mod", + # Bitwise binary operations. + "bitwise_and": "and", + "bitwise_or": "or", + "bitwise_xor": "xor", + # Comparison binary operators + "greater": "gt", + "greater_equal": "ge", + "less": "lt", + "less_equal": "le", + "not_equal": "ne", + "equal": "eq", +} + +# These operators need to be mapped to their inverses when performing a +# reflected ufunc operation because no reflected version of the operators +# themselves exist. When these operators are invoked directly (not via +# __array_ufunc__) Python takes care of calling the inverse operation. +_ops_without_reflection = { + "gt": "lt", + "ge": "le", + "lt": "gt", + "le": "ge", + # ne and eq are symmetric, so they are their own inverse op + "ne": "ne", + "eq": "eq", +} + + class ColumnBase(Column, Serializable, Reducible, NotIterable): _VALID_REDUCTIONS = { "any", @@ -1038,15 +1081,12 @@ def __sub__(self, other): def __mul__(self, other): return self.binary_operator("mul", other) - def __eq__(self, other): - return self.binary_operator("eq", other) - - def __ne__(self, other): - return self.binary_operator("ne", other) - def __or__(self, other): return self.binary_operator("or", other) + def __xor__(self, other): + return self.binary_operator("xor", other) + def __and__(self, other): return self.binary_operator("and", other) @@ -1062,6 +1102,42 @@ def __mod__(self, other): def __pow__(self, other): return self.binary_operator("pow", other) + def __radd__(self, other): + return self.binary_operator("add", other, reflect=True) + + def __rsub__(self, other): + return self.binary_operator("sub", other, reflect=True) + + def __rmul__(self, other): + return self.binary_operator("mul", other, reflect=True) + + def __ror__(self, other): + return self.binary_operator("or", other, reflect=True) + + def __rxor__(self, other): + return self.binary_operator("xor", other, reflect=True) + + def __rand__(self, other): + return self.binary_operator("and", other, reflect=True) + + def __rfloordiv__(self, other): + return self.binary_operator("floordiv", other, reflect=True) + + def __rtruediv__(self, other): + return self.binary_operator("truediv", other, reflect=True) + + def __rmod__(self, other): + return self.binary_operator("mod", other, reflect=True) + + def __rpow__(self, other): + return self.binary_operator("pow", other, reflect=True) + + def __eq__(self, other): + return self.binary_operator("eq", other) + + def __ne__(self, other): + return self.binary_operator("ne", other) + def __lt__(self, other): return self.binary_operator("lt", other) @@ -1074,6 +1150,46 @@ def __le__(self, other): def __ge__(self, other): return self.binary_operator("ge", other) + # TODO: Share this logic with Frame if possible (the implementation is + # identical at this point). + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # We don't currently support reduction, accumulation, etc. We also + # don't support any special kwargs or higher arity ufuncs than binary. + if method != "__call__" or kwargs or ufunc.nin > 2: + return NotImplemented + + fname = ufunc.__name__ + if fname in _ufunc_binary_operations: + reflect = self is not inputs[0] + other = inputs[0] if reflect else inputs[1] + + op = _ufunc_binary_operations[fname] + if reflect and op in _ops_without_reflection: + op = _ops_without_reflection[op] + reflect = False + op = f"__{'r' if reflect else ''}{op}__" + + # Float_power returns float irrespective of the input type. + if fname == "float_power": + return getattr(self, op)(other).astype(float) + return getattr(self, op)(other) + + # Special handling for various unary operations. + if fname == "negative": + return self * -1 + if fname == "positive": + return self.copy(deep=True) + if fname == "invert": + return ~self + if fname == "absolute": + # TODO: Implement the abs operator properly. + return self.unary_operator("abs") + if fname == "fabs": + return self.unary_operator("abs").astype(np.float64) + + # None is a sentinel used by subclasses to trigger cupy dispatch. + return None + def searchsorted( self, value, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index a9d7fce9d9b..0b813e83d9d 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3,6 +3,7 @@ from __future__ import annotations import copy +import operator import pickle import warnings from collections import abc @@ -2535,8 +2536,6 @@ def _colwise_binop( A dict of columns constructed from the result of performing the requested operation on the operands. """ - fn = fn[2:-2] - # Now actually perform the binop on the columns in left and right. output = {} for ( @@ -2567,11 +2566,11 @@ def _colwise_binop( # are not numerical using the new binops mixin. outcol = ( - left_column.binary_operator(fn, right_column, reflect=reflect) - if right_column is not None - else column_empty( - left_column.size, left_column.dtype, masked=True - ) + column_empty(left_column.size, left_column.dtype, masked=True) + if right_column is None + else getattr(operator, fn)(right_column, left_column) + if reflect + else getattr(operator, fn)(left_column, right_column) ) if output_mask is not None: From 166aa654e3514430c07339984dbc257f0f667393 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Mar 2022 15:57:21 -0700 Subject: [PATCH 02/22] Centralize array_ufunc implementation for Frames and Columns. --- python/cudf/cudf/core/column/column.py | 84 +------------------------- python/cudf/cudf/core/frame.py | 83 +------------------------ python/cudf/cudf/utils/utils.py | 84 ++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 163 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 8b11eda724f..615ce36373e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -78,7 +78,7 @@ pandas_dtypes_alias_to_cudf_alias, pandas_dtypes_to_np_dtypes, ) -from cudf.utils.utils import NotIterable, mask_dtype +from cudf.utils.utils import NotIterable, _array_ufunc, mask_dtype T = TypeVar("T", bound="ColumnBase") # TODO: This workaround allows type hints for `slice`, since `slice` is a @@ -86,49 +86,6 @@ Slice = TypeVar("Slice", bound=slice) -# Mapping from ufuncs to the corresponding binary operators. -_ufunc_binary_operations = { - # Arithmetic binary operations. - "add": "add", - "subtract": "sub", - "multiply": "mul", - "matmul": "matmul", - "divide": "truediv", - "true_divide": "truediv", - "floor_divide": "floordiv", - "power": "pow", - "float_power": "pow", - "remainder": "mod", - "mod": "mod", - "fmod": "mod", - # Bitwise binary operations. - "bitwise_and": "and", - "bitwise_or": "or", - "bitwise_xor": "xor", - # Comparison binary operators - "greater": "gt", - "greater_equal": "ge", - "less": "lt", - "less_equal": "le", - "not_equal": "ne", - "equal": "eq", -} - -# These operators need to be mapped to their inverses when performing a -# reflected ufunc operation because no reflected version of the operators -# themselves exist. When these operators are invoked directly (not via -# __array_ufunc__) Python takes care of calling the inverse operation. -_ops_without_reflection = { - "gt": "lt", - "ge": "le", - "lt": "gt", - "le": "ge", - # ne and eq are symmetric, so they are their own inverse op - "ne": "ne", - "eq": "eq", -} - - class ColumnBase(Column, Serializable, Reducible, NotIterable): _VALID_REDUCTIONS = { "any", @@ -1150,45 +1107,8 @@ def __le__(self, other): def __ge__(self, other): return self.binary_operator("ge", other) - # TODO: Share this logic with Frame if possible (the implementation is - # identical at this point). def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # We don't currently support reduction, accumulation, etc. We also - # don't support any special kwargs or higher arity ufuncs than binary. - if method != "__call__" or kwargs or ufunc.nin > 2: - return NotImplemented - - fname = ufunc.__name__ - if fname in _ufunc_binary_operations: - reflect = self is not inputs[0] - other = inputs[0] if reflect else inputs[1] - - op = _ufunc_binary_operations[fname] - if reflect and op in _ops_without_reflection: - op = _ops_without_reflection[op] - reflect = False - op = f"__{'r' if reflect else ''}{op}__" - - # Float_power returns float irrespective of the input type. - if fname == "float_power": - return getattr(self, op)(other).astype(float) - return getattr(self, op)(other) - - # Special handling for various unary operations. - if fname == "negative": - return self * -1 - if fname == "positive": - return self.copy(deep=True) - if fname == "invert": - return ~self - if fname == "absolute": - # TODO: Implement the abs operator properly. - return self.unary_operator("abs") - if fname == "fabs": - return self.unary_operator("abs").astype(np.float64) - - # None is a sentinel used by subclasses to trigger cupy dispatch. - return None + return _array_ufunc(self, ufunc, method, inputs, kwargs) def searchsorted( self, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 0b813e83d9d..cdb4abcbeb7 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -50,54 +50,11 @@ from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import find_common_type -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.utils import _array_ufunc, _cudf_nvtx_annotate T = TypeVar("T", bound="Frame") -# Mapping from ufuncs to the corresponding binary operators. -_ufunc_binary_operations = { - # Arithmetic binary operations. - "add": "add", - "subtract": "sub", - "multiply": "mul", - "matmul": "matmul", - "divide": "truediv", - "true_divide": "truediv", - "floor_divide": "floordiv", - "power": "pow", - "float_power": "pow", - "remainder": "mod", - "mod": "mod", - "fmod": "mod", - # Bitwise binary operations. - "bitwise_and": "and", - "bitwise_or": "or", - "bitwise_xor": "xor", - # Comparison binary operators - "greater": "gt", - "greater_equal": "ge", - "less": "lt", - "less_equal": "le", - "not_equal": "ne", - "equal": "eq", -} - -# These operators need to be mapped to their inverses when performing a -# reflected ufunc operation because no reflected version of the operators -# themselves exist. When these operators are invoked directly (not via -# __array_ufunc__) Python takes care of calling the inverse operation. -_ops_without_reflection = { - "gt": "lt", - "ge": "le", - "lt": "gt", - "le": "ge", - # ne and eq are symmetric, so they are their own inverse op - "ne": "ne", - "eq": "eq", -} - - class Frame(BinaryOperand, Scannable): """A collection of Column objects with an optional index. @@ -2580,44 +2537,8 @@ def _colwise_binop( return output - # For more detail on this function and how it should work, see - # https://numpy.org/doc/stable/reference/ufuncs.html def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # We don't currently support reduction, accumulation, etc. We also - # don't support any special kwargs or higher arity ufuncs than binary. - if method != "__call__" or kwargs or ufunc.nin > 2: - return NotImplemented - - fname = ufunc.__name__ - if fname in _ufunc_binary_operations: - reflect = self is not inputs[0] - other = inputs[0] if reflect else inputs[1] - - op = _ufunc_binary_operations[fname] - if reflect and op in _ops_without_reflection: - op = _ops_without_reflection[op] - reflect = False - op = f"__{'r' if reflect else ''}{op}__" - - # Float_power returns float irrespective of the input type. - if fname == "float_power": - return getattr(self, op)(other).astype(float) - return getattr(self, op)(other) - - # Special handling for various unary operations. - if fname == "negative": - return self * -1 - if fname == "positive": - return self.copy(deep=True) - if fname == "invert": - return ~self - if fname == "absolute": - return self.abs() - if fname == "fabs": - return self.abs().astype(np.float64) - - # None is a sentinel used by subclasses to trigger cupy dispatch. - return None + return _array_ufunc(self, ufunc, method, inputs, kwargs) def _apply_cupy_ufunc_to_operands( self, ufunc, cupy_func, operands, **kwargs diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 1bd3fa7558e..5f8a40f5806 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -24,6 +24,90 @@ mask_dtype = cudf.dtype(np.int32) mask_bitsize = mask_dtype.itemsize * 8 +# Mapping from ufuncs to the corresponding binary operators. +_ufunc_binary_operations = { + # Arithmetic binary operations. + "add": "add", + "subtract": "sub", + "multiply": "mul", + "matmul": "matmul", + "divide": "truediv", + "true_divide": "truediv", + "floor_divide": "floordiv", + "power": "pow", + "float_power": "pow", + "remainder": "mod", + "mod": "mod", + "fmod": "mod", + # Bitwise binary operations. + "bitwise_and": "and", + "bitwise_or": "or", + "bitwise_xor": "xor", + # Comparison binary operators + "greater": "gt", + "greater_equal": "ge", + "less": "lt", + "less_equal": "le", + "not_equal": "ne", + "equal": "eq", +} + +# These operators need to be mapped to their inverses when performing a +# reflected ufunc operation because no reflected version of the operators +# themselves exist. When these operators are invoked directly (not via +# __array_ufunc__) Python takes care of calling the inverse operation. +_ops_without_reflection = { + "gt": "lt", + "ge": "le", + "lt": "gt", + "le": "ge", + # ne and eq are symmetric, so they are their own inverse op + "ne": "ne", + "eq": "eq", +} + + +# This is the implementation of __array_ufunc__ used for Frame and Column. +# For more detail on this function and how it should work, see +# https://numpy.org/doc/stable/reference/ufuncs.html +def _array_ufunc(obj, ufunc, method, inputs, kwargs): + # We don't currently support reduction, accumulation, etc. We also + # don't support any special kwargs or higher arity ufuncs than binary. + if method != "__call__" or kwargs or ufunc.nin > 2: + return NotImplemented + + fname = ufunc.__name__ + if fname in _ufunc_binary_operations: + reflect = obj is not inputs[0] + other = inputs[0] if reflect else inputs[1] + + op = _ufunc_binary_operations[fname] + if reflect and op in _ops_without_reflection: + op = _ops_without_reflection[op] + reflect = False + op = f"__{'r' if reflect else ''}{op}__" + + # Float_power returns float irrespective of the input type. + if fname == "float_power": + return getattr(obj, op)(other).astype(float) + return getattr(obj, op)(other) + + # Special handling for various unary operations. + if fname == "negative": + return obj * -1 + if fname == "positive": + return obj.copy(deep=True) + if fname == "invert": + return ~obj + if fname == "absolute": + # TODO: Make sure all obj (mainly Column) implement abs. + return abs(obj) + if fname == "fabs": + return abs(obj).astype(np.float64) + + # None is a sentinel used by subclasses to trigger cupy dispatch. + return None + _EQUALITY_OPS = { "__eq__", From 66ea1622837e1e9cdc30b1e35de4ed9159af0374 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Mar 2022 16:58:38 -0700 Subject: [PATCH 03/22] Rename type BinaryOperand to ColumnBinaryOperand and the binary_operator API to _binaryop. --- python/cudf/cudf/_lib/datetime.pyx | 4 +- python/cudf/cudf/_typing.py | 2 +- python/cudf/cudf/core/column/categorical.py | 8 +-- python/cudf/cudf/core/column/column.py | 60 ++++++++++----------- python/cudf/cudf/core/column/datetime.py | 15 +++--- python/cudf/cudf/core/column/decimal.py | 9 ++-- python/cudf/cudf/core/column/lists.py | 6 +-- python/cudf/cudf/core/column/numerical.py | 18 ++++--- python/cudf/cudf/core/column/string.py | 12 +++-- python/cudf/cudf/core/column/timedelta.py | 29 ++++++---- 10 files changed, 92 insertions(+), 71 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index e41016645cd..00838664c0f 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -57,7 +59,7 @@ def extract_datetime_component(Column col, object field): if field == "weekday": # Pandas counts Monday-Sunday as 0-6 # while we count Monday-Sunday as 1-7 - result = result.binary_operator("sub", result.dtype.type(1)) + result = result - result.dtype.type(1) return result diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py index ca2024929f3..87988150fd3 100644 --- a/python/cudf/cudf/_typing.py +++ b/python/cudf/cudf/_typing.py @@ -25,7 +25,7 @@ ColumnLike = Any # binary operation -BinaryOperand = Union["cudf.Scalar", "cudf.core.column.ColumnBase"] +ColumnBinaryOperand = Union["cudf.Scalar", "cudf.core.column.ColumnBase"] DataFrameOrSeries = Union["cudf.Series", "cudf.DataFrame"] SeriesOrIndex = Union["cudf.Series", "cudf.core.index.BaseIndex"] diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index caab2294484..102ce79eb9e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -24,7 +24,7 @@ import cudf from cudf import _lib as libcudf from cudf._lib.transform import bools_to_mask -from cudf._typing import ColumnLike, Dtype, ScalarLike +from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.api.types import is_categorical_dtype, is_interval_dtype from cudf.core.buffer import Buffer from cudf.core.column import column @@ -875,8 +875,8 @@ def slice( offset=codes.offset, ) - def binary_operator( - self, op: str, rhs, reflect: bool = False + def _binaryop( + self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False ) -> ColumnBase: if op not in {"eq", "ne", "lt", "le", "gt", "ge", "NULL_EQUALS"}: raise TypeError( @@ -894,7 +894,7 @@ def binary_operator( "The only binary operations supported by unordered " "categorical columns are equality and inequality." ) - return self.as_numerical.binary_operator(op, rhs.as_numerical) + return self.as_numerical._binaryop(op, rhs.as_numerical) def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: if isinstance(other, column.ColumnBase): diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 615ce36373e..e1925c3861b 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -41,7 +41,7 @@ drop_nulls, ) from cudf._lib.transform import bools_to_mask -from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike +from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.api.types import ( _is_non_decimal_numeric_dtype, _is_scalar_or_zero_d_array, @@ -185,7 +185,7 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: return False if check_dtypes and (self.dtype != other.dtype): return False - return self.binary_operator("NULL_EQUALS", other).all() + return self._binaryop("NULL_EQUALS", other).all() def all(self, skipna: bool = True) -> bool: # The skipna argument is only used for numerical columns. @@ -1030,82 +1030,82 @@ def __cuda_array_interface__(self): ) def __add__(self, other): - return self.binary_operator("add", other) + return self._binaryop("add", other) def __sub__(self, other): - return self.binary_operator("sub", other) + return self._binaryop("sub", other) def __mul__(self, other): - return self.binary_operator("mul", other) + return self._binaryop("mul", other) def __or__(self, other): - return self.binary_operator("or", other) + return self._binaryop("or", other) def __xor__(self, other): - return self.binary_operator("xor", other) + return self._binaryop("xor", other) def __and__(self, other): - return self.binary_operator("and", other) + return self._binaryop("and", other) def __floordiv__(self, other): - return self.binary_operator("floordiv", other) + return self._binaryop("floordiv", other) def __truediv__(self, other): - return self.binary_operator("truediv", other) + return self._binaryop("truediv", other) def __mod__(self, other): - return self.binary_operator("mod", other) + return self._binaryop("mod", other) def __pow__(self, other): - return self.binary_operator("pow", other) + return self._binaryop("pow", other) def __radd__(self, other): - return self.binary_operator("add", other, reflect=True) + return self._binaryop("add", other, reflect=True) def __rsub__(self, other): - return self.binary_operator("sub", other, reflect=True) + return self._binaryop("sub", other, reflect=True) def __rmul__(self, other): - return self.binary_operator("mul", other, reflect=True) + return self._binaryop("mul", other, reflect=True) def __ror__(self, other): - return self.binary_operator("or", other, reflect=True) + return self._binaryop("or", other, reflect=True) def __rxor__(self, other): - return self.binary_operator("xor", other, reflect=True) + return self._binaryop("xor", other, reflect=True) def __rand__(self, other): - return self.binary_operator("and", other, reflect=True) + return self._binaryop("and", other, reflect=True) def __rfloordiv__(self, other): - return self.binary_operator("floordiv", other, reflect=True) + return self._binaryop("floordiv", other, reflect=True) def __rtruediv__(self, other): - return self.binary_operator("truediv", other, reflect=True) + return self._binaryop("truediv", other, reflect=True) def __rmod__(self, other): - return self.binary_operator("mod", other, reflect=True) + return self._binaryop("mod", other, reflect=True) def __rpow__(self, other): - return self.binary_operator("pow", other, reflect=True) + return self._binaryop("pow", other, reflect=True) def __eq__(self, other): - return self.binary_operator("eq", other) + return self._binaryop("eq", other) def __ne__(self, other): - return self.binary_operator("ne", other) + return self._binaryop("ne", other) def __lt__(self, other): - return self.binary_operator("lt", other) + return self._binaryop("lt", other) def __gt__(self, other): - return self.binary_operator("gt", other) + return self._binaryop("gt", other) def __le__(self, other): - return self.binary_operator("le", other) + return self._binaryop("le", other) def __ge__(self, other): - return self.binary_operator("ge", other) + return self._binaryop("ge", other) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) @@ -1169,8 +1169,8 @@ def unary_operator(self, unaryop: str): f"Operation {unaryop} not supported for dtype {self.dtype}." ) - def binary_operator( - self, op: str, other: BinaryOperand, reflect: bool = False + def _binaryop( + self, op: str, other: ColumnBinaryOperand, reflect: bool = False ) -> ColumnBase: raise TypeError( f"Operation {op} not supported between dtypes {self.dtype} and " diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b312f99829f..cf97a0d281a 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -14,7 +14,13 @@ import cudf from cudf import _lib as libcudf -from cudf._typing import DatetimeLikeScalar, Dtype, DtypeObj, ScalarLike +from cudf._typing import ( + ColumnBinaryOperand, + DatetimeLikeScalar, + Dtype, + DtypeObj, + ScalarLike, +) from cudf.api.types import is_scalar from cudf.core._compat import PANDAS_GE_120 from cudf.core.buffer import Buffer @@ -388,11 +394,8 @@ def quantile( return pd.Timestamp(result, unit=self.time_unit) return result.astype(self.dtype) - def binary_operator( - self, - op: str, - rhs: Union[ColumnBase, "cudf.Scalar"], - reflect: bool = False, + def _binaryop( + self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False, ) -> ColumnBase: rhs = self._wrap_binop_normalization(rhs) if isinstance(rhs, cudf.DateOffset): diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index e011afbd0ff..22f95386e8b 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -14,7 +14,7 @@ from cudf._lib.strings.convert.convert_fixed_point import ( from_decimal as cpp_from_decimal, ) -from cudf._typing import Dtype +from cudf._typing import ColumnBinaryOperand, Dtype from cudf.api.types import is_integer_dtype, is_scalar from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column @@ -60,13 +60,12 @@ def as_string_column( "cudf.core.column.StringColumn", as_column([], dtype="object") ) - def binary_operator(self, op, other, reflect=False): - if reflect: - self, other = other, self + def _binaryop(self, op, other: ColumnBinaryOperand, reflect=False): + other = self._wrap_binop_normalization(other) + lhs, rhs = (other, self) if reflect else (self, other) # Decimals in libcudf don't support truediv, see # https://github.com/rapidsai/cudf/pull/7435 for explanation. op = op.replace("true", "") - other = self._wrap_binop_normalization(other) # Binary Arithmetics between decimal columns. `Scale` and `precision` # are computed outside of libcudf diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 53ab79542e2..45ba0f7f26a 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -19,7 +19,7 @@ sort_lists, ) from cudf._lib.strings.convert.convert_lists import format_list_column -from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike +from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.api.types import _is_non_decimal_numeric_dtype, is_list_dtype from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column @@ -92,8 +92,8 @@ def base_size(self): # avoid it being negative return max(0, len(self.base_children[0]) - 1) - def binary_operator( - self, binop: str, other: BinaryOperand, reflect: bool = False + def _binaryop( + self, binop: str, other: ColumnBinaryOperand, reflect: bool = False ) -> ColumnBase: """ Calls a binary operator *binop* on operands *self* diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 015524b841e..052eb0303e2 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -21,7 +21,13 @@ import cudf from cudf import _lib as libcudf from cudf._lib.stream_compaction import drop_nulls -from cudf._typing import BinaryOperand, ColumnLike, Dtype, DtypeObj, ScalarLike +from cudf._typing import ( + ColumnBinaryOperand, + ColumnLike, + Dtype, + DtypeObj, + ScalarLike, +) from cudf.api.types import ( is_bool_dtype, is_float_dtype, @@ -150,8 +156,8 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: unaryop = libcudf.unary.UnaryOp[unaryop.upper()] return libcudf.unary.unary_operation(self, unaryop) - def binary_operator( - self, binop: str, rhs: BinaryOperand, reflect: bool = False, + def _binaryop( + self, binop: str, rhs: ColumnBinaryOperand, reflect: bool = False, ) -> ColumnBase: int_float_dtype_mapping = { np.int8: np.float32, @@ -168,16 +174,14 @@ def binary_operator( if binop in {"truediv", "rtruediv"}: # Division with integer types results in a suitable float. if (truediv_type := int_float_dtype_mapping.get(self.dtype.type)) : - return self.astype(truediv_type).binary_operator( - binop, rhs, reflect - ) + return self.astype(truediv_type)._binaryop(binop, rhs, reflect) rhs = self._wrap_binop_normalization(rhs) out_dtype = self.dtype if rhs is not None: if isinstance(rhs, cudf.core.column.DecimalBaseColumn): dtyp = rhs.dtype.__class__(rhs.dtype.MAX_PRECISION, 0) - return self.as_decimal_column(dtyp).binary_operator(binop, rhs) + return self.as_decimal_column(dtyp)._binaryop(binop, rhs) out_dtype = np.result_type(self.dtype, rhs.dtype) if binop in {"mod", "floordiv"}: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 82be924dfbc..bc789e35984 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -50,7 +50,13 @@ def str_to_boolean(column: StringColumn): if TYPE_CHECKING: - from cudf._typing import ColumnLike, Dtype, ScalarLike, SeriesOrIndex + from cudf._typing import ( + ColumnBinaryOperand, + ColumnLike, + Dtype, + ScalarLike, + SeriesOrIndex, + ) _str_to_numeric_typecast_functions = { @@ -5444,8 +5450,8 @@ def normalize_binop_value( ) raise TypeError(f"cannot broadcast {type(other)}") - def binary_operator( - self, op: str, rhs, reflect: bool = False + def _binaryop( + self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False ) -> "column.ColumnBase": # Handle object columns that are empty or all nulls when performing # binary operations diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 66e6271a4d1..3760daa3b8f 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -11,7 +11,12 @@ import cudf from cudf import _lib as libcudf -from cudf._typing import BinaryOperand, DatetimeLikeScalar, Dtype, DtypeObj +from cudf._typing import ( + ColumnBinaryOperand, + DatetimeLikeScalar, + Dtype, + DtypeObj, +) from cudf.api.types import is_scalar from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, column, string @@ -125,7 +130,7 @@ def to_pandas( return pd_series - def _binary_op_mul(self, rhs: BinaryOperand) -> DtypeObj: + def _binary_op_mul(self, rhs: ColumnBinaryOperand) -> DtypeObj: if rhs.dtype.kind in ("f", "i", "u"): out_dtype = self.dtype else: @@ -135,7 +140,7 @@ def _binary_op_mul(self, rhs: BinaryOperand) -> DtypeObj: ) return out_dtype - def _binary_op_mod(self, rhs: BinaryOperand) -> DtypeObj: + def _binary_op_mod(self, rhs: ColumnBinaryOperand) -> DtypeObj: if pd.api.types.is_timedelta64_dtype(rhs.dtype): out_dtype = determine_out_dtype(self.dtype, rhs.dtype) elif rhs.dtype.kind in ("f", "i", "u"): @@ -147,7 +152,9 @@ def _binary_op_mod(self, rhs: BinaryOperand) -> DtypeObj: ) return out_dtype - def _binary_op_lt_gt_le_ge_eq_ne(self, rhs: BinaryOperand) -> DtypeObj: + def _binary_op_lt_gt_le_ge_eq_ne( + self, rhs: ColumnBinaryOperand + ) -> DtypeObj: if pd.api.types.is_timedelta64_dtype(rhs.dtype): return np.bool_ raise TypeError( @@ -156,8 +163,8 @@ def _binary_op_lt_gt_le_ge_eq_ne(self, rhs: BinaryOperand) -> DtypeObj: ) def _binary_op_div( - self, rhs: BinaryOperand, op: str - ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + self, rhs: ColumnBinaryOperand, op: str + ) -> Tuple["column.ColumnBase", ColumnBinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -181,8 +188,8 @@ def _binary_op_div( return lhs, rhs, out_dtype - def binary_operator( - self, op: str, rhs: BinaryOperand, reflect: bool = False + def _binaryop( + self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False ) -> "column.ColumnBase": rhs = self._wrap_binop_normalization(rhs) lhs, rhs = self, rhs @@ -211,7 +218,7 @@ def binary_operator( return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) - def normalize_binop_value(self, other) -> BinaryOperand: + def normalize_binop_value(self, other) -> ColumnBinaryOperand: if isinstance(other, (ColumnBase, cudf.Scalar)): return other if isinstance(other, np.ndarray) and other.ndim == 0: @@ -556,7 +563,7 @@ def determine_out_dtype(lhs_dtype: Dtype, rhs_dtype: Dtype) -> Dtype: def _timedelta_add_result_dtype( - lhs: BinaryOperand, rhs: BinaryOperand + lhs: ColumnBinaryOperand, rhs: ColumnBinaryOperand ) -> Dtype: if pd.api.types.is_timedelta64_dtype(rhs.dtype): out_dtype = determine_out_dtype(lhs.dtype, rhs.dtype) @@ -577,7 +584,7 @@ def _timedelta_add_result_dtype( def _timedelta_sub_result_dtype( - lhs: BinaryOperand, rhs: BinaryOperand + lhs: ColumnBinaryOperand, rhs: ColumnBinaryOperand ) -> Dtype: if pd.api.types.is_timedelta64_dtype( lhs.dtype From 8725f570cb7f6c9738a2b16251641a6402c9920e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Mar 2022 17:35:32 -0700 Subject: [PATCH 04/22] Standardize binaryops around usage of 'other' vs 'rhs'. --- python/cudf/cudf/core/column/categorical.py | 8 +-- python/cudf/cudf/core/column/datetime.py | 31 ++++----- python/cudf/cudf/core/column/numerical.py | 28 +++++---- python/cudf/cudf/core/column/string.py | 8 +-- python/cudf/cudf/core/column/timedelta.py | 69 ++++++++++----------- 5 files changed, 74 insertions(+), 70 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 102ce79eb9e..e484d739f47 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -876,17 +876,17 @@ def slice( ) def _binaryop( - self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False + self, op: str, other: ColumnBinaryOperand, reflect: bool = False ) -> ColumnBase: if op not in {"eq", "ne", "lt", "le", "gt", "ge", "NULL_EQUALS"}: raise TypeError( "Series of dtype `category` cannot perform the operation: " f"{op}" ) - rhs = self._wrap_binop_normalization(rhs) + other = self._wrap_binop_normalization(other) # TODO: This is currently just here to make mypy happy, but eventually # we'll need to properly establish the APIs for these methods. - if not isinstance(rhs, CategoricalColumn): + if not isinstance(other, CategoricalColumn): raise ValueError # Note: at this stage we are guaranteed that the dtypes are equal. if not self.ordered and op not in {"eq", "ne", "NULL_EQUALS"}: @@ -894,7 +894,7 @@ def _binaryop( "The only binary operations supported by unordered " "categorical columns are equality and inequality." ) - return self.as_numerical._binaryop(op, rhs.as_numerical) + return self.as_numerical._binaryop(op, other.as_numerical) def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: if isinstance(other, column.ColumnBase): diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index cf97a0d281a..8389de37851 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -395,28 +395,31 @@ def quantile( return result.astype(self.dtype) def _binaryop( - self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False, + self, op: str, other: ColumnBinaryOperand, reflect: bool = False, ) -> ColumnBase: - rhs = self._wrap_binop_normalization(rhs) - if isinstance(rhs, cudf.DateOffset): - return rhs._datetime_binop(self, op, reflect=reflect) - - lhs: Union[ScalarLike, ColumnBase] = self + other = self._wrap_binop_normalization(other) + if isinstance(other, cudf.DateOffset): + return other._datetime_binop(self, op, reflect=reflect) + + # TODO: Figure out if I can reflect before we start these checks. That + # requires figuring out why _timedelta_add_result_dtype and + # _timedelta_sub_result_dtype are 1) not symmetric, and 2) different + # from each other. if op in {"eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"}: out_dtype: Dtype = cudf.dtype(np.bool_) - elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype): + elif op == "add" and pd.api.types.is_timedelta64_dtype(other.dtype): out_dtype = cudf.core.column.timedelta._timedelta_add_result_dtype( - rhs, lhs + other, self ) - elif op == "sub" and pd.api.types.is_timedelta64_dtype(rhs.dtype): + elif op == "sub" and pd.api.types.is_timedelta64_dtype(other.dtype): out_dtype = cudf.core.column.timedelta._timedelta_sub_result_dtype( - rhs if reflect else lhs, lhs if reflect else rhs + other if reflect else self, self if reflect else other ) - elif op == "sub" and pd.api.types.is_datetime64_dtype(rhs.dtype): + elif op == "sub" and pd.api.types.is_datetime64_dtype(other.dtype): units = ["s", "ms", "us", "ns"] - lhs_time_unit = cudf.utils.dtypes.get_time_unit(lhs) + lhs_time_unit = cudf.utils.dtypes.get_time_unit(self) lhs_unit = units.index(lhs_time_unit) - rhs_time_unit = cudf.utils.dtypes.get_time_unit(rhs) + rhs_time_unit = cudf.utils.dtypes.get_time_unit(other) rhs_unit = units.index(rhs_time_unit) out_dtype = np.dtype( f"timedelta64[{units[max(lhs_unit, rhs_unit)]}]" @@ -427,7 +430,7 @@ def _binaryop( f" the operation {op}" ) - lhs, rhs = (self, rhs) if not reflect else (rhs, self) + lhs, rhs = (self, other) if not reflect else (other, self) return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def fillna( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 052eb0303e2..ffbf04a3487 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -157,7 +157,7 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: return libcudf.unary.unary_operation(self, unaryop) def _binaryop( - self, binop: str, rhs: ColumnBinaryOperand, reflect: bool = False, + self, binop: str, other: ColumnBinaryOperand, reflect: bool = False, ) -> ColumnBase: int_float_dtype_mapping = { np.int8: np.float32, @@ -174,18 +174,20 @@ def _binaryop( if binop in {"truediv", "rtruediv"}: # Division with integer types results in a suitable float. if (truediv_type := int_float_dtype_mapping.get(self.dtype.type)) : - return self.astype(truediv_type)._binaryop(binop, rhs, reflect) + return self.astype(truediv_type)._binaryop( + binop, other, reflect + ) - rhs = self._wrap_binop_normalization(rhs) + other = self._wrap_binop_normalization(other) out_dtype = self.dtype - if rhs is not None: - if isinstance(rhs, cudf.core.column.DecimalBaseColumn): - dtyp = rhs.dtype.__class__(rhs.dtype.MAX_PRECISION, 0) - return self.as_decimal_column(dtyp)._binaryop(binop, rhs) + if other is not None: + if isinstance(other, cudf.core.column.DecimalBaseColumn): + dtyp = other.dtype.__class__(other.dtype.MAX_PRECISION, 0) + return self.as_decimal_column(dtyp)._binaryop(binop, other) - out_dtype = np.result_type(self.dtype, rhs.dtype) + out_dtype = np.result_type(self.dtype, other.dtype) if binop in {"mod", "floordiv"}: - tmp = self if reflect else rhs + tmp = self if reflect else other # Guard against division by zero for integers. if ( (tmp.dtype.type in int_float_dtype_mapping) @@ -213,16 +215,16 @@ def _binaryop( out_dtype = "bool" if binop in {"and", "or", "xor"}: - if is_float_dtype(self.dtype) or is_float_dtype(rhs): + if is_float_dtype(self.dtype) or is_float_dtype(other): raise TypeError( f"Operation 'bitwise {binop}' not supported between " f"{self.dtype.type.__name__} and " - f"{rhs.dtype.type.__name__}" + f"{other.dtype.type.__name__}" ) - if is_bool_dtype(self.dtype) or is_bool_dtype(rhs): + if is_bool_dtype(self.dtype) or is_bool_dtype(other): out_dtype = "bool" - lhs, rhs = (self, rhs) if not reflect else (rhs, self) + lhs, rhs = (self, other) if not reflect else (other, self) return libcudf.binaryop.binaryop(lhs, rhs, binop, out_dtype) def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index bc789e35984..868e73e9de6 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5451,7 +5451,7 @@ def normalize_binop_value( raise TypeError(f"cannot broadcast {type(other)}") def _binaryop( - self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False + self, op: str, other: ColumnBinaryOperand, reflect: bool = False ) -> "column.ColumnBase": # Handle object columns that are empty or all nulls when performing # binary operations @@ -5479,10 +5479,10 @@ def _binaryop( elif op == "ne": return self.isnull() - rhs = self._wrap_binop_normalization(rhs) + other = self._wrap_binop_normalization(other) - if isinstance(rhs, (StringColumn, str, cudf.Scalar)): - lhs, rhs = (rhs, self) if reflect else (self, rhs) + if isinstance(other, (StringColumn, str, cudf.Scalar)): + lhs, rhs = (other, self) if reflect else (self, other) if op == "add": return cast( "column.ColumnBase", diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 3760daa3b8f..96d0ac12383 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -130,91 +130,90 @@ def to_pandas( return pd_series - def _binary_op_mul(self, rhs: ColumnBinaryOperand) -> DtypeObj: - if rhs.dtype.kind in ("f", "i", "u"): + def _binary_op_mul(self, other: ColumnBinaryOperand) -> DtypeObj: + if other.dtype.kind in ("f", "i", "u"): out_dtype = self.dtype else: raise TypeError( - f"Multiplication of {self.dtype} with {rhs.dtype} " + f"Multiplication of {self.dtype} with {other.dtype} " f"cannot be performed." ) return out_dtype - def _binary_op_mod(self, rhs: ColumnBinaryOperand) -> DtypeObj: - if pd.api.types.is_timedelta64_dtype(rhs.dtype): - out_dtype = determine_out_dtype(self.dtype, rhs.dtype) - elif rhs.dtype.kind in ("f", "i", "u"): + def _binary_op_mod(self, other: ColumnBinaryOperand) -> DtypeObj: + if pd.api.types.is_timedelta64_dtype(other.dtype): + out_dtype = determine_out_dtype(self.dtype, other.dtype) + elif other.dtype.kind in ("f", "i", "u"): out_dtype = self.dtype else: raise TypeError( - f"Modulus of {self.dtype} with {rhs.dtype} " + f"Modulus of {self.dtype} with {other.dtype} " f"cannot be performed." ) return out_dtype def _binary_op_lt_gt_le_ge_eq_ne( - self, rhs: ColumnBinaryOperand + self, other: ColumnBinaryOperand ) -> DtypeObj: - if pd.api.types.is_timedelta64_dtype(rhs.dtype): + if pd.api.types.is_timedelta64_dtype(other.dtype): return np.bool_ raise TypeError( f"Invalid comparison between dtype={self.dtype}" - f" and {rhs.dtype}" + f" and {other.dtype}" ) def _binary_op_div( - self, rhs: ColumnBinaryOperand, op: str + self, other: ColumnBinaryOperand, op: str ) -> Tuple["column.ColumnBase", ColumnBinaryOperand, DtypeObj]: - lhs = self # type: column.ColumnBase - if pd.api.types.is_timedelta64_dtype(rhs.dtype): - common_dtype = determine_out_dtype(self.dtype, rhs.dtype) - lhs = lhs.astype(common_dtype).astype("float64") - if isinstance(rhs, cudf.Scalar): - if rhs.is_valid(): - rhs = rhs.value.astype(common_dtype).astype("float64") + this: ColumnBase = self + if pd.api.types.is_timedelta64_dtype(other.dtype): + common_dtype = determine_out_dtype(self.dtype, other.dtype) + this = self.astype(common_dtype).astype("float64") + if isinstance(other, cudf.Scalar): + if other.is_valid(): + other = other.value.astype(common_dtype).astype("float64") else: - rhs = cudf.Scalar(None, "float64") + other = cudf.Scalar(None, "float64") else: - rhs = rhs.astype(common_dtype).astype("float64") + other = other.astype(common_dtype).astype("float64") out_dtype = cudf.dtype("float64" if op == "truediv" else "int64") - elif rhs.dtype.kind in ("f", "i", "u"): + elif other.dtype.kind in ("f", "i", "u"): out_dtype = self.dtype else: raise TypeError( - f"Division of {self.dtype} with {rhs.dtype} " + f"Division of {self.dtype} with {other.dtype} " f"cannot be performed." ) - return lhs, rhs, out_dtype + return this, other, out_dtype def _binaryop( - self, op: str, rhs: ColumnBinaryOperand, reflect: bool = False + self, op: str, other: ColumnBinaryOperand, reflect: bool = False ) -> "column.ColumnBase": - rhs = self._wrap_binop_normalization(rhs) - lhs, rhs = self, rhs + other = self._wrap_binop_normalization(other) + this: ColumnBinaryOperand = self if op in {"eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"}: - out_dtype = self._binary_op_lt_gt_le_ge_eq_ne(rhs) + out_dtype = self._binary_op_lt_gt_le_ge_eq_ne(other) elif op == "mul": - out_dtype = self._binary_op_mul(rhs) + out_dtype = self._binary_op_mul(other) elif op == "mod": - out_dtype = self._binary_op_mod(rhs) + out_dtype = self._binary_op_mod(other) elif op in {"truediv", "floordiv"}: - lhs, rhs, out_dtype = self._binary_op_div(rhs, op) # type: ignore + this, other, out_dtype = self._binary_op_div(other, op) op = "truediv" elif op == "add": - out_dtype = _timedelta_add_result_dtype(lhs, rhs) + out_dtype = _timedelta_add_result_dtype(self, other) elif op == "sub": - out_dtype = _timedelta_sub_result_dtype(lhs, rhs) + out_dtype = _timedelta_sub_result_dtype(self, other) else: raise TypeError( f"Series of dtype {self.dtype} cannot perform " f"the operation {op}" ) - if reflect: - lhs, rhs = rhs, lhs # type: ignore + lhs, rhs = (this, other) if not reflect else (other, this) return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) From 412e95940b0cf665ae157630685b27015e62d0c3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Mar 2022 17:38:13 -0700 Subject: [PATCH 05/22] Remove now removed operators from numerical check. --- python/cudf/cudf/core/column/numerical.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index ffbf04a3487..d6f178f524d 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -202,8 +202,6 @@ def _binaryop( out_dtype = cudf.dtype("float64") if binop in { - "l_and", - "l_or", "lt", "gt", "le", From e66c44b1120908d7f38c83331bb1f8c0b8000646 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 10:24:11 -0700 Subject: [PATCH 06/22] Standardize signatures of column _binaryop methods to use 'op' as the op name. --- python/cudf/cudf/core/column/lists.py | 4 ++-- python/cudf/cudf/core/column/numerical.py | 20 +++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 45ba0f7f26a..207aabc8ec2 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -93,7 +93,7 @@ def base_size(self): return max(0, len(self.base_children[0]) - 1) def _binaryop( - self, binop: str, other: ColumnBinaryOperand, reflect: bool = False + self, op: str, other: ColumnBinaryOperand, reflect: bool = False ) -> ColumnBase: """ Calls a binary operator *binop* on operands *self* @@ -135,7 +135,7 @@ def _binaryop( """ other = self._wrap_binop_normalization(other) if isinstance(other.dtype, ListDtype): - if binop == "add": + if op == "add": return concatenate_rows( cudf.core.frame.Frame({0: self, 1: other}) ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index d6f178f524d..3e1790bea90 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -157,7 +157,7 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: return libcudf.unary.unary_operation(self, unaryop) def _binaryop( - self, binop: str, other: ColumnBinaryOperand, reflect: bool = False, + self, op: str, other: ColumnBinaryOperand, reflect: bool = False, ) -> ColumnBase: int_float_dtype_mapping = { np.int8: np.float32, @@ -171,22 +171,20 @@ def _binaryop( np.bool_: np.float32, } - if binop in {"truediv", "rtruediv"}: + if op in {"truediv", "rtruediv"}: # Division with integer types results in a suitable float. if (truediv_type := int_float_dtype_mapping.get(self.dtype.type)) : - return self.astype(truediv_type)._binaryop( - binop, other, reflect - ) + return self.astype(truediv_type)._binaryop(op, other, reflect) other = self._wrap_binop_normalization(other) out_dtype = self.dtype if other is not None: if isinstance(other, cudf.core.column.DecimalBaseColumn): dtyp = other.dtype.__class__(other.dtype.MAX_PRECISION, 0) - return self.as_decimal_column(dtyp)._binaryop(binop, other) + return self.as_decimal_column(dtyp)._binaryop(op, other) out_dtype = np.result_type(self.dtype, other.dtype) - if binop in {"mod", "floordiv"}: + if op in {"mod", "floordiv"}: tmp = self if reflect else other # Guard against division by zero for integers. if ( @@ -201,7 +199,7 @@ def _binaryop( ): out_dtype = cudf.dtype("float64") - if binop in { + if op in { "lt", "gt", "le", @@ -212,10 +210,10 @@ def _binaryop( }: out_dtype = "bool" - if binop in {"and", "or", "xor"}: + if op in {"and", "or", "xor"}: if is_float_dtype(self.dtype) or is_float_dtype(other): raise TypeError( - f"Operation 'bitwise {binop}' not supported between " + f"Operation 'bitwise {op}' not supported between " f"{self.dtype.type.__name__} and " f"{other.dtype.type.__name__}" ) @@ -223,7 +221,7 @@ def _binaryop( out_dtype = "bool" lhs, rhs = (self, other) if not reflect else (other, self) - return libcudf.binaryop.binaryop(lhs, rhs, binop, out_dtype) + return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: # Only floats can contain nan. From f71389324aab08d3551bb4ab8da6b44bfea87d4f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 11:02:42 -0700 Subject: [PATCH 07/22] Change all operation names to dunder versions. --- python/cudf/cudf/_lib/binaryop.pyx | 6 ++- python/cudf/cudf/core/column/categorical.py | 12 ++++- python/cudf/cudf/core/column/column.py | 52 ++++++++++----------- python/cudf/cudf/core/column/datetime.py | 20 ++++++-- python/cudf/cudf/core/column/decimal.py | 17 +++++-- python/cudf/cudf/core/column/lists.py | 2 +- python/cudf/cudf/core/column/numerical.py | 18 +++---- python/cudf/cudf/core/column/string.py | 44 ++++++++++------- python/cudf/cudf/core/column/timedelta.py | 26 +++++++---- python/cudf/cudf/core/tools/datetimes.py | 10 ++-- python/cudf/cudf/tests/test_timedelta.py | 2 +- 11 files changed, 129 insertions(+), 80 deletions(-) diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx index 1b590db9e6d..b11d31ab368 100644 --- a/python/cudf/cudf/_lib/binaryop.pyx +++ b/python/cudf/cudf/_lib/binaryop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from enum import IntEnum @@ -160,6 +160,10 @@ def binaryop(lhs, rhs, op, dtype): """ Dispatches a binary op call to the appropriate libcudf function: """ + # TODO: Shouldn't have to keep special-casing. We need to define a separate + # pipeline for libcudf binops that don't map to Python binops. + if op != "NULL_EQUALS": + op = op[2:-2] op = BinaryOperation[op.upper()] cdef binary_operator c_op = ( diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index e484d739f47..d6b5b083ed6 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -878,7 +878,15 @@ def slice( def _binaryop( self, op: str, other: ColumnBinaryOperand, reflect: bool = False ) -> ColumnBase: - if op not in {"eq", "ne", "lt", "le", "gt", "ge", "NULL_EQUALS"}: + if op not in { + "__eq__", + "__ne__", + "__lt__", + "__le__", + "__gt__", + "__ge__", + "NULL_EQUALS", + }: raise TypeError( "Series of dtype `category` cannot perform the operation: " f"{op}" @@ -889,7 +897,7 @@ def _binaryop( if not isinstance(other, CategoricalColumn): raise ValueError # Note: at this stage we are guaranteed that the dtypes are equal. - if not self.ordered and op not in {"eq", "ne", "NULL_EQUALS"}: + if not self.ordered and op not in {"__eq__", "__ne__", "NULL_EQUALS"}: raise TypeError( "The only binary operations supported by unordered " "categorical columns are equality and inequality." diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index e1925c3861b..675df8e8456 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1030,82 +1030,82 @@ def __cuda_array_interface__(self): ) def __add__(self, other): - return self._binaryop("add", other) + return self._binaryop("__add__", other) def __sub__(self, other): - return self._binaryop("sub", other) + return self._binaryop("__sub__", other) def __mul__(self, other): - return self._binaryop("mul", other) + return self._binaryop("__mul__", other) def __or__(self, other): - return self._binaryop("or", other) + return self._binaryop("__or__", other) def __xor__(self, other): - return self._binaryop("xor", other) + return self._binaryop("__xor__", other) def __and__(self, other): - return self._binaryop("and", other) + return self._binaryop("__and__", other) def __floordiv__(self, other): - return self._binaryop("floordiv", other) + return self._binaryop("__floordiv__", other) def __truediv__(self, other): - return self._binaryop("truediv", other) + return self._binaryop("__truediv__", other) def __mod__(self, other): - return self._binaryop("mod", other) + return self._binaryop("__mod__", other) def __pow__(self, other): - return self._binaryop("pow", other) + return self._binaryop("__pow__", other) def __radd__(self, other): - return self._binaryop("add", other, reflect=True) + return self._binaryop("__add__", other, reflect=True) def __rsub__(self, other): - return self._binaryop("sub", other, reflect=True) + return self._binaryop("__sub__", other, reflect=True) def __rmul__(self, other): - return self._binaryop("mul", other, reflect=True) + return self._binaryop("__mul__", other, reflect=True) def __ror__(self, other): - return self._binaryop("or", other, reflect=True) + return self._binaryop("__or__", other, reflect=True) def __rxor__(self, other): - return self._binaryop("xor", other, reflect=True) + return self._binaryop("__xor__", other, reflect=True) def __rand__(self, other): - return self._binaryop("and", other, reflect=True) + return self._binaryop("__and__", other, reflect=True) def __rfloordiv__(self, other): - return self._binaryop("floordiv", other, reflect=True) + return self._binaryop("__floordiv__", other, reflect=True) def __rtruediv__(self, other): - return self._binaryop("truediv", other, reflect=True) + return self._binaryop("__truediv__", other, reflect=True) def __rmod__(self, other): - return self._binaryop("mod", other, reflect=True) + return self._binaryop("__mod__", other, reflect=True) def __rpow__(self, other): - return self._binaryop("pow", other, reflect=True) + return self._binaryop("__pow__", other, reflect=True) def __eq__(self, other): - return self._binaryop("eq", other) + return self._binaryop("__eq__", other) def __ne__(self, other): - return self._binaryop("ne", other) + return self._binaryop("__ne__", other) def __lt__(self, other): - return self._binaryop("lt", other) + return self._binaryop("__lt__", other) def __gt__(self, other): - return self._binaryop("gt", other) + return self._binaryop("__gt__", other) def __le__(self, other): - return self._binaryop("le", other) + return self._binaryop("__le__", other) def __ge__(self, other): - return self._binaryop("ge", other) + return self._binaryop("__ge__", other) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 8389de37851..540d60bd19a 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -405,17 +405,29 @@ def _binaryop( # requires figuring out why _timedelta_add_result_dtype and # _timedelta_sub_result_dtype are 1) not symmetric, and 2) different # from each other. - if op in {"eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"}: + if op in { + "__eq__", + "__ne__", + "__lt__", + "__gt__", + "__le__", + "__ge__", + "NULL_EQUALS", + }: out_dtype: Dtype = cudf.dtype(np.bool_) - elif op == "add" and pd.api.types.is_timedelta64_dtype(other.dtype): + elif op == "__add__" and pd.api.types.is_timedelta64_dtype( + other.dtype + ): out_dtype = cudf.core.column.timedelta._timedelta_add_result_dtype( other, self ) - elif op == "sub" and pd.api.types.is_timedelta64_dtype(other.dtype): + elif op == "__sub__" and pd.api.types.is_timedelta64_dtype( + other.dtype + ): out_dtype = cudf.core.column.timedelta._timedelta_sub_result_dtype( other if reflect else self, self if reflect else other ) - elif op == "sub" and pd.api.types.is_datetime64_dtype(other.dtype): + elif op == "__sub__" and pd.api.types.is_datetime64_dtype(other.dtype): units = ["s", "ms", "us", "ns"] lhs_time_unit = cudf.utils.dtypes.get_time_unit(self) lhs_unit = units.index(lhs_time_unit) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 22f95386e8b..78d62e51a17 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -70,7 +70,7 @@ def _binaryop(self, op, other: ColumnBinaryOperand, reflect=False): # Binary Arithmetics between decimal columns. `Scale` and `precision` # are computed outside of libcudf try: - if op in {"add", "sub", "mul", "div"}: + if op in {"__add__", "__sub__", "__mul__", "__div__"}: output_type = _get_decimal_type(self.dtype, other.dtype, op) result = libcudf.binaryop.binaryop( self, other, op, output_type @@ -78,7 +78,14 @@ def _binaryop(self, op, other: ColumnBinaryOperand, reflect=False): # TODO: Why is this necessary? Why isn't the result's # precision already set correctly based on output_type? result.dtype.precision = output_type.precision - elif op in {"eq", "ne", "lt", "gt", "le", "ge"}: + elif op in { + "__eq__", + "__ne__", + "__lt__", + "__gt__", + "__le__", + "__ge__", + }: result = libcudf.binaryop.binaryop(self, other, op, bool) except RuntimeError as e: if "Unsupported operator for these types" in str(e): @@ -349,13 +356,13 @@ def _get_decimal_type(lhs_dtype, rhs_dtype, op): p1, p2 = lhs_dtype.precision, rhs_dtype.precision s1, s2 = lhs_dtype.scale, rhs_dtype.scale - if op in ("add", "sub"): + if op in ("__add__", "__sub__"): scale = max(s1, s2) precision = scale + max(p1 - s1, p2 - s2) + 1 - elif op == "mul": + elif op == "__mul__": scale = s1 + s2 precision = p1 + p2 + 1 - elif op == "div": + elif op == "__div__": scale = max(6, s1 + p2 + 1) precision = p1 - s1 + s2 + scale else: diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 207aabc8ec2..78c22b63e60 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -135,7 +135,7 @@ def _binaryop( """ other = self._wrap_binop_normalization(other) if isinstance(other.dtype, ListDtype): - if op == "add": + if op == "__add__": return concatenate_rows( cudf.core.frame.Frame({0: self, 1: other}) ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 3e1790bea90..75471b6e94e 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -171,7 +171,7 @@ def _binaryop( np.bool_: np.float32, } - if op in {"truediv", "rtruediv"}: + if op in {"__truediv__", "__rtruediv__"}: # Division with integer types results in a suitable float. if (truediv_type := int_float_dtype_mapping.get(self.dtype.type)) : return self.astype(truediv_type)._binaryop(op, other, reflect) @@ -184,7 +184,7 @@ def _binaryop( return self.as_decimal_column(dtyp)._binaryop(op, other) out_dtype = np.result_type(self.dtype, other.dtype) - if op in {"mod", "floordiv"}: + if op in {"__mod__", "__floordiv__"}: tmp = self if reflect else other # Guard against division by zero for integers. if ( @@ -200,17 +200,17 @@ def _binaryop( out_dtype = cudf.dtype("float64") if op in { - "lt", - "gt", - "le", - "ge", - "eq", - "ne", + "__lt__", + "__gt__", + "__le__", + "__ge__", + "__eq__", + "__ne__", "NULL_EQUALS", }: out_dtype = "bool" - if op in {"and", "or", "xor"}: + if op in {"__and__", "__or__", "__xor__"}: if is_float_dtype(self.dtype) or is_float_dtype(other): raise TypeError( f"Operation 'bitwise {op}' not supported between " diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 868e73e9de6..62f8b1da1b0 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5458,32 +5458,32 @@ def _binaryop( # See https://github.com/pandas-dev/pandas/issues/46332 if self.null_count == len(self): if op in { - "add", - "sub", - "mul", - "mod", - "pow", - "truediv", - "floordiv", - "radd", - "rsub", - "rmul", - "rmod", - "rpow", - "rtruediv", - "rfloordiv", + "__add__", + "__sub__", + "__mul__", + "__mod__", + "__pow__", + "__truediv__", + "__floordiv__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmod__", + "__rpow__", + "__rtruediv__", + "__rfloordiv__", }: return self - elif op in {"eq", "lt", "le", "gt", "ge"}: + elif op in {"__eq__", "__lt__", "__le__", "__gt__", "__ge__"}: return self.notnull() - elif op == "ne": + elif op == "__ne__": return self.isnull() other = self._wrap_binop_normalization(other) if isinstance(other, (StringColumn, str, cudf.Scalar)): lhs, rhs = (other, self) if reflect else (self, other) - if op == "add": + if op == "__add__": return cast( "column.ColumnBase", libstrings.concatenate( @@ -5492,7 +5492,15 @@ def _binaryop( na_rep=cudf.Scalar(None, "str"), ), ) - elif op in {"eq", "ne", "gt", "lt", "ge", "le", "NULL_EQUALS"}: + elif op in { + "__eq__", + "__ne__", + "__gt__", + "__lt__", + "__ge__", + "__le__", + "NULL_EQUALS", + }: return libcudf.binaryop.binaryop( lhs=lhs, rhs=rhs, op=op, dtype="bool" ) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 96d0ac12383..cfbc2fced39 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -177,7 +177,9 @@ def _binary_op_div( else: other = other.astype(common_dtype).astype("float64") - out_dtype = cudf.dtype("float64" if op == "truediv" else "int64") + out_dtype = cudf.dtype( + "float64" if op == "__truediv__" else "int64" + ) elif other.dtype.kind in ("f", "i", "u"): out_dtype = self.dtype else: @@ -194,18 +196,26 @@ def _binaryop( other = self._wrap_binop_normalization(other) this: ColumnBinaryOperand = self - if op in {"eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"}: + if op in { + "__eq__", + "__ne__", + "__lt__", + "__gt__", + "__le__", + "__ge__", + "NULL_EQUALS", + }: out_dtype = self._binary_op_lt_gt_le_ge_eq_ne(other) - elif op == "mul": + elif op == "__mul__": out_dtype = self._binary_op_mul(other) - elif op == "mod": + elif op == "__mod__": out_dtype = self._binary_op_mod(other) - elif op in {"truediv", "floordiv"}: + elif op in {"__truediv__", "__floordiv__"}: this, other, out_dtype = self._binary_op_div(other, op) - op = "truediv" - elif op == "add": + op = "__truediv__" + elif op == "__add__": out_dtype = _timedelta_add_result_dtype(self, other) - elif op == "sub": + elif op == "__sub__": out_dtype = _timedelta_sub_result_dtype(self, other) else: raise TypeError( diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 62c31691ac1..b110a10e1e7 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import math import re @@ -587,12 +587,12 @@ def _combine_kwargs_to_seconds(self, **kwargs): def _datetime_binop( self, datetime_col, op, reflect=False ) -> column.DatetimeColumn: - if reflect and op == "sub": + if reflect and op == "__sub__": raise TypeError( f"Can not subtract a {type(datetime_col).__name__}" f" from a {type(self).__name__}" ) - if op not in {"add", "sub"}: + if op not in {"__add__", "__sub__"}: raise TypeError( f"{op} not supported between {type(self).__name__}" f" and {type(datetime_col).__name__}" @@ -604,7 +604,7 @@ def _datetime_binop( for unit, value in self._scalars.items(): if unit != "months": - value = -value if op == "sub" else value + value = -value if op == "__sub__" else value datetime_col += cudf.core.column.as_column( value, length=len(datetime_col) ) @@ -613,7 +613,7 @@ def _datetime_binop( def _generate_months_column(self, size, op): months = self._scalars["months"] - months = -months if op == "sub" else months + months = -months if op == "__sub__" else months # TODO: pass a scalar instead of constructing a column # https://github.com/rapidsai/cudf/issues/6990 col = cudf.core.column.as_column(months, length=size) diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index e371cd16180..cb3a385b542 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1286,7 +1286,7 @@ def test_timedelta_invalid_ops(): lfunc_args_and_kwargs=([psr, psr],), rfunc_args_and_kwargs=([sr, sr],), expected_error_message=re.escape( - f"Series of dtype {sr.dtype} cannot perform the operation xor" + f"Series of dtype {sr.dtype} cannot perform the operation __xor__" ), ) From 6b7aa652af529d4480cba96c777deb34c7824c92 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 12:23:40 -0700 Subject: [PATCH 08/22] Swap op and other order. --- python/cudf/cudf/core/column/categorical.py | 4 +- python/cudf/cudf/core/column/column.py | 56 ++++++++++----------- python/cudf/cudf/core/column/datetime.py | 2 +- python/cudf/cudf/core/column/decimal.py | 2 +- python/cudf/cudf/core/column/lists.py | 2 +- python/cudf/cudf/core/column/numerical.py | 6 +-- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/core/column/timedelta.py | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d6b5b083ed6..3c4a87bf989 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -876,7 +876,7 @@ def slice( ) def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False + self, other: ColumnBinaryOperand, op: str, reflect: bool = False ) -> ColumnBase: if op not in { "__eq__", @@ -902,7 +902,7 @@ def _binaryop( "The only binary operations supported by unordered " "categorical columns are equality and inequality." ) - return self.as_numerical._binaryop(op, other.as_numerical) + return self.as_numerical._binaryop(other.as_numerical, op) def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: if isinstance(other, column.ColumnBase): diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 675df8e8456..715b626356a 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -185,7 +185,7 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: return False if check_dtypes and (self.dtype != other.dtype): return False - return self._binaryop("NULL_EQUALS", other).all() + return self._binaryop(other, "NULL_EQUALS").all() def all(self, skipna: bool = True) -> bool: # The skipna argument is only used for numerical columns. @@ -1030,82 +1030,82 @@ def __cuda_array_interface__(self): ) def __add__(self, other): - return self._binaryop("__add__", other) + return self._binaryop(other, "__add__") def __sub__(self, other): - return self._binaryop("__sub__", other) + return self._binaryop(other, "__sub__") def __mul__(self, other): - return self._binaryop("__mul__", other) + return self._binaryop(other, "__mul__") def __or__(self, other): - return self._binaryop("__or__", other) + return self._binaryop(other, "__or__") def __xor__(self, other): - return self._binaryop("__xor__", other) + return self._binaryop(other, "__xor__") def __and__(self, other): - return self._binaryop("__and__", other) + return self._binaryop(other, "__and__") def __floordiv__(self, other): - return self._binaryop("__floordiv__", other) + return self._binaryop(other, "__floordiv__") def __truediv__(self, other): - return self._binaryop("__truediv__", other) + return self._binaryop(other, "__truediv__") def __mod__(self, other): - return self._binaryop("__mod__", other) + return self._binaryop(other, "__mod__") def __pow__(self, other): - return self._binaryop("__pow__", other) + return self._binaryop(other, "__pow__") def __radd__(self, other): - return self._binaryop("__add__", other, reflect=True) + return self._binaryop(other, "__add__", reflect=True) def __rsub__(self, other): - return self._binaryop("__sub__", other, reflect=True) + return self._binaryop(other, "__sub__", reflect=True) def __rmul__(self, other): - return self._binaryop("__mul__", other, reflect=True) + return self._binaryop(other, "__mul__", reflect=True) def __ror__(self, other): - return self._binaryop("__or__", other, reflect=True) + return self._binaryop(other, "__or__", reflect=True) def __rxor__(self, other): - return self._binaryop("__xor__", other, reflect=True) + return self._binaryop(other, "__xor__", reflect=True) def __rand__(self, other): - return self._binaryop("__and__", other, reflect=True) + return self._binaryop(other, "__and__", reflect=True) def __rfloordiv__(self, other): - return self._binaryop("__floordiv__", other, reflect=True) + return self._binaryop(other, "__floordiv__", reflect=True) def __rtruediv__(self, other): - return self._binaryop("__truediv__", other, reflect=True) + return self._binaryop(other, "__truediv__", reflect=True) def __rmod__(self, other): - return self._binaryop("__mod__", other, reflect=True) + return self._binaryop(other, "__mod__", reflect=True) def __rpow__(self, other): - return self._binaryop("__pow__", other, reflect=True) + return self._binaryop(other, "__pow__", reflect=True) def __eq__(self, other): - return self._binaryop("__eq__", other) + return self._binaryop(other, "__eq__") def __ne__(self, other): - return self._binaryop("__ne__", other) + return self._binaryop(other, "__ne__") def __lt__(self, other): - return self._binaryop("__lt__", other) + return self._binaryop(other, "__lt__") def __gt__(self, other): - return self._binaryop("__gt__", other) + return self._binaryop(other, "__gt__") def __le__(self, other): - return self._binaryop("__le__", other) + return self._binaryop(other, "__le__") def __ge__(self, other): - return self._binaryop("__ge__", other) + return self._binaryop(other, "__ge__") def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) @@ -1170,7 +1170,7 @@ def unary_operator(self, unaryop: str): ) def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False + self, other: ColumnBinaryOperand, op: str, reflect: bool = False ) -> ColumnBase: raise TypeError( f"Operation {op} not supported between dtypes {self.dtype} and " diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 540d60bd19a..634d71e9228 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -395,7 +395,7 @@ def quantile( return result.astype(self.dtype) def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False, + self, other: ColumnBinaryOperand, op: str, reflect: bool = False, ) -> ColumnBase: other = self._wrap_binop_normalization(other) if isinstance(other, cudf.DateOffset): diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 78d62e51a17..e3d480f99eb 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -60,7 +60,7 @@ def as_string_column( "cudf.core.column.StringColumn", as_column([], dtype="object") ) - def _binaryop(self, op, other: ColumnBinaryOperand, reflect=False): + def _binaryop(self, other: ColumnBinaryOperand, op: str, reflect=False): other = self._wrap_binop_normalization(other) lhs, rhs = (other, self) if reflect else (self, other) # Decimals in libcudf don't support truediv, see diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 78c22b63e60..447a1358616 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -93,7 +93,7 @@ def base_size(self): return max(0, len(self.base_children[0]) - 1) def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False + self, other: ColumnBinaryOperand, op: str, reflect: bool = False ) -> ColumnBase: """ Calls a binary operator *binop* on operands *self* diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 75471b6e94e..431dc0c33f0 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -157,7 +157,7 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: return libcudf.unary.unary_operation(self, unaryop) def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False, + self, other: ColumnBinaryOperand, op: str, reflect: bool = False, ) -> ColumnBase: int_float_dtype_mapping = { np.int8: np.float32, @@ -174,14 +174,14 @@ def _binaryop( if op in {"__truediv__", "__rtruediv__"}: # Division with integer types results in a suitable float. if (truediv_type := int_float_dtype_mapping.get(self.dtype.type)) : - return self.astype(truediv_type)._binaryop(op, other, reflect) + return self.astype(truediv_type)._binaryop(other, op, reflect) other = self._wrap_binop_normalization(other) out_dtype = self.dtype if other is not None: if isinstance(other, cudf.core.column.DecimalBaseColumn): dtyp = other.dtype.__class__(other.dtype.MAX_PRECISION, 0) - return self.as_decimal_column(dtyp)._binaryop(op, other) + return self.as_decimal_column(dtyp)._binaryop(other, op) out_dtype = np.result_type(self.dtype, other.dtype) if op in {"__mod__", "__floordiv__"}: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 62f8b1da1b0..d528a2ab1f1 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5451,7 +5451,7 @@ def normalize_binop_value( raise TypeError(f"cannot broadcast {type(other)}") def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False + self, other: ColumnBinaryOperand, op: str, reflect: bool = False ) -> "column.ColumnBase": # Handle object columns that are empty or all nulls when performing # binary operations diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index cfbc2fced39..dae5f72f9e4 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -191,7 +191,7 @@ def _binary_op_div( return this, other, out_dtype def _binaryop( - self, op: str, other: ColumnBinaryOperand, reflect: bool = False + self, other: ColumnBinaryOperand, op: str, reflect: bool = False ) -> "column.ColumnBase": other = self._wrap_binop_normalization(other) From 1f61aba16785dcd50bf6abfece56557a5c60e9a6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 13:44:41 -0700 Subject: [PATCH 09/22] Rely on operator names and the helper function for handling reflection. --- python/cudf/cudf/core/column/categorical.py | 4 +- python/cudf/cudf/core/column/column.py | 87 +-------------------- python/cudf/cudf/core/column/datetime.py | 5 +- python/cudf/cudf/core/column/decimal.py | 3 +- python/cudf/cudf/core/column/lists.py | 10 +-- python/cudf/cudf/core/column/numerical.py | 17 ++-- python/cudf/cudf/core/column/string.py | 10 +-- python/cudf/cudf/core/column/timedelta.py | 3 +- python/cudf/cudf/core/index.py | 4 +- python/cudf/cudf/core/indexed_frame.py | 4 +- python/cudf/cudf/core/mixins/binops.py | 10 ++- python/cudf/cudf/core/mixins/binops.pyi | 4 +- 12 files changed, 35 insertions(+), 126 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 3c4a87bf989..e4065142ec7 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -875,9 +875,7 @@ def slice( offset=codes.offset, ) - def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False - ) -> ColumnBase: + def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: if op not in { "__eq__", "__ne__", diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 715b626356a..eab6dd38db4 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -68,7 +68,7 @@ ListDtype, StructDtype, ) -from cudf.core.mixins import Reducible +from cudf.core.mixins import BinaryOperand, Reducible from cudf.utils import utils from cudf.utils.dtypes import ( cudf_dtype_from_pa_type, @@ -86,13 +86,14 @@ Slice = TypeVar("Slice", bound=slice) -class ColumnBase(Column, Serializable, Reducible, NotIterable): +class ColumnBase(Column, Serializable, BinaryOperand, Reducible, NotIterable): _VALID_REDUCTIONS = { "any", "all", "max", "min", } + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS def as_frame(self) -> "cudf.core.frame.Frame": """ @@ -1029,84 +1030,6 @@ def __cuda_array_interface__(self): "`__cuda_array_interface__`" ) - def __add__(self, other): - return self._binaryop(other, "__add__") - - def __sub__(self, other): - return self._binaryop(other, "__sub__") - - def __mul__(self, other): - return self._binaryop(other, "__mul__") - - def __or__(self, other): - return self._binaryop(other, "__or__") - - def __xor__(self, other): - return self._binaryop(other, "__xor__") - - def __and__(self, other): - return self._binaryop(other, "__and__") - - def __floordiv__(self, other): - return self._binaryop(other, "__floordiv__") - - def __truediv__(self, other): - return self._binaryop(other, "__truediv__") - - def __mod__(self, other): - return self._binaryop(other, "__mod__") - - def __pow__(self, other): - return self._binaryop(other, "__pow__") - - def __radd__(self, other): - return self._binaryop(other, "__add__", reflect=True) - - def __rsub__(self, other): - return self._binaryop(other, "__sub__", reflect=True) - - def __rmul__(self, other): - return self._binaryop(other, "__mul__", reflect=True) - - def __ror__(self, other): - return self._binaryop(other, "__or__", reflect=True) - - def __rxor__(self, other): - return self._binaryop(other, "__xor__", reflect=True) - - def __rand__(self, other): - return self._binaryop(other, "__and__", reflect=True) - - def __rfloordiv__(self, other): - return self._binaryop(other, "__floordiv__", reflect=True) - - def __rtruediv__(self, other): - return self._binaryop(other, "__truediv__", reflect=True) - - def __rmod__(self, other): - return self._binaryop(other, "__mod__", reflect=True) - - def __rpow__(self, other): - return self._binaryop(other, "__pow__", reflect=True) - - def __eq__(self, other): - return self._binaryop(other, "__eq__") - - def __ne__(self, other): - return self._binaryop(other, "__ne__") - - def __lt__(self, other): - return self._binaryop(other, "__lt__") - - def __gt__(self, other): - return self._binaryop(other, "__gt__") - - def __le__(self, other): - return self._binaryop(other, "__le__") - - def __ge__(self, other): - return self._binaryop(other, "__ge__") - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return _array_ufunc(self, ufunc, method, inputs, kwargs) @@ -1169,9 +1092,7 @@ def unary_operator(self, unaryop: str): f"Operation {unaryop} not supported for dtype {self.dtype}." ) - def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False - ) -> ColumnBase: + def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: raise TypeError( f"Operation {op} not supported between dtypes {self.dtype} and " f"{other.dtype}." diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 634d71e9228..471d7e8e665 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -394,9 +394,8 @@ def quantile( return pd.Timestamp(result, unit=self.time_unit) return result.astype(self.dtype) - def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False, - ) -> ColumnBase: + def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: + reflect, op = self._check_reflected_op(op) other = self._wrap_binop_normalization(other) if isinstance(other, cudf.DateOffset): return other._datetime_binop(self, op, reflect=reflect) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index e3d480f99eb..ccf764f9309 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -60,7 +60,8 @@ def as_string_column( "cudf.core.column.StringColumn", as_column([], dtype="object") ) - def _binaryop(self, other: ColumnBinaryOperand, op: str, reflect=False): + def _binaryop(self, other: ColumnBinaryOperand, op: str): + reflect, op = self._check_reflected_op(op) other = self._wrap_binop_normalization(other) lhs, rhs = (other, self) if reflect else (self, other) # Decimals in libcudf don't support truediv, see diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 447a1358616..7891d431e05 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -92,9 +92,7 @@ def base_size(self): # avoid it being negative return max(0, len(self.base_children[0]) - 1) - def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False - ) -> ColumnBase: + def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: """ Calls a binary operator *binop* on operands *self* and *other*. @@ -107,11 +105,6 @@ def _binaryop( Only "add" operator is currently being supported for lists concatenation functions - reflect : boolean, default False - If ``True``, swap the order of the operands. See - https://docs.python.org/3/reference/datamodel.html#object.__ror__ - for more information on when this is necessary. - Returns ------- Series : the output dtype is determined by the @@ -133,6 +126,7 @@ def _binaryop( Name: val, dtype: list """ + reflect, op = self._check_reflected_op(op) other = self._wrap_binop_normalization(other) if isinstance(other.dtype, ListDtype): if op == "__add__": diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 431dc0c33f0..9540dc3ef4f 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -156,9 +156,13 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: unaryop = libcudf.unary.UnaryOp[unaryop.upper()] return libcudf.unary.unary_operation(self, unaryop) - def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False, - ) -> ColumnBase: + def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: + if other is not None and isinstance( + other, cudf.core.column.DecimalBaseColumn + ): + dtyp = other.dtype.__class__(other.dtype.MAX_PRECISION, 0) + return self.as_decimal_column(dtyp)._binaryop(other, op) + int_float_dtype_mapping = { np.int8: np.float32, np.int16: np.float32, @@ -174,15 +178,12 @@ def _binaryop( if op in {"__truediv__", "__rtruediv__"}: # Division with integer types results in a suitable float. if (truediv_type := int_float_dtype_mapping.get(self.dtype.type)) : - return self.astype(truediv_type)._binaryop(other, op, reflect) + return self.astype(truediv_type)._binaryop(other, op) + reflect, op = self._check_reflected_op(op) other = self._wrap_binop_normalization(other) out_dtype = self.dtype if other is not None: - if isinstance(other, cudf.core.column.DecimalBaseColumn): - dtyp = other.dtype.__class__(other.dtype.MAX_PRECISION, 0) - return self.as_decimal_column(dtyp)._binaryop(other, op) - out_dtype = np.result_type(self.dtype, other.dtype) if op in {"__mod__", "__floordiv__"}: tmp = self if reflect else other diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index d528a2ab1f1..f6ae05d1932 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5451,8 +5451,9 @@ def normalize_binop_value( raise TypeError(f"cannot broadcast {type(other)}") def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False + self, other: ColumnBinaryOperand, op: str ) -> "column.ColumnBase": + reflect, op = self._check_reflected_op(op) # Handle object columns that are empty or all nulls when performing # binary operations # See https://github.com/pandas-dev/pandas/issues/46332 @@ -5465,13 +5466,6 @@ def _binaryop( "__pow__", "__truediv__", "__floordiv__", - "__radd__", - "__rsub__", - "__rmul__", - "__rmod__", - "__rpow__", - "__rtruediv__", - "__rfloordiv__", }: return self elif op in {"__eq__", "__lt__", "__le__", "__gt__", "__ge__"}: diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index dae5f72f9e4..f5181126335 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -191,8 +191,9 @@ def _binary_op_div( return this, other, out_dtype def _binaryop( - self, other: ColumnBinaryOperand, op: str, reflect: bool = False + self, other: ColumnBinaryOperand, op: str ) -> "column.ColumnBase": + reflect, op = self._check_reflected_op(op) other = self._wrap_binop_normalization(other) this: ColumnBinaryOperand = self diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index e60cf1f2103..d935da3bd14 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -854,9 +854,7 @@ def _from_data( def _binaryop( self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> SingleColumnFrame: - reflect = self._is_reflected_op(op) - if reflect: - op = op[:2] + op[3:] + reflect, op = self._check_reflected_op(op) operands = self._make_operands_for_binop(other, fill_value, reflect) if operands is NotImplemented: return NotImplemented diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 7e116607017..b8077d7d28b 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -2118,9 +2118,7 @@ def _binaryop( *args, **kwargs, ): - reflect = self._is_reflected_op(op) - if reflect: - op = op[:2] + op[3:] + reflect, op = self._check_reflected_op(op) operands, out_index = self._make_operands_and_index_for_binop( other, op, fill_value, reflect, can_reindex ) diff --git a/python/cudf/cudf/core/mixins/binops.py b/python/cudf/cudf/core/mixins/binops.py index 773b47b62b2..a6a3c66fd2c 100644 --- a/python/cudf/cudf/core/mixins/binops.py +++ b/python/cudf/cudf/core/mixins/binops.py @@ -49,8 +49,12 @@ ) -def _is_reflected_op(op): - return op[2] == "r" and op != "__rshift__" +# TODO: See if there is a better approach to this problem. +def _check_reflected_op(op): + reflect = op[2] == "r" and op != "__rshift__" + if reflect: + op = op[:2] + op[3:] + return reflect, op -BinaryOperand._is_reflected_op = staticmethod(_is_reflected_op) +BinaryOperand._check_reflected_op = staticmethod(_check_reflected_op) diff --git a/python/cudf/cudf/core/mixins/binops.pyi b/python/cudf/cudf/core/mixins/binops.pyi index 45093cd04d4..a2842bc5d07 100644 --- a/python/cudf/cudf/core/mixins/binops.pyi +++ b/python/cudf/cudf/core/mixins/binops.pyi @@ -1,6 +1,6 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from typing import Set +from typing import Set, Tuple class BinaryOperand: _SUPPORTED_BINARY_OPERATIONS: Set @@ -84,5 +84,5 @@ class BinaryOperand: ... @staticmethod - def _is_reflected_op(op) -> bool: + def _check_reflected_op(op) -> Tuple[bool, str]: ... From 1ef22efb39ec24e7748f2983ee0bf994556a3e53 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 13:48:10 -0700 Subject: [PATCH 10/22] Remove unnecessary overrides for _binaryop using proper typing. --- python/cudf/cudf/core/column/column.py | 8 +------- python/cudf/cudf/core/frame.py | 24 ------------------------ python/cudf/cudf/core/mixins/binops.pyi | 8 +++++++- 3 files changed, 8 insertions(+), 32 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index eab6dd38db4..5142fed0b1a 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -41,7 +41,7 @@ drop_nulls, ) from cudf._lib.transform import bools_to_mask -from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike +from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.api.types import ( _is_non_decimal_numeric_dtype, _is_scalar_or_zero_d_array, @@ -1092,12 +1092,6 @@ def unary_operator(self, unaryop: str): f"Operation {unaryop} not supported for dtype {self.dtype}." ) - def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: - raise TypeError( - f"Operation {op} not supported between dtypes {self.dtype} and " - f"{other.dtype}." - ) - def normalize_binop_value( self, other: ScalarLike ) -> Union[ColumnBase, ScalarLike]: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index cdb4abcbeb7..bdc588d8ee8 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -2440,30 +2440,6 @@ def _unaryop(self, op): zip(self._column_names, data_columns), self._index ) - def _binaryop( - self, other: T, op: str, fill_value: Any = None, *args, **kwargs, - ) -> Frame: - """Perform a binary operation between two frames. - - Parameters - ---------- - other : Frame - The second operand. - op : str - The operation to perform. - fill_value : Any, default None - The value to replace null values with. If ``None``, nulls are not - filled before the operation. - - Returns - ------- - Frame - A new instance containing the result of the operation. - """ - raise NotImplementedError( - f"Binary operations are not supported for {self.__class__}" - ) - @classmethod @_cudf_nvtx_annotate def _colwise_binop( diff --git a/python/cudf/cudf/core/mixins/binops.pyi b/python/cudf/cudf/core/mixins/binops.pyi index a2842bc5d07..ff47cdce418 100644 --- a/python/cudf/cudf/core/mixins/binops.pyi +++ b/python/cudf/cudf/core/mixins/binops.pyi @@ -1,10 +1,16 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from typing import Set, Tuple +from typing import Any, Set, Tuple, TypeVar + +# Note: It may be possible to define a narrower bound here eventually. +BinaryOperandType = TypeVar("BinaryOperandType", bound="Any") class BinaryOperand: _SUPPORTED_BINARY_OPERATIONS: Set + def _binaryop(self, other: BinaryOperandType, op: str): + ... + def __add__(self, other): ... From 0161d5754c330efa4b45598688235da83e52d6eb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 14:03:03 -0700 Subject: [PATCH 11/22] Improve handling of BinaryOperand mixin. --- python/cudf/cudf/core/mixins/binops.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/mixins/binops.py b/python/cudf/cudf/core/mixins/binops.py index a6a3c66fd2c..6dd9e901152 100644 --- a/python/cudf/cudf/core/mixins/binops.py +++ b/python/cudf/cudf/core/mixins/binops.py @@ -48,13 +48,22 @@ }, ) +# TODO: See if there is a better approach to these two issues: 1) The mixin +# assumes a single standard parameter, whereas binops have two, and 2) we need +# a way to determine reflected vs normal ops. + + +def _binaryop(self, other, op: str): + "The core binary_operation function. Must be overridden by subclasses, " + "the default implementation raises a NotImplementedError." + raise NotImplementedError + -# TODO: See if there is a better approach to this problem. def _check_reflected_op(op): - reflect = op[2] == "r" and op != "__rshift__" - if reflect: + if (reflect := op[2] == "r" and op != "__rshift__") : op = op[:2] + op[3:] return reflect, op +BinaryOperand._binaryop = _binaryop BinaryOperand._check_reflected_op = staticmethod(_check_reflected_op) From dfe86192eedda157899dc8bdafb919a11556ab38 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 14:32:04 -0700 Subject: [PATCH 12/22] Only enable specific subset of operations for each column type. --- python/cudf/cudf/core/column/categorical.py | 21 ++++++++------------- python/cudf/cudf/core/column/column.py | 1 - python/cudf/cudf/core/column/datetime.py | 15 +++++++++++++++ python/cudf/cudf/core/column/decimal.py | 2 ++ python/cudf/cudf/core/column/lists.py | 1 + python/cudf/cudf/core/column/numerical.py | 2 ++ python/cudf/cudf/core/column/string.py | 21 +++++++++++++++++++++ python/cudf/cudf/core/column/timedelta.py | 21 +++++++++++++++++++++ python/cudf/cudf/tests/test_timedelta.py | 4 +--- python/cudf/cudf/utils/utils.py | 2 ++ 10 files changed, 73 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index e4065142ec7..d29ca26df4e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -630,6 +630,14 @@ class CategoricalColumn(column.ColumnBase): dtype: cudf.core.dtypes.CategoricalDtype _codes: Optional[NumericalColumn] _children: Tuple[NumericalColumn] + _VALID_BINARY_OPERATIONS = { + "__eq__", + "__ne__", + "__lt__", + "__le__", + "__gt__", + "__ge__", + } def __init__( self, @@ -876,19 +884,6 @@ def slice( ) def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: - if op not in { - "__eq__", - "__ne__", - "__lt__", - "__le__", - "__gt__", - "__ge__", - "NULL_EQUALS", - }: - raise TypeError( - "Series of dtype `category` cannot perform the operation: " - f"{op}" - ) other = self._wrap_binop_normalization(other) # TODO: This is currently just here to make mypy happy, but eventually # we'll need to properly establish the APIs for these methods. diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 5142fed0b1a..b1263f42eae 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -93,7 +93,6 @@ class ColumnBase(Column, Serializable, BinaryOperand, Reducible, NotIterable): "max", "min", } - _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS def as_frame(self) -> "cudf.core.frame.Frame": """ diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 471d7e8e665..93247c5fe2c 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -115,6 +115,21 @@ class DatetimeColumn(column.ColumnBase): The validity mask """ + # TODO: Timedelta columns support more operations than this, figure out why + # and whether we should be exploiting reflection more. + _VALID_BINARY_OPERATIONS = { + "__eq__", + "__ne__", + "__lt__", + "__le__", + "__gt__", + "__ge__", + "__add__", + "__sub__", + "__radd__", + "__rsub__", + } + def __init__( self, data: Buffer, diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index ccf764f9309..7dbcc93558b 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -24,6 +24,7 @@ Decimal128Dtype, DecimalDtype, ) +from cudf.core.mixins import BinaryOperand from cudf.utils.utils import pa_mask_buffer_to_mask from .numerical_base import NumericalBaseColumn @@ -33,6 +34,7 @@ class DecimalBaseColumn(NumericalBaseColumn): """Base column for decimal32, decimal64 or decimal128 columns""" dtype: DecimalDtype + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS def as_decimal_column( self, dtype: Dtype, **kwargs diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 7891d431e05..6cf209faa13 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -29,6 +29,7 @@ class ListColumn(ColumnBase): dtype: ListDtype + _VALID_BINARY_OPERATIONS = {"__add__", "__radd__"} def __init__( self, size, dtype, mask=None, offset=0, null_count=None, children=(), diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 9540dc3ef4f..70e97167fef 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -43,6 +43,7 @@ string, ) from cudf.core.dtypes import CategoricalDtype +from cudf.core.mixins import BinaryOperand from cudf.utils import cudautils, utils from cudf.utils.dtypes import ( NUMERIC_TYPES, @@ -69,6 +70,7 @@ class NumericalColumn(NumericalBaseColumn): """ _nan_count: Optional[int] + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS def __init__( self, diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index f6ae05d1932..784b17da5dc 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5031,6 +5031,25 @@ class StringColumn(column.ColumnBase): _start_offset: Optional[int] _end_offset: Optional[int] + _VALID_BINARY_OPERATIONS = { + "__eq__", + "__ne__", + "__lt__", + "__le__", + "__gt__", + "__ge__", + "__add__", + "__radd__", + # These operators aren't actually supported, they only exist to allow + # empty column binops with scalars of arbitrary other dtypes. + "__sub__", + "__mul__", + "__mod__", + "__pow__", + "__truediv__", + "__floordiv__", + } + def __init__( self, mask: Buffer = None, @@ -5454,6 +5473,8 @@ def _binaryop( self, other: ColumnBinaryOperand, op: str ) -> "column.ColumnBase": reflect, op = self._check_reflected_op(op) + # TODO: Try to find a way to disable these ops while still exposing + # this behavior. It may not be possible for now though. # Handle object columns that are empty or all nulls when performing # binary operations # See https://github.com/pandas-dev/pandas/issues/46332 diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index f5181126335..671c0c869f8 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -51,6 +51,27 @@ class TimeDeltaColumn(column.ColumnBase): If None, it is calculated automatically. """ + _VALID_BINARY_OPERATIONS = { + "__eq__", + "__ne__", + "__lt__", + "__le__", + "__gt__", + "__ge__", + "__add__", + "__sub__", + "__mul__", + "__mod__", + "__truediv__", + "__floordiv__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmod__", + "__rtruediv__", + "__rfloordiv__", + } + def __init__( self, data: Buffer, diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index cb3a385b542..74b14d637bf 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1285,9 +1285,7 @@ def test_timedelta_invalid_ops(): rfunc=operator.xor, lfunc_args_and_kwargs=([psr, psr],), rfunc_args_and_kwargs=([sr, sr],), - expected_error_message=re.escape( - f"Series of dtype {sr.dtype} cannot perform the operation __xor__" - ), + compare_error_message=False, ) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 5f8a40f5806..0c5aa8f800e 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -88,6 +88,8 @@ def _array_ufunc(obj, ufunc, method, inputs, kwargs): op = f"__{'r' if reflect else ''}{op}__" # Float_power returns float irrespective of the input type. + # TODO: Do not get the attribute directly, get from the operator module + # so that we can still exploit reflection. if fname == "float_power": return getattr(obj, op)(other).astype(float) return getattr(obj, op)(other) From 7d8f2e20925f57ab0f09bde2e132d042d37e1a03 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 15:19:09 -0700 Subject: [PATCH 13/22] Change unsupported cases to return NotImplemented instead of raising. --- python/cudf/cudf/core/column/column.py | 5 ++++- python/cudf/cudf/core/column/datetime.py | 10 ++++------ python/cudf/cudf/core/column/decimal.py | 10 ++++------ python/cudf/cudf/core/column/lists.py | 5 ++++- python/cudf/cudf/core/column/numerical.py | 10 ++++------ python/cudf/cudf/core/column/string.py | 9 ++++----- python/cudf/cudf/core/column/timedelta.py | 10 ++++------ 7 files changed, 28 insertions(+), 31 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index b1263f42eae..fb7278c96fb 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -185,7 +185,10 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: return False if check_dtypes and (self.dtype != other.dtype): return False - return self._binaryop(other, "NULL_EQUALS").all() + ret = self._binaryop(other, "NULL_EQUALS") + if ret is NotImplemented: + raise TypeError + return ret.all() def all(self, skipna: bool = True) -> bool: # The skipna argument is only used for numerical columns. diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 93247c5fe2c..76b1f9a7dbc 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -278,7 +278,7 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: elif other is None: return cudf.Scalar(other, dtype=self.dtype) - raise TypeError(f"cannot normalize {type(other)}") + return NotImplemented @property def as_numerical(self) -> "cudf.core.column.NumericalColumn": @@ -411,7 +411,8 @@ def quantile( def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: reflect, op = self._check_reflected_op(op) - other = self._wrap_binop_normalization(other) + if (other := self._wrap_binop_normalization(other)) is NotImplemented: + return NotImplemented if isinstance(other, cudf.DateOffset): return other._datetime_binop(self, op, reflect=reflect) @@ -451,10 +452,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: f"timedelta64[{units[max(lhs_unit, rhs_unit)]}]" ) else: - raise TypeError( - f"Series of dtype {self.dtype} cannot perform " - f" the operation {op}" - ) + return NotImplemented lhs, rhs = (self, other) if not reflect else (other, self) return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 7dbcc93558b..9c593834b11 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -64,7 +64,8 @@ def as_string_column( def _binaryop(self, other: ColumnBinaryOperand, op: str): reflect, op = self._check_reflected_op(op) - other = self._wrap_binop_normalization(other) + if (other := self._wrap_binop_normalization(other)) is NotImplemented: + return NotImplemented lhs, rhs = (other, self) if reflect else (self, other) # Decimals in libcudf don't support truediv, see # https://github.com/rapidsai/cudf/pull/7435 for explanation. @@ -137,10 +138,7 @@ def normalize_binop_value(self, other): self.dtype.__class__(self.dtype.__class__.MAX_PRECISION, 0) ) elif not isinstance(other, DecimalBaseColumn): - raise TypeError( - f"Binary operations are not supported between" - f"{str(type(self))} and {str(type(other))}" - ) + return NotImplemented elif not isinstance(self.dtype, other.dtype.__class__): # This branch occurs if we have a DecimalBaseColumn of a # different size (e.g. 64 instead of 32). @@ -160,7 +158,7 @@ def normalize_binop_value(self, other): return other elif is_scalar(other) and isinstance(other, (int, Decimal)): return cudf.Scalar(Decimal(other)) - raise TypeError(f"cannot normalize {type(other)}") + return NotImplemented def _decimal_quantile( self, q: Union[float, Sequence[float]], interpolation: str, exact: bool diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 6cf209faa13..2d90d0400a6 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -128,7 +128,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: """ reflect, op = self._check_reflected_op(op) - other = self._wrap_binop_normalization(other) + if (other := self._wrap_binop_normalization(other)) is NotImplemented: + return NotImplemented if isinstance(other.dtype, ListDtype): if op == "__add__": return concatenate_rows( @@ -250,6 +251,8 @@ def __cuda_array_interface__(self): ) def normalize_binop_value(self, other): + if not isinstance(other, ListColumn): + return NotImplemented return other def _with_type_metadata( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 70e97167fef..bc6a86fe88c 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -183,7 +183,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: return self.astype(truediv_type)._binaryop(other, op) reflect, op = self._check_reflected_op(op) - other = self._wrap_binop_normalization(other) + if (other := self._wrap_binop_normalization(other)) is NotImplemented: + return NotImplemented out_dtype = self.dtype if other is not None: out_dtype = np.result_type(self.dtype, other.dtype) @@ -240,10 +241,7 @@ def normalize_binop_value( if not isinstance( other, (NumericalColumn, cudf.core.column.DecimalBaseColumn,), ): - raise TypeError( - f"Binary operations are not supported between " - f"{type(self)}and {type(other)}" - ) + return NotImplemented return other if other is None: return other @@ -275,7 +273,7 @@ def normalize_binop_value( data=Buffer(ary), dtype=ary.dtype, mask=self.mask, ) else: - raise TypeError(f"cannot broadcast {type(other)}") + return NotImplemented def int2ip(self) -> "cudf.core.column.StringColumn": if self.dtype != cudf.dtype("int64"): diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 784b17da5dc..d6ac868f928 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5467,7 +5467,7 @@ def normalize_binop_value( return utils.scalar_broadcast_to( other.item(), size=len(self), dtype="object" ) - raise TypeError(f"cannot broadcast {type(other)}") + return NotImplemented def _binaryop( self, other: ColumnBinaryOperand, op: str @@ -5494,7 +5494,8 @@ def _binaryop( elif op == "__ne__": return self.isnull() - other = self._wrap_binop_normalization(other) + if (other := self._wrap_binop_normalization(other)) is NotImplemented: + return NotImplemented if isinstance(other, (StringColumn, str, cudf.Scalar)): lhs, rhs = (other, self) if reflect else (self, other) @@ -5519,9 +5520,7 @@ def _binaryop( return libcudf.binaryop.binaryop( lhs=lhs, rhs=rhs, op=op, dtype="bool" ) - raise TypeError( - f"{op} not supported between {type(self)} and {type(rhs)}" - ) + return NotImplemented @copy_docstring(column.ColumnBase.view) def view(self, dtype) -> "cudf.core.column.ColumnBase": diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 671c0c869f8..4f853af7c13 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -215,7 +215,8 @@ def _binaryop( self, other: ColumnBinaryOperand, op: str ) -> "column.ColumnBase": reflect, op = self._check_reflected_op(op) - other = self._wrap_binop_normalization(other) + if (other := self._wrap_binop_normalization(other)) is NotImplemented: + return NotImplemented this: ColumnBinaryOperand = self if op in { @@ -240,10 +241,7 @@ def _binaryop( elif op == "__sub__": out_dtype = _timedelta_sub_result_dtype(self, other) else: - raise TypeError( - f"Series of dtype {self.dtype} cannot perform " - f"the operation {op}" - ) + return NotImplemented lhs, rhs = (this, other) if not reflect else (other, this) @@ -276,7 +274,7 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand: elif other is None: return cudf.Scalar(other, dtype=self.dtype) else: - raise TypeError(f"cannot normalize {type(other)}") + return NotImplemented @property def as_numerical(self) -> "cudf.core.column.NumericalColumn": From 1fbc1bd4191da4921662d8a34565bc0b7a6bd708 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 15:33:20 -0700 Subject: [PATCH 14/22] Remove decimal support from numerical and rely on reflection. --- python/cudf/cudf/core/column/numerical.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index bc6a86fe88c..bfe5ed127c4 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -159,12 +159,6 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: return libcudf.unary.unary_operation(self, unaryop) def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: - if other is not None and isinstance( - other, cudf.core.column.DecimalBaseColumn - ): - dtyp = other.dtype.__class__(other.dtype.MAX_PRECISION, 0) - return self.as_decimal_column(dtyp)._binaryop(other, op) - int_float_dtype_mapping = { np.int8: np.float32, np.int16: np.float32, @@ -238,9 +232,7 @@ def normalize_binop_value( self, other: ScalarLike ) -> Union[ColumnBase, ScalarLike]: if isinstance(other, ColumnBase): - if not isinstance( - other, (NumericalColumn, cudf.core.column.DecimalBaseColumn,), - ): + if not isinstance(other, NumericalColumn): return NotImplemented return other if other is None: From 52fe1a378a260fae985ceab910818c7d0c192176 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 16:11:36 -0700 Subject: [PATCH 15/22] Remove None handling from Frame and rely on columns. --- python/cudf/cudf/core/column/categorical.py | 5 +---- python/cudf/cudf/core/column/column.py | 2 +- python/cudf/cudf/core/column/datetime.py | 2 -- python/cudf/cudf/core/column/numerical.py | 2 -- python/cudf/cudf/core/column/string.py | 18 ++++++++++-------- python/cudf/cudf/core/column/timedelta.py | 5 +---- python/cudf/cudf/core/frame.py | 5 +---- 7 files changed, 14 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d29ca26df4e..30c8628dada 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -900,10 +900,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: if isinstance(other, column.ColumnBase): if not isinstance(other, CategoricalColumn): - raise ValueError( - "Binary operations with categorical columns require both " - "columns to be categorical." - ) + return NotImplemented if other.dtype != self.dtype: raise TypeError( "Categoricals can only compare with the same type" diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index fb7278c96fb..bb414451070 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -524,7 +524,7 @@ def __setitem__(self, key: Any, value: Any): self._mimic_inplace(out, inplace=True) def _wrap_binop_normalization(self, other): - if other is cudf.NA: + if other is cudf.NA or other is None: return cudf.Scalar(other, dtype=self.dtype) return self.normalize_binop_value(other) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 76b1f9a7dbc..4f05b0ba133 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -275,8 +275,6 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: return cudf.Scalar(None, dtype=other.dtype) return cudf.Scalar(other) - elif other is None: - return cudf.Scalar(other, dtype=self.dtype) return NotImplemented diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index bfe5ed127c4..bd25b1e5f71 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -235,8 +235,6 @@ def normalize_binop_value( if not isinstance(other, NumericalColumn): return NotImplemented return other - if other is None: - return other if isinstance(other, cudf.Scalar): if self.dtype == other.dtype: return other diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index d6ac868f928..0040fb8a74d 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5459,14 +5459,10 @@ def normalize_binop_value( and other.dtype == "object" ): return other - if isinstance(other, str) or other is None: - return utils.scalar_broadcast_to( - other, size=len(self), dtype="object" - ) if isinstance(other, np.ndarray) and other.ndim == 0: - return utils.scalar_broadcast_to( - other.item(), size=len(self), dtype="object" - ) + other = other.item() + if isinstance(other, str): + return cudf.Scalar(other) return NotImplemented def _binaryop( @@ -5498,8 +5494,13 @@ def _binaryop( return NotImplemented if isinstance(other, (StringColumn, str, cudf.Scalar)): - lhs, rhs = (other, self) if reflect else (self, other) if op == "__add__": + if isinstance(other, cudf.Scalar): + other = utils.scalar_broadcast_to( + other, size=len(self), dtype="object" + ) + lhs, rhs = (other, self) if reflect else (self, other) + return cast( "column.ColumnBase", libstrings.concatenate( @@ -5517,6 +5518,7 @@ def _binaryop( "__le__", "NULL_EQUALS", }: + lhs, rhs = (other, self) if reflect else (self, other) return libcudf.binaryop.binaryop( lhs=lhs, rhs=rhs, op=op, dtype="bool" ) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 4f853af7c13..8664996dfdc 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -271,10 +271,7 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand: return cudf.Scalar(other) elif np.isscalar(other): return cudf.Scalar(other) - elif other is None: - return cudf.Scalar(other, dtype=self.dtype) - else: - return NotImplemented + return NotImplemented @property def as_numerical(self) -> "cudf.core.column.NumericalColumn": diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index bdc588d8ee8..d78744a719f 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -39,7 +39,6 @@ ColumnBase, as_column, build_categorical_column, - column_empty, deserialize_columns, serialize_columns, ) @@ -2499,9 +2498,7 @@ def _colwise_binop( # are not numerical using the new binops mixin. outcol = ( - column_empty(left_column.size, left_column.dtype, masked=True) - if right_column is None - else getattr(operator, fn)(right_column, left_column) + getattr(operator, fn)(right_column, left_column) if reflect else getattr(operator, fn)(left_column, right_column) ) From 74ae0af2998f30d708d26611ed0d5832ab2f6e31 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 18 Mar 2022 16:30:03 -0700 Subject: [PATCH 16/22] Move numpy array handling into central function. --- python/cudf/cudf/core/column/categorical.py | 2 -- python/cudf/cudf/core/column/column.py | 2 ++ python/cudf/cudf/core/column/datetime.py | 2 -- python/cudf/cudf/core/column/numerical.py | 2 -- python/cudf/cudf/core/column/string.py | 2 -- python/cudf/cudf/core/column/timedelta.py | 2 -- 6 files changed, 2 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 30c8628dada..e0022ed21ca 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -906,8 +906,6 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: "Categoricals can only compare with the same type" ) return other - if isinstance(other, np.ndarray) and other.ndim == 0: - other = other.item() ary = cudf.utils.utils.scalar_broadcast_to( self._encode(other), size=len(self), dtype=self.codes.dtype diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index bb414451070..64519eca7e0 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -526,6 +526,8 @@ def __setitem__(self, key: Any, value: Any): def _wrap_binop_normalization(self, other): if other is cudf.NA or other is None: return cudf.Scalar(other, dtype=self.dtype) + if isinstance(other, np.ndarray) and other.ndim == 0: + other = other.item() return self.normalize_binop_value(other) def _scatter_by_slice( diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 4f05b0ba133..608f4f6feb4 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -248,8 +248,6 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, (cudf.Scalar, ColumnBase, cudf.DateOffset)): return other - if isinstance(other, np.ndarray) and other.ndim == 0: - other = other.item() if isinstance(other, dt.datetime): other = np.datetime64(other) elif isinstance(other, dt.timedelta): diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index bd25b1e5f71..c855dc39d4b 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -241,8 +241,6 @@ def normalize_binop_value( # expensive device-host transfer just to # adjust the dtype other = other.value - elif isinstance(other, np.ndarray) and other.ndim == 0: - other = other.item() other_dtype = np.min_scalar_type(other) if other_dtype.kind in {"b", "i", "u", "f"}: if isinstance(other, cudf.Scalar): diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 0040fb8a74d..060cee2255e 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5459,8 +5459,6 @@ def normalize_binop_value( and other.dtype == "object" ): return other - if isinstance(other, np.ndarray) and other.ndim == 0: - other = other.item() if isinstance(other, str): return cudf.Scalar(other) return NotImplemented diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 8664996dfdc..cb18bfcff47 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -250,8 +250,6 @@ def _binaryop( def normalize_binop_value(self, other) -> ColumnBinaryOperand: if isinstance(other, (ColumnBase, cudf.Scalar)): return other - if isinstance(other, np.ndarray) and other.ndim == 0: - other = other.item() if isinstance(other, dt.timedelta): other = np.timedelta64(other) elif isinstance(other, pd.Timestamp): From 5d997c75640722aa86a7b734c3f9d21bd5462392 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Mar 2022 12:27:39 -0700 Subject: [PATCH 17/22] Fix style. --- python/cudf/cudf/core/column/datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 608f4f6feb4..bf21a3a26ba 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -7,7 +7,7 @@ import re from locale import nl_langinfo from types import SimpleNamespace -from typing import Any, Mapping, Sequence, Union, cast +from typing import Any, Mapping, Sequence, cast import numpy as np import pandas as pd From bd3354cabfad63e4f5eba973e01dfa19a7a09f28 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Mar 2022 14:47:06 -0700 Subject: [PATCH 18/22] Fix one bug. --- python/cudf/cudf/core/column/lists.py | 34 +-------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 2d90d0400a6..dc283cf30b3 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -94,39 +94,7 @@ def base_size(self): return max(0, len(self.base_children[0]) - 1) def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: - """ - Calls a binary operator *binop* on operands *self* - and *other*. - - Parameters - ---------- - self, other : list columns - - binop : binary operator - Only "add" operator is currently being supported - for lists concatenation functions - - Returns - ------- - Series : the output dtype is determined by the - input operands. - - Examples - -------- - >>> import cudf - >>> gdf = cudf.DataFrame({'val': [['a', 'a'], ['b'], ['c']]}) - >>> gdf - val - 0 [a, a] - 1 [b] - 2 [c] - >>> gdf['val'] + gdf['val'] - 0 [a, a, a, a] - 1 [b, b] - 2 [c, c] - Name: val, dtype: list - - """ + # Lists only support __add__, which concatenates lists. reflect, op = self._check_reflected_op(op) if (other := self._wrap_binop_normalization(other)) is NotImplemented: return NotImplemented From 490fda93ea6e26ec2f33d6ce8dcec229f529f3b5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Mar 2022 14:57:01 -0700 Subject: [PATCH 19/22] Fix the other bug. --- python/cudf/cudf/utils/applyutils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index 593965046e6..89331b933a8 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import functools from typing import Any, Dict @@ -103,7 +103,7 @@ def apply_chunks( return applychunks.run(df, chunks=chunks, tpb=tpb) -def make_aggregate_nullmask(df, columns=None, op="and"): +def make_aggregate_nullmask(df, columns=None, op="__and__"): out_mask = None for k in columns or df._data: From f412e55fde467b839a3d5a352f862e19658f042a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Mar 2022 16:19:22 -0700 Subject: [PATCH 20/22] Address PR comments. --- python/cudf/cudf/_lib/datetime.pyx | 2 +- python/cudf/cudf/core/column/column.py | 2 +- python/cudf/cudf/core/column/datetime.py | 7 +++---- python/cudf/cudf/core/column/decimal.py | 16 +++++++++++----- python/cudf/cudf/core/column/lists.py | 3 ++- python/cudf/cudf/core/column/numerical.py | 4 ++-- python/cudf/cudf/core/column/string.py | 3 ++- python/cudf/cudf/core/column/timedelta.py | 7 ++++--- python/cudf/cudf/core/mixins/binops.py | 7 +++++-- python/cudf/cudf/tests/test_timedelta.py | 5 ++--- python/cudf/cudf/utils/utils.py | 2 +- 11 files changed, 34 insertions(+), 24 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 00838664c0f..e218400a2db 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -58,7 +58,7 @@ def extract_datetime_component(Column col, object field): if field == "weekday": # Pandas counts Monday-Sunday as 0-6 - # while we count Monday-Sunday as 1-7 + # while libcudf counts Monday-Sunday as 1-7 result = result - result.dtype.type(1) return result diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 64519eca7e0..401d5f82743 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -187,7 +187,7 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: return False ret = self._binaryop(other, "NULL_EQUALS") if ret is NotImplemented: - raise TypeError + raise TypeError(f"Cannot compare equality with {type(other)}") return ret.all() def all(self, skipna: bool = True) -> bool: diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index bf21a3a26ba..4ce5a70f0ec 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -115,8 +115,6 @@ class DatetimeColumn(column.ColumnBase): The validity mask """ - # TODO: Timedelta columns support more operations than this, figure out why - # and whether we should be exploiting reflection more. _VALID_BINARY_OPERATIONS = { "__eq__", "__ne__", @@ -407,7 +405,8 @@ def quantile( def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: reflect, op = self._check_reflected_op(op) - if (other := self._wrap_binop_normalization(other)) is NotImplemented: + other = self._wrap_binop_normalization(other) + if other is NotImplemented: return NotImplemented if isinstance(other, cudf.DateOffset): return other._datetime_binop(self, op, reflect=reflect) @@ -450,7 +449,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: else: return NotImplemented - lhs, rhs = (self, other) if not reflect else (other, self) + lhs, rhs = (other, self) if reflect else (self, other) return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def fillna( diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 9c593834b11..f10e257d359 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -62,14 +62,20 @@ def as_string_column( "cudf.core.column.StringColumn", as_column([], dtype="object") ) + # Decimals in libcudf don't support truediv, see + # https://github.com/rapidsai/cudf/pull/7435 for explanation. + def __truediv__(self, other): + return self._binaryop(other, "__div__") + + def __rtruediv__(self, other): + return self._binaryop(other, "__rdiv__") + def _binaryop(self, other: ColumnBinaryOperand, op: str): reflect, op = self._check_reflected_op(op) - if (other := self._wrap_binop_normalization(other)) is NotImplemented: + other = self._wrap_binop_normalization(other) + if other is NotImplemented: return NotImplemented lhs, rhs = (other, self) if reflect else (self, other) - # Decimals in libcudf don't support truediv, see - # https://github.com/rapidsai/cudf/pull/7435 for explanation. - op = op.replace("true", "") # Binary Arithmetics between decimal columns. `Scale` and `precision` # are computed outside of libcudf @@ -357,7 +363,7 @@ def _get_decimal_type(lhs_dtype, rhs_dtype, op): p1, p2 = lhs_dtype.precision, rhs_dtype.precision s1, s2 = lhs_dtype.scale, rhs_dtype.scale - if op in ("__add__", "__sub__"): + if op in {"__add__", "__sub__"}: scale = max(s1, s2) precision = scale + max(p1 - s1, p2 - s2) + 1 elif op == "__mul__": diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index dc283cf30b3..0df5be2d862 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -96,7 +96,8 @@ def base_size(self): def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: # Lists only support __add__, which concatenates lists. reflect, op = self._check_reflected_op(op) - if (other := self._wrap_binop_normalization(other)) is NotImplemented: + other = self._wrap_binop_normalization(other) + if other is NotImplemented: return NotImplemented if isinstance(other.dtype, ListDtype): if op == "__add__": diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index c855dc39d4b..c9bc3c59aea 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -211,14 +211,14 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: if op in {"__and__", "__or__", "__xor__"}: if is_float_dtype(self.dtype) or is_float_dtype(other): raise TypeError( - f"Operation 'bitwise {op}' not supported between " + f"Operation 'bitwise {op[2:-2]}' not supported between " f"{self.dtype.type.__name__} and " f"{other.dtype.type.__name__}" ) if is_bool_dtype(self.dtype) or is_bool_dtype(other): out_dtype = "bool" - lhs, rhs = (self, other) if not reflect else (other, self) + lhs, rhs = (other, self) if reflect else (self, other) return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 060cee2255e..15a65462232 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5488,7 +5488,8 @@ def _binaryop( elif op == "__ne__": return self.isnull() - if (other := self._wrap_binop_normalization(other)) is NotImplemented: + other = self._wrap_binop_normalization(other) + if other is NotImplemented: return NotImplemented if isinstance(other, (StringColumn, str, cudf.Scalar)): diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index cb18bfcff47..11d295a6190 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -168,7 +168,7 @@ def _binary_op_mod(self, other: ColumnBinaryOperand) -> DtypeObj: out_dtype = self.dtype else: raise TypeError( - f"Modulus of {self.dtype} with {other.dtype} " + f"Modulo of {self.dtype} with {other.dtype} " f"cannot be performed." ) return out_dtype @@ -215,7 +215,8 @@ def _binaryop( self, other: ColumnBinaryOperand, op: str ) -> "column.ColumnBase": reflect, op = self._check_reflected_op(op) - if (other := self._wrap_binop_normalization(other)) is NotImplemented: + other = self._wrap_binop_normalization(other) + if other is NotImplemented: return NotImplemented this: ColumnBinaryOperand = self @@ -243,7 +244,7 @@ def _binaryop( else: return NotImplemented - lhs, rhs = (this, other) if not reflect else (other, this) + lhs, rhs = (other, this) if reflect else (this, other) return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) diff --git a/python/cudf/cudf/core/mixins/binops.py b/python/cudf/cudf/core/mixins/binops.py index 6dd9e901152..e07977ed4c3 100644 --- a/python/cudf/cudf/core/mixins/binops.py +++ b/python/cudf/cudf/core/mixins/binops.py @@ -54,8 +54,11 @@ def _binaryop(self, other, op: str): - "The core binary_operation function. Must be overridden by subclasses, " - "the default implementation raises a NotImplementedError." + """The core binary_operation function. + + Must be overridden by subclasses, the default implementation raises a + NotImplementedError. + """ raise NotImplementedError diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 74b14d637bf..2623b755cfb 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1175,8 +1175,7 @@ def test_timedelta_invalid_ops(): lfunc_args_and_kwargs=([psr, dt_psr],), rfunc_args_and_kwargs=([sr, dt_sr],), expected_error_message=re.escape( - f"Modulus of {sr.dtype} with {dt_sr.dtype} " - f"cannot be performed." + f"Modulo of {sr.dtype} with {dt_sr.dtype} " f"cannot be performed." ), ) @@ -1186,7 +1185,7 @@ def test_timedelta_invalid_ops(): lfunc_args_and_kwargs=([psr, "a"],), rfunc_args_and_kwargs=([sr, "a"],), expected_error_message=re.escape( - f"Modulus of {sr.dtype} with {np.dtype('object')} " + f"Modulo of {sr.dtype} with {np.dtype('object')} " f"cannot be performed." ), ) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 0c5aa8f800e..ed714182576 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -87,7 +87,7 @@ def _array_ufunc(obj, ufunc, method, inputs, kwargs): reflect = False op = f"__{'r' if reflect else ''}{op}__" - # Float_power returns float irrespective of the input type. + # float_power returns float irrespective of the input type. # TODO: Do not get the attribute directly, get from the operator module # so that we can still exploit reflection. if fname == "float_power": From 7110ad518ad0679a9d0798a703c8ba54315d1de2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Mar 2022 16:45:21 -0700 Subject: [PATCH 21/22] Update comment about empty string columns. --- python/cudf/cudf/core/column/string.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 15a65462232..95bb06ebb0c 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5041,7 +5041,8 @@ class StringColumn(column.ColumnBase): "__add__", "__radd__", # These operators aren't actually supported, they only exist to allow - # empty column binops with scalars of arbitrary other dtypes. + # empty column binops with scalars of arbitrary other dtypes. See + # the _binaryop method for more information. "__sub__", "__mul__", "__mod__", @@ -5467,11 +5468,13 @@ def _binaryop( self, other: ColumnBinaryOperand, op: str ) -> "column.ColumnBase": reflect, op = self._check_reflected_op(op) - # TODO: Try to find a way to disable these ops while still exposing - # this behavior. It may not be possible for now though. - # Handle object columns that are empty or all nulls when performing - # binary operations - # See https://github.com/pandas-dev/pandas/issues/46332 + # Due to https://github.com/pandas-dev/pandas/issues/46332 we need to + # support binary operations between empty or all null string columns + # and columns of other dtypes, even if those operations would otherwise + # be invalid. For example, you cannot divide strings, but pandas allows + # division between an empty string column and a (nonempty) integer + # column. Ideally we would disable these operators entirely, but until + # the above issue is resolved we cannot avoid this problem. if self.null_count == len(self): if op in { "__add__", From 298572d0b3f46a7fe0018b1e5536e163f3cf91b3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Mar 2022 18:18:42 -0700 Subject: [PATCH 22/22] Fix error message. --- python/cudf/cudf/tests/test_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index fc9ad9711d1..8cc65de739e 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -381,7 +381,7 @@ def test_concatenate_rows_of_lists(): def test_concatenate_list_with_nonlist(): - with pytest.raises(TypeError, match="can only concatenate list to list"): + with pytest.raises(TypeError): gdf1 = cudf.DataFrame({"A": [["a", "c"], ["b", "d"], ["c", "d"]]}) gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]}) gdf1["A"] + gdf2["A"]