diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 7a9a17631a9..387cd54d7d3 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -41,14 +41,6 @@ class BaseIndex(Serializable): _accessors: Set[Any] = set() _data: ColumnAccessor - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - - if method == "__call__" and hasattr(cudf, ufunc.__name__): - func = getattr(cudf, ufunc.__name__) - return func(*inputs) - else: - return NotImplemented - @cached_property def _values(self) -> ColumnBase: raise NotImplementedError diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1c672aacd86..45bf1c27bab 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -11,7 +11,17 @@ import warnings from collections import defaultdict from collections.abc import Iterable, Sequence -from typing import Any, MutableMapping, Optional, Set, TypeVar +from typing import ( + Any, + Dict, + MutableMapping, + Optional, + Set, + Tuple, + Type, + TypeVar, + Union, +) import cupy import numpy as np @@ -44,6 +54,7 @@ from cudf.core.abc import Serializable from cudf.core.column import ( CategoricalColumn, + ColumnBase, as_column, build_categorical_column, build_column, @@ -1898,7 +1909,7 @@ def _get_columns_by_label(self, labels, downcast=False): ) return out - def _prep_for_binop( + def _make_operands_and_index_for_binop( self, other: Any, fn: str, @@ -1907,7 +1918,13 @@ def _prep_for_binop( can_reindex: bool = False, *args, **kwargs, - ): + ) -> Tuple[ + Union[ + Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + Type[NotImplemented], + ], + Optional[BaseIndex], + ]: lhs, rhs = self, other if _is_scalar_or_zero_d_array(rhs): @@ -1986,28 +2003,6 @@ def _prep_for_binop( return operands, lhs._index - @annotate("DATAFRAME_BINARYOP", color="blue", domain="cudf_python") - def _binaryop( - self, - other: Any, - fn: str, - fill_value: Any = None, - reflect: bool = False, - can_reindex: bool = False, - *args, - **kwargs, - ): - operands, out_index = self._prep_for_binop( - other, fn, fill_value, reflect, can_reindex - ) - if operands is NotImplemented: - return NotImplemented - - return self._from_data( - ColumnAccessor(type(self)._colwise_binop(operands, fn)), - index=out_index, - ) - @annotate("DATAFRAME_UPDATE", color="blue", domain="cudf_python") def update( self, @@ -2164,9 +2159,7 @@ def columns(self, columns): columns = pd.Index(range(len(self._data.columns))) is_multiindex = isinstance(columns, pd.MultiIndex) - if isinstance( - columns, (Series, cudf.Index, cudf.core.column.ColumnBase) - ): + if isinstance(columns, (Series, cudf.Index, ColumnBase)): columns = pd.Index(columns.to_numpy(), tupleize_cols=is_multiindex) elif not isinstance(columns, pd.Index): columns = pd.Index(columns, tupleize_cols=is_multiindex) @@ -6607,7 +6600,7 @@ def _setitem_with_dataframe( input_df: DataFrame, replace_df: DataFrame, input_cols: Any = None, - mask: Optional[cudf.core.column.ColumnBase] = None, + mask: Optional[ColumnBase] = None, ignore_index: bool = False, ): """ @@ -6698,9 +6691,7 @@ def _get_union_of_series_names(series_list): def _get_host_unique(array): - if isinstance( - array, (cudf.Series, cudf.Index, cudf.core.column.ColumnBase) - ): + if isinstance(array, (cudf.Series, cudf.Index, ColumnBase)): return array.unique.to_pandas() elif isinstance(array, (str, numbers.Number)): return [array] diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index d478baf2d69..4a5a222e81e 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -54,6 +54,49 @@ T = TypeVar("T", bound="Frame") +# Mapping from ufuncs to the corresponding binary operators. +_ufunc_binary_operations = { + # Arithmetic binary operations. + "add": "add", + "subtract": "sub", + "multiply": "mul", + "matmul": "matmul", + "divide": "truediv", + "true_divide": "truediv", + "floor_divide": "floordiv", + "power": "pow", + "float_power": "pow", + "remainder": "mod", + "mod": "mod", + "fmod": "mod", + # Bitwise binary operations. + "bitwise_and": "and", + "bitwise_or": "or", + "bitwise_xor": "xor", + # Comparison binary operators + "greater": "gt", + "greater_equal": "ge", + "less": "lt", + "less_equal": "le", + "not_equal": "ne", + "equal": "eq", +} + +# These operators need to be mapped to their inverses when performing a +# reflected ufunc operation because no reflected version of the operators +# themselves exist. When these operators are invoked directly (not via +# __array_ufunc__) Python takes care of calling the inverse operation. +_ops_without_reflection = { + "gt": "lt", + "ge": "le", + "lt": "gt", + "le": "ge", + # ne and eq are symmetric, so they are their own inverse op + "ne": "ne", + "eq": "eq", +} + + class Frame: """A collection of Column objects with an optional index. @@ -2752,6 +2795,11 @@ def sin(self): 0.8011526357338306, 0.8939966636005579], dtype='float64') """ + warnings.warn( + "sin is deprecated and will be removed. Use numpy.sin instead", + FutureWarning, + ) + return self._unaryop("sin") @annotate("FRAME_COS", color="green", domain="cudf_python") @@ -2814,6 +2862,11 @@ def cos(self): -0.5984600690578581, -0.4480736161291701], dtype='float64') """ + warnings.warn( + "cos is deprecated and will be removed. Use numpy.cos instead", + FutureWarning, + ) + return self._unaryop("cos") @annotate("FRAME_TAN", color="green", domain="cudf_python") @@ -2876,6 +2929,11 @@ def tan(self): -1.3386902103511544, -1.995200412208242], dtype='float64') """ + warnings.warn( + "tan is deprecated and will be removed. Use numpy.tan instead", + FutureWarning, + ) + return self._unaryop("tan") @annotate("FRAME_ASIN", color="green", domain="cudf_python") @@ -2927,6 +2985,11 @@ def asin(self): 1.5707963267948966, 0.3046926540153975], dtype='float64') """ + warnings.warn( + "asin is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("asin") @annotate("FRAME_ACOS", color="green", domain="cudf_python") @@ -2978,6 +3041,11 @@ def acos(self): 1.5707963267948966, 1.266103672779499], dtype='float64') """ + warnings.warn( + "acos is deprecated and will be removed. Use numpy.acos instead", + FutureWarning, + ) + result = self.copy(deep=False) for col in result._data: min_float_dtype = cudf.utils.dtypes.get_min_float_dtype( @@ -3047,6 +3115,11 @@ def atan(self): 0.2914567944778671], dtype='float64') """ + warnings.warn( + "atan is deprecated and will be removed. Use numpy.atan instead", + FutureWarning, + ) + return self._unaryop("atan") @annotate("FRAME_EXP", color="green", domain="cudf_python") @@ -3110,6 +3183,11 @@ def exp(self): 2.718281828459045, 1.0, 1.3498588075760032], dtype='float64') """ + warnings.warn( + "exp is deprecated and will be removed. Use numpy.exp instead", + FutureWarning, + ) + return self._unaryop("exp") @annotate("FRAME_LOG", color="green", domain="cudf_python") @@ -3172,6 +3250,11 @@ def log(self): Float64Index([2.302585092994046, 2.3978952727983707, 6.214608098422191], dtype='float64') """ + warnings.warn( + "log is deprecated and will be removed. Use numpy.log instead", + FutureWarning, + ) + return self._unaryop("log") @annotate("FRAME_SQRT", color="green", domain="cudf_python") @@ -3228,6 +3311,11 @@ def sqrt(self): >>> index.sqrt() Float64Index([nan, 10.0, 25.0], dtype='float64') """ + warnings.warn( + "sqrt is deprecated and will be removed. Use numpy.sqrt instead", + FutureWarning, + ) + return self._unaryop("sqrt") @annotate("FRAME_ABS", color="green", domain="cudf_python") @@ -3496,7 +3584,9 @@ def _binaryop( Frame A new instance containing the result of the operation. """ - raise NotImplementedError + raise NotImplementedError( + f"Binary operations are not supported for {self.__class__}" + ) @classmethod @annotate("FRAME_COLWISE_BINOP", color="green", domain="cudf_python") @@ -3658,6 +3748,84 @@ def _colwise_binop( return output + # For more detail on this function and how it should work, see + # https://numpy.org/doc/stable/reference/ufuncs.html + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # We don't currently support reduction, accumulation, etc. We also + # don't support any special kwargs or higher arity ufuncs than binary. + if method != "__call__" or kwargs or ufunc.nin > 2: + return NotImplemented + + fname = ufunc.__name__ + if fname in _ufunc_binary_operations: + reflect = self is not inputs[0] + other = inputs[0] if reflect else inputs[1] + + op = _ufunc_binary_operations[fname] + if reflect and op in _ops_without_reflection: + op = _ops_without_reflection[op] + reflect = False + op = f"__{'r' if reflect else ''}{op}__" + + # Float_power returns float irrespective of the input type. + if fname == "float_power": + return getattr(self, op)(other).astype(float) + return getattr(self, op)(other) + + # Special handling for various unary operations. + if fname == "negative": + return self * -1 + if fname == "positive": + return self.copy(deep=True) + if fname == "invert": + return ~self + if fname == "absolute": + return self.abs() + if fname == "fabs": + return self.abs().astype(np.float64) + + # None is a sentinel used by subclasses to trigger cupy dispatch. + return None + + def _apply_cupy_ufunc_to_operands( + self, ufunc, cupy_func, operands, **kwargs + ): + # Note: There are some operations that may be supported by libcudf but + # are not supported by pandas APIs. In particular, libcudf binary + # operations support logical and/or operations as well as + # trigonometric, but those operations are not defined on + # pd.Series/DataFrame. For now those operations will dispatch to cupy, + # but if ufuncs are ever a bottleneck we could add special handling to + # dispatch those (or any other) functions that we could implement + # without cupy. + + mask = None + data = [{} for _ in range(ufunc.nout)] + for name, (left, right, _, _) in operands.items(): + cupy_inputs = [] + for inp in (left, right) if ufunc.nin == 2 else (left,): + if isinstance(inp, ColumnBase) and inp.has_nulls(): + new_mask = as_column(inp.nullmask) + + # TODO: This is a hackish way to perform a bitwise and + # of bitmasks. Once we expose + # cudf::detail::bitwise_and, then we can use that + # instead. + mask = new_mask if mask is None else (mask & new_mask) + + # Arbitrarily fill with zeros. For ufuncs, we assume + # that the end result propagates nulls via a bitwise + # and, so these elements are irrelevant. + inp = inp.fillna(0) + cupy_inputs.append(cupy.asarray(inp)) + + cp_output = cupy_func(*cupy_inputs, **kwargs) + if ufunc.nout == 1: + cp_output = (cp_output,) + for i, out in enumerate(cp_output): + data[i][name] = as_column(out).set_mask(mask) + return data + @annotate("FRAME_DOT", color="green", domain="cudf_python") def dot(self, other, reflect=False): """ diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5b60e8dbd1c..5aab834d452 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -520,6 +520,11 @@ def _as_int64(self): # that are not defined directly on RangeIndex. return Int64Index._from_data(self._data) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return self._as_int64().__array_ufunc__( + ufunc, method, *inputs, **kwargs + ) + def __getattr__(self, key): # For methods that are not defined for RangeIndex we attempt to operate # on the corresponding integer index if possible. @@ -773,6 +778,41 @@ def __init__(self, data, **kwargs): name = kwargs.get("name") super().__init__({name: data}) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) + + if ret is not None: + return ret + + # Attempt to dispatch all other functions to cupy. + cupy_func = getattr(cupy, ufunc.__name__) + if cupy_func: + if ufunc.nin == 2: + other = inputs[self is inputs[0]] + inputs = self._make_operands_for_binop(other) + else: + inputs = { + name: (col, None, False, None) + for name, col in self._data.items() + } + + data = self._apply_cupy_ufunc_to_operands( + ufunc, cupy_func, inputs, **kwargs + ) + + out = [_index_from_data(out) for out in data] + + # pandas returns numpy arrays when the outputs are boolean. + for i, o in enumerate(out): + # We explicitly _do not_ use isinstance here: we want only + # boolean GenericIndexes, not dtype-specific subclasses. + if type(o) is GenericIndex and o.dtype.kind == "b": + out[i] = o.values + + return out[0] if ufunc.nout == 1 else tuple(out) + + return NotImplemented + def _binaryop( self, other: T, @@ -784,11 +824,16 @@ def _binaryop( ) -> SingleColumnFrame: # Specialize binops to generate the appropriate output index type. operands = self._make_operands_for_binop(other, fill_value, reflect) - return ( - _index_from_data(data=self._colwise_binop(operands, fn),) - if operands is not NotImplemented - else NotImplemented - ) + if operands is NotImplemented: + return NotImplemented + ret = _index_from_data(self._colwise_binop(operands, fn)) + + # pandas returns numpy arrays when the outputs are boolean. We + # explicitly _do not_ use isinstance here: we want only boolean + # GenericIndexes, not dtype-specific subclasses. + if type(ret) is GenericIndex and ret.dtype.kind == "b": + return ret.values + return ret def _copy_type_metadata( self, other: Frame, include_index: bool = True diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index bc7337d0a42..0226ee4888f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -7,7 +7,7 @@ import warnings from collections import Counter, abc from functools import cached_property -from typing import Callable, Type, TypeVar +from typing import Any, Callable, Dict, Optional, Tuple, Type, TypeVar, Union from uuid import uuid4 import cupy as cp @@ -1695,150 +1695,86 @@ def last(self, offset): slice_func=lambda i: self.iloc[i:], ) - # For more detail on this function and how it should work, see - # https://numpy.org/doc/stable/reference/ufuncs.html - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # We don't currently support reduction, accumulation, etc. We also - # don't support any special kwargs or higher arity ufuncs than binary. - if method != "__call__" or kwargs or ufunc.nin > 2: + def _binaryop( + self, + other: Any, + fn: str, + fill_value: Any = None, + reflect: bool = False, + can_reindex: bool = False, + *args, + **kwargs, + ): + operands, out_index = self._make_operands_and_index_for_binop( + other, fn, fill_value, reflect, can_reindex + ) + if operands is NotImplemented: return NotImplemented - # Binary operations - binary_operations = { - # Arithmetic binary operations. - "add": "add", - "subtract": "sub", - "multiply": "mul", - "matmul": "matmul", - "divide": "truediv", - "true_divide": "truediv", - "floor_divide": "floordiv", - "power": "pow", - "float_power": "pow", - "remainder": "mod", - "mod": "mod", - "fmod": "mod", - # Bitwise binary operations. - "bitwise_and": "and", - "bitwise_or": "or", - "bitwise_xor": "xor", - # Comparison binary operators - "greater": "gt", - "greater_equal": "ge", - "less": "lt", - "less_equal": "le", - "not_equal": "ne", - "equal": "eq", - } + return self._from_data( + ColumnAccessor(type(self)._colwise_binop(operands, fn)), + index=out_index, + ) + + def _make_operands_and_index_for_binop( + self, + other: Any, + fn: str, + fill_value: Any = None, + reflect: bool = False, + can_reindex: bool = False, + *args, + **kwargs, + ) -> Tuple[ + Union[ + Dict[ + Optional[str], + Tuple[cudf.core.column.ColumnBase, Any, bool, Any], + ], + Type[NotImplemented], + ], + Optional[cudf.BaseIndex], + ]: + raise NotImplementedError( + "Binary operations are not supported for {self.__class__}" + ) - # First look for methods of the class. + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) fname = ufunc.__name__ - if fname in binary_operations: - reflect = self is not inputs[0] - other = inputs[0] if reflect else inputs[1] - - # These operators need to be mapped to their inverses when - # performing a reflected operation because no reflected version of - # the operators themselves exist. - ops_without_reflection = { - "gt": "lt", - "ge": "le", - "lt": "gt", - "le": "ge", - # ne and eq are symmetric, so they are their own inverse op - "ne": "ne", - "eq": "eq", - } - - op = binary_operations[fname] - if reflect and op in ops_without_reflection: - op = ops_without_reflection[op] - reflect = False - op = f"__{'r' if reflect else ''}{op}__" + if ret is not None: # pandas bitwise operations return bools if indexes are misaligned. - if ( - "bitwise" in fname - and isinstance(other, IndexedFrame) - and not self.index.equals(other.index) - ): - return getattr(self, op)(other).astype(bool) - # Float_power returns float irrespective of the input type. - if fname == "float_power": - return getattr(self, op)(other).astype(float) - return getattr(self, op)(other) - - # Special handling for unary operations. - if fname == "negative": - return self * -1 - if fname == "positive": - return self.copy(deep=True) - if fname == "invert": - return ~self - if fname == "absolute": - return self.abs() - if fname == "fabs": - return self.abs().astype(np.float64) - - # Note: There are some operations that may be supported by libcudf but - # are not supported by pandas APIs. In particular, libcudf binary - # operations support logical and/or operations, but those operations - # are not defined on pd.Series/DataFrame. For now those operations will - # dispatch to cupy, but if ufuncs are ever a bottleneck we could add - # special handling to dispatch those (or any other) functions that we - # could implement without cupy. + if "bitwise" in fname: + reflect = self is not inputs[0] + other = inputs[0] if reflect else inputs[1] + if isinstance(other, self.__class__) and not self.index.equals( + other.index + ): + ret = ret.astype(bool) + return ret # Attempt to dispatch all other functions to cupy. cupy_func = getattr(cp, fname) if cupy_func: - # Indices must be aligned before converting to arrays. if ufunc.nin == 2: other = inputs[self is inputs[0]] - inputs, index = self._prep_for_binop(other, fname) + inputs, index = self._make_operands_and_index_for_binop( + other, fname + ) else: + # This works for Index too inputs = { name: (col, None, False, None) for name, col in self._data.items() } index = self._index - mask = None - data = [{} for _ in range(ufunc.nout)] - for name, (left, right, _, _) in inputs.items(): - cupy_inputs = [] - # TODO: I'm jumping through multiple hoops to get the unary - # behavior to match up with the binary. I should see if there - # are better patterns to employ here. - for inp in (left, right) if ufunc.nin == 2 else (left,): - if ( - isinstance(inp, cudf.core.column.ColumnBase) - and inp.has_nulls() - ): - new_mask = cudf.core.column.as_column(inp.nullmask) - - # TODO: This is a hackish way to perform a bitwise and - # of bitmasks. Once we expose - # cudf::detail::bitwise_and, then we can use that - # instead. - mask = new_mask if mask is None else (mask & new_mask) - - # Arbitrarily fill with zeros. For ufuncs, we assume - # that the end result propagates nulls via a bitwise - # and, so these elements are irrelevant. - inp = inp.fillna(0) - cupy_inputs.append(cp.asarray(inp)) - - cp_output = cupy_func(*cupy_inputs, **kwargs) - if ufunc.nout == 1: - cp_output = (cp_output,) - for i, out in enumerate(cp_output): - data[i][name] = cudf.core.column.as_column(out).set_mask( - mask - ) - - out = tuple( - self.__class__._from_data(out, index=index) for out in data + data = self._apply_cupy_ufunc_to_operands( + ufunc, cupy_func, inputs, **kwargs ) + + out = tuple(self._from_data(out, index=index) for out in data) return out[0] if ufunc.nout == 1 else out return NotImplemented diff --git a/python/cudf/cudf/core/ops.py b/python/cudf/cudf/core/ops.py index fe9e012f406..c2a8c0e72fb 100644 --- a/python/cudf/cudf/core/ops.py +++ b/python/cudf/cudf/core/ops.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +import warnings from numbers import Number import numpy as np @@ -10,6 +11,10 @@ def sin(arbitrary): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.sin(arbitrary) else: @@ -17,6 +22,10 @@ def sin(arbitrary): def cos(arbitrary): + warnings.warn( + "cos is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.cos(arbitrary) else: @@ -24,6 +33,10 @@ def cos(arbitrary): def tan(arbitrary): + warnings.warn( + "tan is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.tan(arbitrary) else: @@ -31,6 +44,11 @@ def tan(arbitrary): def arcsin(arbitrary): + warnings.warn( + "arcsin is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.arcsin(arbitrary) else: @@ -38,6 +56,11 @@ def arcsin(arbitrary): def arccos(arbitrary): + warnings.warn( + "arcsin is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.arccos(arbitrary) else: @@ -45,6 +68,11 @@ def arccos(arbitrary): def arctan(arbitrary): + warnings.warn( + "arctan is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.arctan(arbitrary) else: @@ -52,6 +80,10 @@ def arctan(arbitrary): def exp(arbitrary): + warnings.warn( + "exp is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.exp(arbitrary) else: @@ -59,6 +91,10 @@ def exp(arbitrary): def log(arbitrary): + warnings.warn( + "log is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.log(arbitrary) else: @@ -66,6 +102,10 @@ def log(arbitrary): def sqrt(arbitrary): + warnings.warn( + "sqrt is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.sqrt(arbitrary) else: @@ -73,6 +113,11 @@ def sqrt(arbitrary): def logical_not(arbitrary): + warnings.warn( + "logical_not is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.logical_not(arbitrary) else: @@ -80,6 +125,11 @@ def logical_not(arbitrary): def logical_and(lhs, rhs): + warnings.warn( + "logical_and is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.logical_and(lhs, rhs) else: @@ -87,6 +137,11 @@ def logical_and(lhs, rhs): def logical_or(lhs, rhs): + warnings.warn( + "logical_or is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.logical_or(lhs, rhs) else: @@ -94,6 +149,11 @@ def logical_or(lhs, rhs): def remainder(lhs, rhs): + warnings.warn( + "remainder is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.mod(lhs, rhs) elif isinstance(lhs, Frame): @@ -103,6 +163,10 @@ def remainder(lhs, rhs): def floor_divide(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.floor_divide(lhs, rhs) elif isinstance(lhs, Frame): @@ -112,6 +176,10 @@ def floor_divide(lhs, rhs): def subtract(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.subtract(lhs, rhs) elif isinstance(lhs, Frame): @@ -121,6 +189,10 @@ def subtract(lhs, rhs): def add(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.add(lhs, rhs) elif isinstance(rhs, Frame): @@ -130,6 +202,10 @@ def add(lhs, rhs): def true_divide(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.true_divide(lhs, rhs) elif isinstance(rhs, Frame): @@ -139,6 +215,10 @@ def true_divide(lhs, rhs): def multiply(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.multiply(lhs, rhs) elif isinstance(rhs, Frame): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 8574a152c44..45a44016449 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -9,7 +9,7 @@ from collections import abc as abc from numbers import Number from shutil import get_terminal_size -from typing import Any, MutableMapping, Optional, Set, Union +from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union import cupy import numpy as np @@ -39,6 +39,7 @@ ) from cudf.core.abc import Serializable from cudf.core.column import ( + ColumnBase, DatetimeColumn, TimeDeltaColumn, arange, @@ -435,7 +436,7 @@ def __init__( else: data = {} - if not isinstance(data, column.ColumnBase): + if not isinstance(data, ColumnBase): data = column.as_column(data, nan_as_null=nan_as_null, dtype=dtype) else: if dtype is not None: @@ -444,7 +445,7 @@ def __init__( if index is not None and not isinstance(index, BaseIndex): index = as_index(index) - assert isinstance(data, column.ColumnBase) + assert isinstance(data, ColumnBase) super().__init__({name: data}) self._index = RangeIndex(len(data)) if index is None else index @@ -1206,7 +1207,7 @@ def __repr__(self): lines.append(category_memory) return "\n".join(lines) - def _prep_for_binop( + def _make_operands_and_index_for_binop( self, other: Any, fn: str, @@ -1215,22 +1216,19 @@ def _prep_for_binop( can_reindex: bool = False, *args, **kwargs, - ): + ) -> Tuple[ + Union[ + Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + Type[NotImplemented], + ], + Optional[BaseIndex], + ]: # Specialize binops to align indices. - if isinstance(other, SingleColumnFrame): + if isinstance(other, Series): if ( - # TODO: The can_reindex logic also needs to be applied for - # DataFrame (the methods that need it just don't exist yet). not can_reindex and fn in cudf.utils.utils._EQUALITY_OPS - and ( - isinstance(other, Series) - # TODO: mypy doesn't like this line because the index - # property is not defined on SingleColumnFrame (or Index, - # for that matter). Ignoring is the easy solution for now, - # a cleaner fix requires reworking the type hierarchy. - and not self.index.equals(other.index) # type: ignore - ) + and not self.index.equals(other.index) ): raise ValueError( "Can only compare identically-labeled Series objects" @@ -1242,27 +1240,6 @@ def _prep_for_binop( operands = lhs._make_operands_for_binop(other, fill_value, reflect) return operands, lhs._index - def _binaryop( - self, - other: Frame, - fn: str, - fill_value: Any = None, - reflect: bool = False, - can_reindex: bool = False, - *args, - **kwargs, - ): - operands, out_index = self._prep_for_binop( - other, fn, fill_value, reflect, can_reindex - ) - return ( - self._from_data( - data=self._colwise_binop(operands, fn), index=out_index, - ) - if operands is not NotImplemented - else NotImplemented - ) - def logical_and(self, other): warnings.warn( "Series.logical_and is deprecated and will be removed.", diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 50b206d3388..f02e3b9f959 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -4,7 +4,16 @@ from __future__ import annotations import builtins -from typing import Any, Dict, MutableMapping, Optional, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) import cupy import numpy as np @@ -279,7 +288,10 @@ def _make_operands_for_binop( reflect: bool = False, *args, **kwargs, - ) -> Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]: + ) -> Union[ + Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + Type[NotImplemented], + ]: """Generate the dictionary of operands used for a binary operation. Parameters diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index e4b4d5020ea..9d762f26ebd 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -50,6 +50,83 @@ def _hide_ufunc_warnings(ufunc): yield +@pytest.mark.parametrize("ufunc", _UFUNCS) +def test_ufunc_index(ufunc): + # Note: This test assumes that all ufuncs are unary or binary. + fname = ufunc.__name__ + + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + pandas_args = args = [ + cudf.Index(cp.random.randint(low=1, high=10, size=N),) + for _ in range(ufunc.nin) + ] + + try: + got = ufunc(*args) + except AttributeError as e: + # We xfail if we don't have an explicit dispatch and cupy doesn't have + # the method so that we can easily identify these methods. As of this + # writing, the only missing methods are isnat and heaviside. + if "module 'cupy' has no attribute" in str(e): + pytest.xfail(reason="Operation not supported by cupy") + raise + + expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) + + try: + if ufunc.nout > 1: + for g, e in zip(got, expect): + assert_eq(g, e, check_exact=False) + else: + assert_eq(got, expect, check_exact=False) + except AssertionError: + # TODO: This branch can be removed when + # https://github.com/rapidsai/cudf/issues/10178 is resolved + if fname in ("power", "float_power"): + if (got - expect).abs().max() == 1: + pytest.xfail("https://github.com/rapidsai/cudf/issues/10178") + raise + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] +) +@pytest.mark.parametrize("type_", ["cupy", "numpy", "list"]) +@pytest.mark.parametrize("reflect", [True, False]) +def test_binary_ufunc_index_array(ufunc, type_, reflect): + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + args = [cudf.Index(cp.random.rand(N)) for _ in range(ufunc.nin)] + + arg1 = args[1].to_cupy() if type_ == "cupy" else args[1].to_numpy() + if type_ == "list": + arg1 = arg1.tolist() + + if reflect: + got = ufunc(arg1, args[0]) + expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) + else: + got = ufunc(args[0], arg1) + expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) + + if ufunc.nout > 1: + for g, e in zip(got, expect): + if type_ == "cupy" and reflect: + assert (cp.asnumpy(g) == e).all() + else: + assert_eq(g, e, check_exact=False) + else: + if type_ == "cupy" and reflect: + assert (cp.asnumpy(got) == expect).all() + else: + assert_eq(got, expect, check_exact=False) + + @pytest.mark.parametrize("ufunc", _UFUNCS) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("indexed", [True, False]) @@ -117,11 +194,11 @@ def test_ufunc_series(ufunc, has_nulls, indexed): for g, e in zip(got, expect): if has_nulls: e[mask] = np.nan - assert_eq(g, e) + assert_eq(g, e, check_exact=False) else: if has_nulls: expect[mask] = np.nan - assert_eq(got, expect) + assert_eq(got, expect, check_exact=False) except AssertionError: # TODO: This branch can be removed when # https://github.com/rapidsai/cudf/issues/10178 is resolved @@ -195,14 +272,14 @@ def test_binary_ufunc_series_array(ufunc, has_nulls, indexed, type_, reflect): if type_ == "cupy" and reflect: assert (cp.asnumpy(g) == e).all() else: - assert_eq(g, e) + assert_eq(g, e, check_exact=False) else: if has_nulls: expect[mask] = np.nan if type_ == "cupy" and reflect: assert (cp.asnumpy(got) == expect).all() else: - assert_eq(got, expect) + assert_eq(got, expect, check_exact=False) @pytest.mark.parametrize( @@ -298,11 +375,11 @@ def test_ufunc_dataframe(ufunc, has_nulls, indexed): for g, e in zip(got, expect): if has_nulls: e[mask] = np.nan - assert_eq(g, e) + assert_eq(g, e, check_exact=False) else: if has_nulls: expect[mask] = np.nan - assert_eq(got, expect) + assert_eq(got, expect, check_exact=False) except AssertionError: # TODO: This branch can be removed when # https://github.com/rapidsai/cudf/issues/10178 is resolved diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 02ca7a0cd58..c90eb75b761 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1744,10 +1744,6 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype): expected = val == data.to_pandas() got = val == data - # In case of index, expected would be a numpy array - if isinstance(data, cudf.BaseIndex): - expected = pd.Index(expected) - utils.assert_eq(expected, got) @@ -2945,7 +2941,7 @@ def test_binops_non_cudf_types(obj_class, binop, other_type): data = range(1, 100) lhs = obj_class(data) rhs = other_type(data) - assert cp.all((binop(lhs, rhs) == binop(lhs, lhs)).values) + assert (binop(lhs, rhs) == binop(lhs, lhs)).all() @pytest.mark.parametrize("binop", _binops + _binops_compare) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index acd9e28c661..cab3ed8a30f 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3342,14 +3342,6 @@ def test_select_dtype_datetime_with_frequency(): ) -def test_array_ufunc(): - gdf = cudf.DataFrame({"x": [2, 3, 4.0], "y": [9.0, 2.5, 1.1]}) - pdf = gdf.to_pandas() - - assert_eq(np.sqrt(gdf), np.sqrt(pdf)) - assert_eq(np.sqrt(gdf.x), np.sqrt(pdf.x)) - - def test_dataframe_describe_exclude(): np.random.seed(12) data_length = 10000 diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index 8d504edd669..57b297004bf 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import sys @@ -90,9 +90,7 @@ def test_pickle_index(): idx = GenericIndex(np.arange(nelem), name="a") pickled = pickle.dumps(idx) out = pickle.loads(pickled) - # TODO: Once operations like `all` are supported on Index objects, we can - # just use that without calling values first. - assert (idx == out).values.all() + assert (idx == out).all() def test_pickle_buffer():