Refactor Series.__array_ufunc__ (#10217)

This PR is a first step in addressing #9083. It rewrites Series ufunc dispatch to use a simpler dispatch pattern that removes lookup in the overall cudf namespace, restricting only to Series methods or cupy functions. The changes in this PR also enable proper support for indexing so that ufuncs between two Series objects will work correctly and preserve indexes. Additionally, ufuncs will now support Series that contain nulls, which was not previously the case. Series methods that don't exist in `pandas.Series` that were previously only necessary to support ufunc dispatch have now been removed. Once this PR is merged, I will work on generalizing this approach to also support DataFrame objects, which should enable full support for ufuncs as well as allowing us to deprecate significant chunks of existing code. For the cases that previously worked, this PR does have some performance implications. Any operator that dispatches to cupy is about 3x faster now (assuming no nulls, since the nullable case was not previously supported), with the exception of logical operators for which we previously defined functions in the Series namespace that do not have pandas analogs. I've made a note in the code that we could reintroduce internal versions of these just for ufunc dispatch if that slowdown becomes a bottleneck for users, but for now I would prefer to avoid any more special cases than we really need. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Christopher Harris (https://github.com/cwharris) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: #10217
rapidsai · Feb 8, 2022 · 6e267bd · 6e267bd
1 parent fff51b8
commit 6e267bd
Show file tree

Hide file tree

Showing 5 changed files with 294 additions and 169 deletions.
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
@@ -669,6 +669,12 @@ def get_column_values_na(col):
             matrix[:, i] = get_column_values_na(col)
         return matrix
 
+    # TODO: As of now, calling cupy.asarray is _much_ faster than calling
+    # to_cupy. We should investigate the reasons why and whether we can provide
+    # a more efficient method here by exploiting __cuda_array_interface__. In
+    # particular, we need to benchmark how much of the overhead is coming from
+    # (potentially unavoidable) local copies in to_cupy and how much comes from
+    # inefficiencies in the implementation.
     def to_cupy(
         self,
         dtype: Union[Dtype, None] = None,
@@ -3622,6 +3628,8 @@ def dot(self, other, reflect=False):
         >>> [1, 2, 3, 4] @ s
         10
         """
+        # TODO: This function does not currently support nulls.
+        # TODO: This function does not properly support misaligned indexes.
         lhs = self.values
         if isinstance(other, Frame):
             rhs = other.values
@@ -3632,6 +3640,16 @@ def dot(self, other, reflect=False):
         ):
             rhs = cupy.asarray(other)
         else:
+            # TODO: This should raise an exception, not return NotImplemented,
+            # but __matmul__ relies on the current behavior. We should either
+            # move this implementation to __matmul__ and call it from here
+            # (checking for NotImplemented and raising NotImplementedError if
+            # that's what's returned), or __matmul__ should catch a
+            # NotImplementedError from here and return NotImplemented. The
+            # latter feels cleaner (putting the implementation in this method
+            # rather than in the operator) but will be slower in the (highly
+            # unlikely) case that we're multiplying a cudf object with another
+            # type of object that somehow supports this behavior.
             return NotImplemented
         if reflect:
             lhs, rhs = rhs, lhs

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -7,6 +7,7 @@
 import pickle
 import warnings
 from collections import abc as abc
+from itertools import repeat
 from numbers import Number
 from shutil import get_terminal_size
 from typing import Any, MutableMapping, Optional, Set, Union
@@ -958,14 +959,123 @@ def to_frame(self, name=None):
     def memory_usage(self, index=True, deep=False):
         return sum(super().memory_usage(index, deep).values())
 
+    # For more detail on this function and how it should work, see
+    # https://numpy.org/doc/stable/reference/ufuncs.html
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        if method == "__call__":
-            return get_appropriate_dispatched_func(
-                cudf, cudf.Series, cupy, ufunc, inputs, kwargs
-            )
-        else:
+        # We don't currently support reduction, accumulation, etc. We also
+        # don't support any special kwargs or higher arity ufuncs than binary.
+        if method != "__call__" or kwargs or ufunc.nin > 2:
             return NotImplemented
 
+        # Binary operations
+        binary_operations = {
+            # Arithmetic binary operations.
+            "add": "add",
+            "subtract": "sub",
+            "multiply": "mul",
+            "matmul": "matmul",
+            "divide": "truediv",
+            "true_divide": "truediv",
+            "floor_divide": "floordiv",
+            "power": "pow",
+            "float_power": "pow",
+            "remainder": "mod",
+            "mod": "mod",
+            "fmod": "mod",
+            # Bitwise binary operations.
+            "bitwise_and": "and",
+            "bitwise_or": "or",
+            "bitwise_xor": "xor",
+            # Comparison binary operators
+            "greater": "gt",
+            "greater_equal": "ge",
+            "less": "lt",
+            "less_equal": "le",
+            "not_equal": "ne",
+            "equal": "eq",
+        }
+
+        # First look for methods of the class.
+        fname = ufunc.__name__
+        if fname in binary_operations:
+            not_reflect = self is inputs[0]
+            other = inputs[not_reflect]
+            op = f"__{'' if not_reflect else 'r'}{binary_operations[fname]}__"
+
+            # pandas bitwise operations return bools if indexes are misaligned.
+            # TODO: Generalize for other types of Frames
+            if (
+                "bitwise" in fname
+                and isinstance(other, Series)
+                and not self.index.equals(other.index)
+            ):
+                return getattr(self, op)(other).astype(bool)
+            # Float_power returns float irrespective of the input type.
+            if fname == "float_power":
+                return getattr(self, op)(other).astype(float)
+            return getattr(self, op)(other)
+
+        # Special handling for unary operations.
+        if fname == "negative":
+            return self * -1
+        if fname == "positive":
+            return self.copy(deep=True)
+        if fname == "invert":
+            return ~self
+        if fname == "absolute":
+            return self.abs()
+        if fname == "fabs":
+            return self.abs().astype(np.float64)
+
+        # Note: There are some operations that may be supported by libcudf but
+        # are not supported by pandas APIs. In particular, libcudf binary
+        # operations support logical and/or operations, but those operations
+        # are not defined on pd.Series/DataFrame. For now those operations will
+        # dispatch to cupy, but if ufuncs are ever a bottleneck we could add
+        # special handling to dispatch those (or any other) functions that we
+        # could implement without cupy.
+
+        # Attempt to dispatch all other functions to cupy.
+        cupy_func = getattr(cupy, fname)
+        if cupy_func:
+            # Indices must be aligned before converting to arrays.
+            if ufunc.nin == 2 and all(map(isinstance, inputs, repeat(Series))):
+                inputs = _align_indices(inputs, allow_non_unique=True)
+                index = inputs[0].index
+            else:
+                index = self.index
+
+            cupy_inputs = []
+            mask = None
+            for inp in inputs:
+                # TODO: Generalize for other types of Frames
+                if isinstance(inp, Series) and inp.has_nulls:
+                    new_mask = as_column(inp.nullmask)
+
+                    # TODO: This is a hackish way to perform a bitwise and of
+                    # bitmasks. Once we expose cudf::detail::bitwise_and, then
+                    # we can use that instead.
+                    mask = new_mask if mask is None else (mask & new_mask)
+
+                    # Arbitrarily fill with zeros. For ufuncs, we assume that
+                    # the end result propagates nulls via a bitwise and, so
+                    # these elements are irrelevant.
+                    inp = inp.fillna(0)
+                cupy_inputs.append(cupy.asarray(inp))
+
+            cp_output = cupy_func(*cupy_inputs, **kwargs)
+
+            def make_frame(arr):
+                return self.__class__._from_data(
+                    {self.name: as_column(arr).set_mask(mask)}, index=index
+                )
+
+            if ufunc.nout > 1:
+                return tuple(make_frame(out) for out in cp_output)
+            return make_frame(cp_output)
+
+        return NotImplemented
+
     def __array_function__(self, func, types, args, kwargs):
         handled_types = [cudf.Series]
         for t in types:
@@ -1257,15 +1367,31 @@ def _binaryop(
         )
 
     def logical_and(self, other):
+        warnings.warn(
+            "Series.logical_and is deprecated and will be removed.",
+            FutureWarning,
+        )
         return self._binaryop(other, "l_and").astype(np.bool_)
 
     def remainder(self, other):
+        warnings.warn(
+            "Series.remainder is deprecated and will be removed.",
+            FutureWarning,
+        )
         return self._binaryop(other, "mod")
 
     def logical_or(self, other):
+        warnings.warn(
+            "Series.logical_or is deprecated and will be removed.",
+            FutureWarning,
+        )
         return self._binaryop(other, "l_or").astype(np.bool_)
 
     def logical_not(self):
+        warnings.warn(
+            "Series.logical_not is deprecated and will be removed.",
+            FutureWarning,
+        )
         return self._unaryop("not")
 
     @copy_docstring(CategoricalAccessor)  # type: ignore

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
@@ -46,7 +46,7 @@ def set_random_null_mask_inplace(series, null_probability=0.5, seed=None):
     probs = [null_probability, 1 - null_probability]
     rng = np.random.default_rng(seed=seed)
     mask = rng.choice([False, True], size=len(series), p=probs)
-    series[mask] = None
+    series.iloc[mask] = None
 
 
 # TODO: This function should be removed. Anywhere that it is being used should