From 2c074b1c9806c018d0280917b805374a55ca09ff Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 21 Jul 2021 15:36:41 -0700
Subject: [PATCH 01/39] Move reductions from NumericalBase into Reducible
 mixin.

---
 .../cudf/cudf/core/column/numerical_base.py   | 48 ++--------
 python/cudf/cudf/core/reductions.py           | 90 +++++++++++++++++++
 2 files changed, 98 insertions(+), 40 deletions(-)
 create mode 100644 python/cudf/cudf/core/reductions.py

diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 1f84cb88e37..68604124288 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -10,11 +10,12 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._typing import Dtype, ScalarLike
+from cudf._typing import ScalarLike
 from cudf.core.column import ColumnBase
+from cudf.core.reductions import Reducible
 
 
-class NumericalBaseColumn(ColumnBase):
+class NumericalBaseColumn(Reducible, ColumnBase):
     """A column composed of numerical data.
 
     This class encodes a standard interface for different types of columns
@@ -23,7 +24,7 @@ class NumericalBaseColumn(ColumnBase):
     point, should be encoded here.
     """
 
-    def reduce(
+    def _reduce(
         self, op: str, skipna: bool = None, min_count: int = 0, **kwargs
     ) -> ScalarLike:
         """Perform a reduction operation.
@@ -31,7 +32,10 @@ def reduce(
         op : str
             The operation to perform.
         skipna : bool
-            Whether or not na values must be
+            Whether or not na values must be skipped.
+        min_count : int, default 0
+            The minimum number of entries for the reduction, otherwise the
+            reduction returns NaN.
         """
         preprocessed = self._process_for_reduction(
             skipna=skipna, min_count=min_count
@@ -41,42 +45,6 @@ def reduce(
         else:
             return preprocessed
 
-    def sum(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
-    ) -> ScalarLike:
-        return self.reduce(
-            "sum", skipna=skipna, dtype=dtype, min_count=min_count
-        )
-
-    def product(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
-    ) -> ScalarLike:
-        return self.reduce(
-            "product", skipna=skipna, dtype=dtype, min_count=min_count
-        )
-
-    def mean(
-        self, skipna: bool = None, dtype: Dtype = np.float64
-    ) -> ScalarLike:
-        return self.reduce("mean", skipna=skipna, dtype=dtype)
-
-    def var(
-        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64
-    ) -> ScalarLike:
-        return self.reduce("var", skipna=skipna, dtype=dtype, ddof=ddof)
-
-    def std(
-        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64
-    ) -> ScalarLike:
-        return self.reduce("std", skipna=skipna, dtype=dtype, ddof=ddof)
-
-    def sum_of_squares(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
-    ) -> ScalarLike:
-        return self.reduce(
-            "sum_of_squares", skipna=skipna, dtype=dtype, min_count=min_count
-        )
-
     def _can_return_nan(self, skipna: bool = None) -> bool:
         return not skipna and self.has_nulls()
 
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
new file mode 100644
index 00000000000..148e1d78ab8
--- /dev/null
+++ b/python/cudf/cudf/core/reductions.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+import numpy as np
+
+
+class Reducible:
+    """Mixin encapsulating for reduction operations.
+
+    Various classes in cuDF support
+    `reductions <https://en.wikipedia.org/wiki/Reduction_Operator>`__.  In
+    practice the reductions are implemented via dispatch to a lower-level API.
+    For example, Frame-like objects dispatch to Columns, which in turn dispatch
+    to libcudf implementations. As a result, rather than encoding the logic for
+    different types of reductions, most classes can implement all reductions
+    in terms of a single function that performs necessary pre- and
+    post-processing of a result generated by calling to a lower-level API. This
+    class encapsulates that paradigm.
+    """
+
+    # TODO: We need a way to indicate supported/unsupported operations.
+    # Alternatively, we could just leave that up to the implementation of
+    # _reduce.
+    # TODO: If supported operations are indicated using a class member list
+    # (_SUPPORTED_OPERATIONS or so) we could use __init_subclass__ to combine
+    # all parent lists via walking the MRO.
+
+    def _reduce(self, op: str, *args, **kwargs):
+        raise NotImplementedError
+
+    def sum(self, *args, **kwargs):
+        return self._reduce("sum", *args, **kwargs)
+
+    def product(self, *args, **kwargs):
+        return self._reduce("product", *args, **kwargs)
+
+    # def min(self, *args, **kwargs):
+    #     return self._reduce("min", *args, **kwargs)
+    #
+    # def max(self, *args, **kwargs):
+    #     return self._reduce("max", *args, **kwargs)
+
+    # def count(self, *args, **kwargs):
+    #     return self._reduce("count", *args, **kwargs)
+    #
+    # def size(self, *args, **kwargs):
+    #     return self._reduce("size", *args, **kwargs)
+    #
+    # def any(self, *args, **kwargs):
+    #     return self._reduce("any", *args, **kwargs)
+    #
+    # def all(self, *args, **kwargs):
+    #     return self._reduce("all", *args, **kwargs)
+
+    def sum_of_squares(self, *args, **kwargs):
+        return self._reduce("sum_of_squares", *args, **kwargs)
+
+    # TODO: Need a better way of setting a default dtype. If nothing else, I
+    # guess we'll just need to override this parameter in the relevant column
+    # types (numerical_base).
+    def mean(self, dtype=np.float64, *args, **kwargs):
+        return self._reduce("mean", dtype=dtype, *args, **kwargs)
+
+    def var(self, dtype=np.float64, *args, **kwargs):
+        return self._reduce("var", dtype=dtype, *args, **kwargs)
+
+    def std(self, dtype=np.float64, *args, **kwargs):
+        return self._reduce("std", dtype=dtype, *args, **kwargs)
+
+    # def median(self, *args, **kwargs):
+    #     return self._reduce("median", *args, **kwargs)
+    #
+    # def quantile(self, *args, **kwargs):
+    #     return self._reduce("quantile", *args, **kwargs)
+    #
+    # def argmax(self, *args, **kwargs):
+    #     return self._reduce("argmax", *args, **kwargs)
+    #
+    # def argmin(self, *args, **kwargs):
+    #     return self._reduce("argmin", *args, **kwargs)
+    #
+    # def nunique(self, *args, **kwargs):
+    #     return self._reduce("nunique", *args, **kwargs)
+    #
+    # def nth(self, *args, **kwargs):
+    #     return self._reduce("nth", *args, **kwargs)
+
+    # def collect(self, *args, **kwargs):
+    #     return self._reduce("sum", *args, **kwargs)
+    #
+    # def unique(self, *args, **kwargs):
+    #     return self._reduce("sum", *args, **kwargs)

From 970b45d0bda643670eec1ed42ddf055dc3e7d6b5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 21 Jul 2021 15:43:41 -0700
Subject: [PATCH 02/39] Override dtype for numerical type methods that require
 it.

---
 python/cudf/cudf/core/column/numerical_base.py |  9 +++++++++
 python/cudf/cudf/core/reductions.py            | 16 ++++++----------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 68604124288..de9cb139519 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -116,6 +116,15 @@ def quantile(
             )
         return result
 
+    def mean(self, dtype=np.float64, *args, **kwargs):
+        return super().mean(dtype=dtype, *args, **kwargs)
+
+    def var(self, dtype=np.float64, *args, **kwargs):
+        return super().var(dtype=dtype, *args, **kwargs)
+
+    def std(self, dtype=np.float64, *args, **kwargs):
+        return super().std(dtype=dtype, *args, **kwargs)
+
     def median(self, skipna: bool = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
 
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index 148e1d78ab8..7d787d54ab7 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -1,5 +1,4 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
-import numpy as np
 
 
 class Reducible:
@@ -53,17 +52,14 @@ def product(self, *args, **kwargs):
     def sum_of_squares(self, *args, **kwargs):
         return self._reduce("sum_of_squares", *args, **kwargs)
 
-    # TODO: Need a better way of setting a default dtype. If nothing else, I
-    # guess we'll just need to override this parameter in the relevant column
-    # types (numerical_base).
-    def mean(self, dtype=np.float64, *args, **kwargs):
-        return self._reduce("mean", dtype=dtype, *args, **kwargs)
+    def mean(self, *args, **kwargs):
+        return self._reduce("mean", *args, **kwargs)
 
-    def var(self, dtype=np.float64, *args, **kwargs):
-        return self._reduce("var", dtype=dtype, *args, **kwargs)
+    def var(self, *args, **kwargs):
+        return self._reduce("var", *args, **kwargs)
 
-    def std(self, dtype=np.float64, *args, **kwargs):
-        return self._reduce("std", dtype=dtype, *args, **kwargs)
+    def std(self, *args, **kwargs):
+        return self._reduce("std", *args, **kwargs)
 
     # def median(self, *args, **kwargs):
     #     return self._reduce("median", *args, **kwargs)

From 0a774f628bd234ef9a5be64447ae83e69c8b93fd Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 18 Nov 2021 17:02:58 -0800
Subject: [PATCH 03/39] Generate reductions programmatically.

---
 python/cudf/cudf/core/column/column.py        |  36 -----
 .../cudf/cudf/core/column/numerical_base.py   |  21 ++-
 python/cudf/cudf/core/column/timedelta.py     |   5 +-
 python/cudf/cudf/core/dataframe.py            |  11 +-
 python/cudf/cudf/core/reductions.py           | 130 +++++++++---------
 python/cudf/cudf/core/series.py               |  10 +-
 python/cudf/cudf/core/single_column_frame.py  |   5 +-
 7 files changed, 106 insertions(+), 112 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 1a83194489d..d28e12c1eaf 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1176,42 +1176,6 @@ def max(self, skipna: bool = None, dtype: Dtype = None):
             return libcudf.reduce.reduce("max", result_col, dtype=dtype)
         return result_col
 
-    def sum(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
-    ):
-        raise TypeError(f"cannot perform sum with type {self.dtype}")
-
-    def product(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
-    ):
-        raise TypeError(f"cannot perform product with type {self.dtype}")
-
-    def mean(self, skipna: bool = None, dtype: Dtype = None):
-        raise TypeError(f"cannot perform mean with type {self.dtype}")
-
-    def std(self, skipna: bool = None, ddof=1, dtype: Dtype = np.float64):
-        raise TypeError(f"cannot perform std with type {self.dtype}")
-
-    def var(self, skipna: bool = None, ddof=1, dtype: Dtype = np.float64):
-        raise TypeError(f"cannot perform var with type {self.dtype}")
-
-    def kurtosis(self, skipna: bool = None):
-        raise TypeError(f"cannot perform kurtosis with type {self.dtype}")
-
-    def skew(self, skipna: bool = None):
-        raise TypeError(f"cannot perform skew with type {self.dtype}")
-
-    def cov(self, other: ColumnBase):
-        raise TypeError(
-            f"cannot perform covarience with types {self.dtype}, "
-            f"{other.dtype}"
-        )
-
-    def corr(self, other: ColumnBase):
-        raise TypeError(
-            f"cannot perform corr with types {self.dtype}, {other.dtype}"
-        )
-
     def nans_to_nulls(self: T) -> T:
         # Only floats can contain nan.
         if self.dtype.kind != "f":
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index de9cb139519..866e864d407 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -24,8 +24,17 @@ class NumericalBaseColumn(Reducible, ColumnBase):
     point, should be encoded here.
     """
 
+    _VALID_REDUCTIONS = {
+        "sum",
+        "product",
+        "sum_of_squares",
+        "mean",
+        "var",
+        "std",
+    }
+
     def _reduce(
-        self, op: str, skipna: bool = None, min_count: int = 0, **kwargs
+        self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs
     ) -> ScalarLike:
         """Perform a reduction operation.
 
@@ -117,13 +126,13 @@ def quantile(
         return result
 
     def mean(self, dtype=np.float64, *args, **kwargs):
-        return super().mean(dtype=dtype, *args, **kwargs)
+        return self._reduce("mean", dtype=dtype, *args, **kwargs)
 
     def var(self, dtype=np.float64, *args, **kwargs):
-        return super().var(dtype=dtype, *args, **kwargs)
+        return self._reduce("var", dtype=dtype, *args, **kwargs)
 
     def std(self, dtype=np.float64, *args, **kwargs):
-        return super().std(dtype=dtype, *args, **kwargs)
+        return self._reduce("std", dtype=dtype, *args, **kwargs)
 
     def median(self, skipna: bool = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
@@ -148,7 +157,7 @@ def _numeric_quantile(
             self, quant, interpolation, sorted_indices, exact
         )
 
-    def cov(self, other: ColumnBase) -> float:
+    def cov(self, other: NumericalBaseColumn) -> float:
         if (
             len(self) == 0
             or len(other) == 0
@@ -160,7 +169,7 @@ def cov(self, other: ColumnBase) -> float:
         cov_sample = result.sum() / (len(self) - 1)
         return cov_sample
 
-    def corr(self, other: ColumnBase) -> float:
+    def corr(self, other: NumericalBaseColumn) -> float:
         if len(self) == 0 or len(other) == 0:
             return cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
 
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 4b7a3bcc197..400214475e7 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -397,7 +397,10 @@ def sum(
         self, skipna: bool = None, dtype: Dtype = None, min_count=0
     ) -> pd.Timedelta:
         return pd.Timedelta(
-            self.as_numerical.sum(
+            # TODO: mypy doesn't fully support monkey-patching, so the
+            # monkey-patching of reductions creates some serious limitations.
+            # We'll have to see how bad the problem is.
+            self.as_numerical.sum(  # type: ignore
                 skipna=skipna, dtype=dtype, min_count=min_count
             ),
             unit=self.time_unit,
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c686cd0fd39..f07f2c46e41 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5328,10 +5328,13 @@ def _reduce(
         axis = self._get_axis_from_axis_arg(axis)
 
         if axis == 0:
-            result = [
-                getattr(self._data[col], op)(**kwargs)
-                for col in self._data.names
-            ]
+            try:
+                result = [
+                    getattr(self._data[col], op)(**kwargs)
+                    for col in self._data.names
+                ]
+            except AttributeError:
+                raise TypeError(f"cannot perform {op} with type {self.dtype}")
 
             return Series._from_data(
                 {None: result}, as_index(self._data.names)
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index 7d787d54ab7..c813aeda1d8 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -1,6 +1,31 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
 
+# TODO: Implement a factory that generates classes encapsulating passthrough
+# operations like this. Then we can programmatically create suitable classes
+# for reductions, scans, and binops (and perhaps others).
+
+
+# TODO: The docstring for a class's reductions should be taken from formatting
+# the reduce method since it needs to support all the same things.
+
+
+# TODO: We have 3 options for how to handle unsupported operations:
+# 1. We only monkey-patch in the operations that are well-defined. This is what
+#    I'm currently doing, and it works, but it precludes mypy checking.
+# 2. We define _all_ aggregations in Reducible and then delete the ill-defined
+#    ones in __init_subclass__. This is basically the inverse of what I'm
+#    currently doing. Relative to option 1, it replaces mypy false positives
+#    for false negatives: instead of saying attributes don't exist when they
+#    do, it will never detect missing attributes that really shouldn't exist
+#    for certain column types.
+# 3. Define all aggregations like in 2 but don't delete them. mypy will treat
+#    this essentially identically to 2, so we don't get any more useful
+#    diagnostics, but we run into the additional problem that now all
+#    reduction functions are defined for all subclasses, even if they aren't
+#    well-defined operations.
+
+
 class Reducible:
     """Mixin encapsulating for reduction operations.
 
@@ -15,72 +40,53 @@ class Reducible:
     class encapsulates that paradigm.
     """
 
-    # TODO: We need a way to indicate supported/unsupported operations.
-    # Alternatively, we could just leave that up to the implementation of
-    # _reduce.
-    # TODO: If supported operations are indicated using a class member list
-    # (_SUPPORTED_OPERATIONS or so) we could use __init_subclass__ to combine
-    # all parent lists via walking the MRO.
+    _SUPPORTED_REDUCTIONS = {
+        "sum",
+        "product",
+        "min",
+        "max",
+        "count",
+        "size",
+        "any",
+        "all",
+        "sum_of_squares",
+        "mean",
+        "var",
+        "std",
+        "median",
+        "quantile",
+        "argmax",
+        "argmin",
+        "nunique",
+        "nth",
+        "collect",
+        "unique",
+    }
 
     def _reduce(self, op: str, *args, **kwargs):
         raise NotImplementedError
 
-    def sum(self, *args, **kwargs):
-        return self._reduce("sum", *args, **kwargs)
-
-    def product(self, *args, **kwargs):
-        return self._reduce("product", *args, **kwargs)
-
-    # def min(self, *args, **kwargs):
-    #     return self._reduce("min", *args, **kwargs)
-    #
-    # def max(self, *args, **kwargs):
-    #     return self._reduce("max", *args, **kwargs)
-
-    # def count(self, *args, **kwargs):
-    #     return self._reduce("count", *args, **kwargs)
-    #
-    # def size(self, *args, **kwargs):
-    #     return self._reduce("size", *args, **kwargs)
-    #
-    # def any(self, *args, **kwargs):
-    #     return self._reduce("any", *args, **kwargs)
-    #
-    # def all(self, *args, **kwargs):
-    #     return self._reduce("all", *args, **kwargs)
-
-    def sum_of_squares(self, *args, **kwargs):
-        return self._reduce("sum_of_squares", *args, **kwargs)
-
-    def mean(self, *args, **kwargs):
-        return self._reduce("mean", *args, **kwargs)
-
-    def var(self, *args, **kwargs):
-        return self._reduce("var", *args, **kwargs)
+    @classmethod
+    def _add_reduction(cls, reduction):
+        def op(self, *args, **kwargs):
+            return self._reduce(reduction, *args, **kwargs)
 
-    def std(self, *args, **kwargs):
-        return self._reduce("std", *args, **kwargs)
+        setattr(cls, reduction, op)
 
-    # def median(self, *args, **kwargs):
-    #     return self._reduce("median", *args, **kwargs)
-    #
-    # def quantile(self, *args, **kwargs):
-    #     return self._reduce("quantile", *args, **kwargs)
-    #
-    # def argmax(self, *args, **kwargs):
-    #     return self._reduce("argmax", *args, **kwargs)
-    #
-    # def argmin(self, *args, **kwargs):
-    #     return self._reduce("argmin", *args, **kwargs)
-    #
-    # def nunique(self, *args, **kwargs):
-    #     return self._reduce("nunique", *args, **kwargs)
-    #
-    # def nth(self, *args, **kwargs):
-    #     return self._reduce("nth", *args, **kwargs)
+    @classmethod
+    def __init_subclass__(cls):
+        # Only add the set of reductions that are valid for a particular class.
+        # Subclasses may override the methods directly if they need special
+        # behavior.
+        valid_reductions = set()
+        for base_cls in cls.__mro__:
+            valid_reductions |= getattr(base_cls, "_VALID_REDUCTIONS", set())
 
-    # def collect(self, *args, **kwargs):
-    #     return self._reduce("sum", *args, **kwargs)
-    #
-    # def unique(self, *args, **kwargs):
-    #     return self._reduce("sum", *args, **kwargs)
+        assert len(valid_reductions - cls._SUPPORTED_REDUCTIONS) == 0, (
+            "Invalid requested reductions "
+            f"{valid_reductions - cls._SUPPORTED_REDUCTIONS}"
+        )
+        for reduction in valid_reductions:
+            # Allow overriding the operation in the classes.
+            if not hasattr(cls, reduction):
+                cls._add_reduction(reduction)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index e96531d4b1c..f2c07d8b492 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2752,7 +2752,10 @@ def cov(self, other, min_periods=None):
 
         lhs, rhs = _align_indices([lhs, rhs], how="inner")
 
-        return lhs._column.cov(rhs._column)
+        try:
+            return lhs._column.cov(rhs._column)
+        except AttributeError:
+            raise TypeError(f"cannot perform cov with type {self.dtype}")
 
     def transpose(self):
         """Return the transpose, which is by definition self.
@@ -2788,7 +2791,10 @@ def corr(self, other, method="pearson", min_periods=None):
         rhs = other.nans_to_nulls().dropna()
         lhs, rhs = _align_indices([lhs, rhs], how="inner")
 
-        return lhs._column.corr(rhs._column)
+        try:
+            return lhs._column.corr(rhs._column)
+        except AttributeError:
+            raise TypeError(f"cannot perform corr with type {self.dtype}")
 
     def autocorr(self, lag=1):
         """Compute the lag-N autocorrelation. This method computes the Pearson
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index 7793a2fdf29..44779ff54a7 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -45,7 +45,10 @@ def _reduce(
             raise NotImplementedError(
                 "numeric_only parameter is not implemented yet"
             )
-        return getattr(self._column, op)(**kwargs)
+        try:
+            return getattr(self._column, op)(**kwargs)
+        except AttributeError:
+            raise TypeError(f"cannot perform {op} with type {self.dtype}")
 
     def _scan(self, op, axis=None, *args, **kwargs):
         if axis not in (None, 0):

From 3435e90ffb3111c7a8d5f6f62e2524acb34316a1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 18 Nov 2021 18:02:43 -0800
Subject: [PATCH 04/39] Make ColumnBase Reducible.

---
 python/cudf/cudf/core/column/column.py        | 40 +++++++++++++------
 .../cudf/cudf/core/column/numerical_base.py   | 23 +----------
 2 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d28e12c1eaf..5651867d822 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -68,6 +68,7 @@
     ListDtype,
     StructDtype,
 )
+from cudf.core.reductions import Reducible
 from cudf.utils import utils
 from cudf.utils.dtypes import (
     cudf_dtype_from_pa_type,
@@ -82,7 +83,14 @@
 T = TypeVar("T", bound="ColumnBase")
 
 
-class ColumnBase(Column, Serializable):
+class ColumnBase(Column, Serializable, Reducible):
+    _VALID_REDUCTIONS = {
+        "any",
+        "all",
+        "max",
+        "min",
+    }
+
     def as_frame(self) -> "cudf.core.frame.Frame":
         """
         Converts a Column to Frame
@@ -1164,17 +1172,25 @@ def _minmax(self, skipna: bool = None):
             return libcudf.reduce.minmax(result_col)
         return result_col
 
-    def min(self, skipna: bool = None, dtype: Dtype = None):
-        result_col = self._process_for_reduction(skipna=skipna)
-        if isinstance(result_col, ColumnBase):
-            return libcudf.reduce.reduce("min", result_col, dtype=dtype)
-        return result_col
-
-    def max(self, skipna: bool = None, dtype: Dtype = None):
-        result_col = self._process_for_reduction(skipna=skipna)
-        if isinstance(result_col, ColumnBase):
-            return libcudf.reduce.reduce("max", result_col, dtype=dtype)
-        return result_col
+    def _reduce(
+        self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs
+    ) -> ScalarLike:
+        """Perform a reduction operation.
+
+        op : str
+            The operation to perform.
+        skipna : bool
+            Whether or not na values must be skipped.
+        min_count : int, default 0
+            The minimum number of entries for the reduction, otherwise the
+            reduction returns NaN.
+        """
+        preprocessed = self._process_for_reduction(
+            skipna=skipna, min_count=min_count
+        )
+        if isinstance(preprocessed, ColumnBase):
+            return libcudf.reduce.reduce(op, preprocessed, **kwargs)
+        return preprocessed
 
     def nans_to_nulls(self: T) -> T:
         # Only floats can contain nan.
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 866e864d407..725e15aa3e2 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -15,7 +15,7 @@
 from cudf.core.reductions import Reducible
 
 
-class NumericalBaseColumn(Reducible, ColumnBase):
+class NumericalBaseColumn(ColumnBase, Reducible):
     """A column composed of numerical data.
 
     This class encodes a standard interface for different types of columns
@@ -33,27 +33,6 @@ class NumericalBaseColumn(Reducible, ColumnBase):
         "std",
     }
 
-    def _reduce(
-        self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs
-    ) -> ScalarLike:
-        """Perform a reduction operation.
-
-        op : str
-            The operation to perform.
-        skipna : bool
-            Whether or not na values must be skipped.
-        min_count : int, default 0
-            The minimum number of entries for the reduction, otherwise the
-            reduction returns NaN.
-        """
-        preprocessed = self._process_for_reduction(
-            skipna=skipna, min_count=min_count
-        )
-        if isinstance(preprocessed, ColumnBase):
-            return libcudf.reduce.reduce(op, preprocessed, **kwargs)
-        else:
-            return preprocessed
-
     def _can_return_nan(self, skipna: bool = None) -> bool:
         return not skipna and self.has_nulls()
 

From 77c77678f2a27b1a09e9877233a5a66e58bd2d53 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Nov 2021 12:15:14 -0800
Subject: [PATCH 05/39] Use Reducible in GroupBy.

---
 python/cudf/cudf/core/groupby/groupby.py | 70 +++++++-----------------
 python/cudf/cudf/core/reductions.py      |  6 ++
 python/cudf/cudf/core/resample.py        |  2 +
 3 files changed, 27 insertions(+), 51 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index a393d8e9457..441b594c34f 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -16,6 +16,7 @@
 from cudf.core.abc import Serializable
 from cudf.core.column.column import arange, as_column
 from cudf.core.multiindex import MultiIndex
+from cudf.core.reductions import Reducible
 from cudf.utils.utils import GetAttrGetItemMixin, cached_property
 
 
@@ -34,7 +35,21 @@ def _quantile_75(x):
     return x.quantile(0.75)
 
 
-class GroupBy(Serializable):
+class GroupBy(Serializable, Reducible):
+
+    _VALID_REDUCTIONS = {
+        "sum",
+        "prod",
+        "idxmin",
+        "idxmax",
+        "min",
+        "max",
+        "mean",
+        "median",
+        "nunique",
+        "first",
+        "last",
+    }
 
     _MAX_GROUPS_BEFORE_WARN = 100
 
@@ -295,6 +310,7 @@ def agg(self, func):
 
         return result
 
+    _reduce = agg
     aggregate = agg
 
     def nth(self, n):
@@ -811,38 +827,6 @@ def describe(self, include=None, exclude=None):
         )
         return res
 
-    def sum(self):
-        """Compute the column-wise sum of the values in each group."""
-        return self.agg("sum")
-
-    def prod(self):
-        """Compute the column-wise product of the values in each group."""
-        return self.agg("prod")
-
-    def idxmin(self):
-        """Get the column-wise index of the minimum value in each group."""
-        return self.agg("idxmin")
-
-    def idxmax(self):
-        """Get the column-wise index of the maximum value in each group."""
-        return self.agg("idxmax")
-
-    def min(self):
-        """Get the column-wise minimum value in each group."""
-        return self.agg("min")
-
-    def max(self):
-        """Get the column-wise maximum value in each group."""
-        return self.agg("max")
-
-    def mean(self):
-        """Compute the column-wise mean of the values in each group."""
-        return self.agg("mean")
-
-    def median(self):
-        """Get the column-wise median of the values in each group."""
-        return self.agg("median")
-
     def corr(self, method="pearson", min_periods=1):
         """
         Compute pairwise correlation of columns, excluding NA/null values.
@@ -1005,10 +989,6 @@ def func(x):
 
         return self.agg(func)
 
-    def nunique(self):
-        """Compute the number of unique values in each column in each group."""
-        return self.agg("nunique")
-
     def collect(self):
         """Get a list of all the values for each column in each group."""
         return self.agg("collect")
@@ -1030,14 +1010,6 @@ def cummax(self):
         """Get the column-wise cumulative maximum value in each group."""
         return self.agg("cummax")
 
-    def first(self):
-        """Get the first non-null value in each group."""
-        return self.agg("first")
-
-    def last(self):
-        """Get the last non-null value in each group."""
-        return self.agg("last")
-
     def diff(self, periods=1, axis=0):
         """Get the difference between the values in each group.
 
@@ -1367,12 +1339,6 @@ def __getitem__(self, key):
             by=self.grouping.keys, dropna=self._dropna, sort=self._sort
         )
 
-    def nunique(self):
-        """
-        Return the number of unique values per group
-        """
-        return self.agg("nunique")
-
 
 class SeriesGroupBy(GroupBy):
     """
@@ -1451,6 +1417,8 @@ def agg(self, func):
 
         return result
 
+    _reduce = agg
+
     def apply(self, func):
         result = super().apply(func)
 
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index c813aeda1d8..a1e4c678f7a 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -61,6 +61,12 @@ class encapsulates that paradigm.
         "nth",
         "collect",
         "unique",
+        "prod",
+        "idxmin",
+        "idxmax",
+        "nunique",
+        "first",
+        "last",
     }
 
     def _reduce(self, op: str, *args, **kwargs):
diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py
index a4810701781..e51b829da95 100644
--- a/python/cudf/cudf/core/resample.py
+++ b/python/cudf/cudf/core/resample.py
@@ -49,6 +49,8 @@ def agg(self, func):
         else:
             return result.sort_index()
 
+    _reduce = agg
+
     def asfreq(self):
         return self.obj._align_to_index(
             self.grouping.bin_labels,

From 4d40e902e42c011c138e66c298d686653856a6d9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Nov 2021 15:13:13 -0800
Subject: [PATCH 06/39] Format _reduce to generate docstrings.

---
 python/cudf/cudf/core/column/column.py   |  4 +--
 python/cudf/cudf/core/groupby/groupby.py | 43 ++++++++++++++++++++++--
 python/cudf/cudf/core/reductions.py      | 26 +++++++-------
 python/cudf/cudf/core/resample.py        |  2 --
 4 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5651867d822..d1543d6fb69 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1175,10 +1175,8 @@ def _minmax(self, skipna: bool = None):
     def _reduce(
         self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs
     ) -> ScalarLike:
-        """Perform a reduction operation.
+        """Compute {op} of column values.
 
-        op : str
-            The operation to perform.
         skipna : bool
             Whether or not na values must be skipped.
         min_count : int, default 0
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 441b594c34f..e9dc6aa17e2 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -310,7 +310,46 @@ def agg(self, func):
 
         return result
 
-    _reduce = agg
+    def _reduce(
+        self,
+        op: str,
+        numeric_only: bool = False,
+        min_count: int = 0,
+        *args,
+        **kwargs,
+    ):
+        """Compute {op} of group values.
+
+        Parameters
+        ----------
+        numeric_only : bool, default None
+            Include only float, int, boolean columns. If None, will attempt to
+            use everything, then use only numeric data.
+        min_count : int, default 0
+            The required number of valid values to perform the operation. If
+            fewer than ``min_count`` non-NA values are present the result will
+            be NA.
+
+        Returns
+        -------
+        Series or DataFrame
+            Computed {op} of values within each group.
+
+        Notes
+        -----
+        Difference from pandas:
+            * Not supporting: numeric_only, min_count
+        """
+        if numeric_only:
+            raise NotImplementedError(
+                "numeric_only parameter is not implemented yet"
+            )
+        if min_count != 0:
+            raise NotImplementedError(
+                "min_count parameter is not implemented yet"
+            )
+        return self.agg(op)
+
     aggregate = agg
 
     def nth(self, n):
@@ -1417,8 +1456,6 @@ def agg(self, func):
 
         return result
 
-    _reduce = agg
-
     def apply(self, func):
         result = super().apply(func)
 
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index a1e4c678f7a..e554dd34e35 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -10,20 +10,8 @@
 # the reduce method since it needs to support all the same things.
 
 
-# TODO: We have 3 options for how to handle unsupported operations:
-# 1. We only monkey-patch in the operations that are well-defined. This is what
-#    I'm currently doing, and it works, but it precludes mypy checking.
-# 2. We define _all_ aggregations in Reducible and then delete the ill-defined
-#    ones in __init_subclass__. This is basically the inverse of what I'm
-#    currently doing. Relative to option 1, it replaces mypy false positives
-#    for false negatives: instead of saying attributes don't exist when they
-#    do, it will never detect missing attributes that really shouldn't exist
-#    for certain column types.
-# 3. Define all aggregations like in 2 but don't delete them. mypy will treat
-#    this essentially identically to 2, so we don't get any more useful
-#    diagnostics, but we run into the additional problem that now all
-#    reduction functions are defined for all subclasses, even if they aren't
-#    well-defined operations.
+# TODO: Consider using a pyi file to trick mypy into seeing the monkey-patched
+# methods.
 
 
 class Reducible:
@@ -38,6 +26,13 @@ class Reducible:
     in terms of a single function that performs necessary pre- and
     post-processing of a result generated by calling to a lower-level API. This
     class encapsulates that paradigm.
+
+    Notes
+    -----
+    The documentation for the reductions is generated by formatting the
+    docstring for _reduce via `cls._reduce.__doc__.format(op=reduction)`.
+    Therefore, subclasses are responsible for writing an appropriate docstring
+    for the _reduce method (one that omits documentation of the op parameter).
     """
 
     _SUPPORTED_REDUCTIONS = {
@@ -69,6 +64,7 @@ class encapsulates that paradigm.
         "last",
     }
 
+    # TODO: Add a return type.
     def _reduce(self, op: str, *args, **kwargs):
         raise NotImplementedError
 
@@ -77,6 +73,8 @@ def _add_reduction(cls, reduction):
         def op(self, *args, **kwargs):
             return self._reduce(reduction, *args, **kwargs)
 
+        # The default docstring is that
+        op.__doc__ = cls._reduce.__doc__.format(op=reduction)
         setattr(cls, reduction, op)
 
     @classmethod
diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py
index e51b829da95..a4810701781 100644
--- a/python/cudf/cudf/core/resample.py
+++ b/python/cudf/cudf/core/resample.py
@@ -49,8 +49,6 @@ def agg(self, func):
         else:
             return result.sort_index()
 
-    _reduce = agg
-
     def asfreq(self):
         return self.obj._align_to_index(
             self.grouping.bin_labels,

From 31772178a57b243da6294a8307ca868c7f2021de Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Nov 2021 15:52:34 -0800
Subject: [PATCH 07/39] Make Rolling Reducible.

---
 python/cudf/cudf/core/groupby/groupby.py |  2 ++
 python/cudf/cudf/core/window/rolling.py  | 32 ++++++++++++++++--------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index e9dc6aa17e2..b2298f42fb1 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -49,6 +49,8 @@ class GroupBy(Serializable, Reducible):
         "nunique",
         "first",
         "last",
+        "var",
+        "std",
     }
 
     _MAX_GROUPS_BEFORE_WARN = 100
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index 0f4256e49a6..059daf99ecd 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -11,11 +11,12 @@
 from cudf.api.types import is_integer, is_number
 from cudf.core import column
 from cudf.core.column.column import as_column
+from cudf.core.reductions import Reducible
 from cudf.utils import cudautils
 from cudf.utils.utils import GetAttrGetItemMixin
 
 
-class Rolling(GetAttrGetItemMixin):
+class Rolling(GetAttrGetItemMixin, Reducible):
     """
     Rolling window calculations.
 
@@ -163,6 +164,15 @@ class Rolling(GetAttrGetItemMixin):
 
     _time_window = False
 
+    _VALID_REDUCTIONS = {
+        "sum",
+        "min",
+        "max",
+        "mean",
+        "var",
+        "std",
+    }
+
     def __init__(
         self,
         obj,
@@ -256,17 +266,17 @@ def _apply_agg(self, agg_name):
         else:
             return self._apply_agg_dataframe(self.obj, agg_name)
 
-    def sum(self):
-        return self._apply_agg("sum")
-
-    def min(self):
-        return self._apply_agg("min")
-
-    def max(self):
-        return self._apply_agg("max")
+    def _reduce(
+        self, op: str, *args, **kwargs,
+    ):
+        """Calculate the rolling {op}.
 
-    def mean(self):
-        return self._apply_agg("mean")
+        Returns
+        -------
+        Series or DataFrame
+            Return type is the same as the original object.
+        """
+        return self._apply_agg(op)
 
     def var(self, ddof=1):
         self.agg_params["ddof"] = ddof

From 10eeab2bf710e067475501eb351c6c5654dcdca5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 22 Nov 2021 13:09:53 -0800
Subject: [PATCH 08/39] Add full support for new reduction functions to match
 docstrings and signatures.

---
 python/cudf/cudf/core/reductions.py | 61 +++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index e554dd34e35..c7c8b10118e 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -6,13 +6,20 @@
 # for reductions, scans, and binops (and perhaps others).
 
 
-# TODO: The docstring for a class's reductions should be taken from formatting
-# the reduce method since it needs to support all the same things.
+# TODO: Figure out how to support examples in the docstrings.
 
 
 # TODO: Consider using a pyi file to trick mypy into seeing the monkey-patched
 # methods.
 
+# TODO: Add type annotations for the class variables.
+
+# TODO: Extract signature from _reduce
+
+# TODO: Take advantage of the new approach to also format the cls into the doc.
+
+import inspect
+
 
 class Reducible:
     """Mixin encapsulating for reduction operations.
@@ -70,12 +77,52 @@ def _reduce(self, op: str, *args, **kwargs):
 
     @classmethod
     def _add_reduction(cls, reduction):
-        def op(self, *args, **kwargs):
-            return self._reduce(reduction, *args, **kwargs)
+        # This function creates reduction operations on-the-fly and assigns
+        # them to the class.
+
+        # Generate a signature without the `op` parameter.
+        signature = inspect.signature(cls._reduce)
+        new_params = signature.parameters.copy()
+        new_params.pop("op")
+        signature = signature.replace(parameters=new_params.values())
+
+        # Generate the list of arguments forwarded to _reduce.
+        arglist = ", ".join(
+            [
+                f"{key}={key}"
+                for key in signature.parameters
+                if key not in ("self", "args", "kwargs")
+            ]
+        )
+        if arglist:
+            arglist += ", *args, **kwargs"
+        else:
+            arglist = "*args, **kwargs"
+
+        # The default docstring is that of the _reduce method. Additional
+        # formatting arguments may be provided in a class-level dictionary
+        # of the form _REDUCTION_DOCSTRINGS
+        docstring = cls._reduce.__doc__.format(
+            cls=cls.__name__,
+            op=reduction,
+            **getattr(cls, "_REDUCTION_DOCSTRINGS", {}).get(reduction, {}),
+        )
 
-        # The default docstring is that
-        op.__doc__ = cls._reduce.__doc__.format(op=reduction)
-        setattr(cls, reduction, op)
+        # Create the desired function.
+        namespace = {}
+        out = """
+def {reduction}{signature}:
+    \"\"\"{docstring}
+    \"\"\"
+    return self._reduce(op="{reduction}", {arglist})
+        """.format(
+            reduction=reduction,
+            signature=str(signature),
+            arglist=arglist,
+            docstring=docstring,
+        )
+        exec(out, namespace)
+        setattr(cls, reduction, namespace[reduction])
 
     @classmethod
     def __init_subclass__(cls):

From 34cd0b9d1b3390aa3ff7c697840e7b71e01dc825 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 14 Jan 2022 14:36:19 -0800
Subject: [PATCH 09/39] Initial version of pyi file, all reductions working
 except for quantile and size.

---
 python/cudf/cudf/core/column/categorical.py   |  2 +-
 python/cudf/cudf/core/column/column.py        | 12 +--
 python/cudf/cudf/core/column/datetime.py      |  6 +-
 .../cudf/cudf/core/column/numerical_base.py   |  2 +-
 python/cudf/cudf/core/column/string.py        |  2 +-
 python/cudf/cudf/core/column/timedelta.py     | 10 +--
 python/cudf/cudf/core/reductions.py           |  8 --
 python/cudf/cudf/core/reductions.pyi          | 88 +++++++++++++++++++
 8 files changed, 105 insertions(+), 25 deletions(-)
 create mode 100644 python/cudf/cudf/core/reductions.pyi

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index de06e62cbb1..c210d631aff 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1000,7 +1000,7 @@ def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase":
     def data_array_view(self) -> cuda.devicearray.DeviceNDArray:
         return self.codes.data_array_view
 
-    def unique(self) -> CategoricalColumn:
+    def unique(self, *args, **kwargs) -> CategoricalColumn:
         codes = self.as_numerical.unique()
         return column.build_categorical_column(
             categories=self.categories,
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d1543d6fb69..741a2b5a860 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -187,7 +187,7 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool:
             return False
         return self.binary_operator("NULL_EQUALS", other).all()
 
-    def all(self, skipna: bool = True) -> bool:
+    def all(self, skipna: bool = True, *args, **kwargs) -> bool:
         # If all entries are null the result is True, including when the column
         # is empty.
         result_col = self.nans_to_nulls() if skipna else self
@@ -200,7 +200,7 @@ def all(self, skipna: bool = True) -> bool:
 
         return result_col
 
-    def any(self, skipna: bool = True) -> bool:
+    def any(self, skipna: bool = True, *args, **kwargs) -> bool:
         # Early exit for fast cases.
         result_col = self.nans_to_nulls() if skipna else self
         if not skipna and result_col.has_nulls():
@@ -686,12 +686,12 @@ def append(self, other: ColumnBase) -> ColumnBase:
     def quantile(
         self,
         q: Union[float, Sequence[float]],
-        interpolation: builtins.str,
-        exact: bool,
+        interpolation: str,
+        exact: bool, *args, **kwargs,
     ) -> ColumnBase:
         raise TypeError(f"cannot perform quantile with type {self.dtype}")
 
-    def median(self, skipna: bool = None) -> ScalarLike:
+    def median(self, skipna: bool = None, *args, **kwargs) -> ScalarLike:
         raise TypeError(f"cannot perform median with type {self.dtype}")
 
     def take(
@@ -1106,7 +1106,7 @@ def searchsorted(
             values, side, ascending=ascending, na_position=na_position
         )
 
-    def unique(self) -> ColumnBase:
+    def unique(self, *args, **kwargs) -> ColumnBase:
         """
         Get unique values in the data
         """
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index b763790986a..60645b0110d 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -350,21 +350,21 @@ def _default_na_value(self) -> DatetimeLikeScalar:
         """Returns the default NA value for this column"""
         return np.datetime64("nat", self.time_unit)
 
-    def mean(self, skipna=None, dtype=np.float64) -> ScalarLike:
+    def mean(self, skipna=None, dtype=np.float64, *args, **kwargs) -> ScalarLike:
         return pd.Timestamp(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
             unit=self.time_unit,
         )
 
     def std(
-        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64
+        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64, *args, **kwargs
     ) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype)
             * _numpy_to_pandas_conversion[self.time_unit],
         )
 
-    def median(self, skipna: bool = None) -> pd.Timestamp:
+    def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timestamp:
         return pd.Timestamp(
             self.as_numerical.median(skipna=skipna), unit=self.time_unit
         )
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 725e15aa3e2..68adf594462 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -113,7 +113,7 @@ def var(self, dtype=np.float64, *args, **kwargs):
     def std(self, dtype=np.float64, *args, **kwargs):
         return self._reduce("std", dtype=dtype, *args, **kwargs)
 
-    def median(self, skipna: bool = None) -> NumericalBaseColumn:
+    def median(self, skipna: bool = None, *args, **kwargs) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
 
         if self._can_return_nan(skipna=skipna):
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 9b44b4e6831..b286be78117 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5089,7 +5089,7 @@ def to_arrow(self) -> pa.Array:
             return super().to_arrow()
 
     def sum(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
+        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0, *args, **kwargs
     ):
         result_col = self._process_for_reduction(
             skipna=skipna, min_count=min_count
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 400214475e7..e92347d42d1 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -369,13 +369,13 @@ def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn:
             return self
         return libcudf.unary.cast(self, dtype=dtype)
 
-    def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta:
+    def mean(self, skipna=None, dtype: Dtype = np.float64, *args, **kwargs) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
             unit=self.time_unit,
         )
 
-    def median(self, skipna: bool = None) -> pd.Timedelta:
+    def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.median(skipna=skipna), unit=self.time_unit
         )
@@ -384,7 +384,7 @@ def isin(self, values: Sequence) -> ColumnBase:
         return cudf.core.tools.datetimes._isin_datetimelike(self, values)
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs
     ) -> "column.ColumnBase":
         result = self.as_numerical.quantile(
             q=q, interpolation=interpolation, exact=exact
@@ -394,7 +394,7 @@ def quantile(
         return result.astype(self.dtype)
 
     def sum(
-        self, skipna: bool = None, dtype: Dtype = None, min_count=0
+        self, skipna: bool = None, dtype: Dtype = None, min_count=0, *args, **kwargs
     ) -> pd.Timedelta:
         return pd.Timedelta(
             # TODO: mypy doesn't fully support monkey-patching, so the
@@ -407,7 +407,7 @@ def sum(
         )
 
     def std(
-        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64
+        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64, *args, **kwargs
     ) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype),
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index c7c8b10118e..ba7026faac4 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -6,18 +6,11 @@
 # for reductions, scans, and binops (and perhaps others).
 
 
-# TODO: Figure out how to support examples in the docstrings.
-
-
 # TODO: Consider using a pyi file to trick mypy into seeing the monkey-patched
 # methods.
 
 # TODO: Add type annotations for the class variables.
 
-# TODO: Extract signature from _reduce
-
-# TODO: Take advantage of the new approach to also format the cls into the doc.
-
 import inspect
 
 
@@ -66,7 +59,6 @@ class encapsulates that paradigm.
         "prod",
         "idxmin",
         "idxmax",
-        "nunique",
         "first",
         "last",
     }
diff --git a/python/cudf/cudf/core/reductions.pyi b/python/cudf/cudf/core/reductions.pyi
new file mode 100644
index 00000000000..d50b551dfa2
--- /dev/null
+++ b/python/cudf/cudf/core/reductions.pyi
@@ -0,0 +1,88 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+from typing import Sequence, Union
+#
+# from cudf._typing import Dtype, DtypeObj, ScalarLike
+# from cudf.core.buffer import Buffer
+# from cudf.core.column import ColumnBase
+#
+# T = TypeVar("T")
+
+
+class Reducible:
+    def sum(self, *args, **kwargs):
+        ...
+
+    def product(self, *args, **kwargs):
+        ...
+
+    def min(self, *args, **kwargs):
+        ...
+
+    def max(self, *args, **kwargs):
+        ...
+
+    def count(self, *args, **kwargs):
+        ...
+
+    def size(self, *args, **kwargs):
+        ...
+
+    def any(self, *args, **kwargs):
+        ...
+
+    def all(self, *args, **kwargs):
+        ...
+
+    def sum_of_squares(self, *args, **kwargs):
+        ...
+
+    def mean(self, *args, **kwargs):
+        ...
+
+    def var(self, *args, **kwargs):
+        ...
+
+    def std(self, *args, **kwargs):
+        ...
+
+    def median(self, *args, **kwargs):
+        ...
+
+    def quantile(self, q: Union[float, Sequence[float]], interpolation: str, *args, **kwargs):
+        ...
+
+    def argmax(self, *args, **kwargs):
+        ...
+
+    def argmin(self, *args, **kwargs):
+        ...
+
+    def nunique(self, *args, **kwargs):
+        ...
+
+    def nth(self, *args, **kwargs):
+        ...
+
+    def collect(self, *args, **kwargs):
+        ...
+
+    def unique(self, *args, **kwargs):
+        ...
+
+    def prod(self, *args, **kwargs):
+        ...
+
+    def idxmin(self, *args, **kwargs):
+        ...
+
+    def idxmax(self, *args, **kwargs):
+        ...
+
+    def first(self, *args, **kwargs):
+        ...
+
+    def last(self, *args, **kwargs):
+        ...

From fce53b89358f5f34b3387fa55fe324cb9d7f9cfa Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 18 Jan 2022 14:39:05 -0800
Subject: [PATCH 10/39] Fix quantile issues.

---
 python/cudf/cudf/core/column/datetime.py       | 2 +-
 python/cudf/cudf/core/column/numerical_base.py | 2 +-
 python/cudf/cudf/core/reductions.py            | 2 +-
 python/cudf/cudf/core/reductions.pyi           | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 60645b0110d..a1431025cf1 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -370,7 +370,7 @@ def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timestamp:
         )
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs
     ) -> ColumnBase:
         result = self.as_numerical.quantile(
             q=q, interpolation=interpolation, exact=exact
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 68adf594462..56b56ad3c5c 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -85,7 +85,7 @@ def skew(self, skipna: bool = None) -> ScalarLike:
         return skew
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs
     ) -> NumericalBaseColumn:
         if isinstance(q, Number) or cudf.api.types.is_list_like(q):
             np_array_q = np.asarray(q)
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index ba7026faac4..6046714fbcf 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2022, NVIDIA CORPORATION.
 
 
 # TODO: Implement a factory that generates classes encapsulating passthrough
diff --git a/python/cudf/cudf/core/reductions.pyi b/python/cudf/cudf/core/reductions.pyi
index d50b551dfa2..aa1d0e46b71 100644
--- a/python/cudf/cudf/core/reductions.pyi
+++ b/python/cudf/cudf/core/reductions.pyi
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2022, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -51,7 +51,7 @@ class Reducible:
     def median(self, *args, **kwargs):
         ...
 
-    def quantile(self, q: Union[float, Sequence[float]], interpolation: str, *args, **kwargs):
+    def quantile(self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs):
         ...
 
     def argmax(self, *args, **kwargs):

From 9d1fb4f20854f761d24ae8d6a5965d0e42d0dc2d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 18 Jan 2022 14:39:34 -0800
Subject: [PATCH 11/39] Remove size reduction.

---
 python/cudf/cudf/core/reductions.py  | 1 -
 python/cudf/cudf/core/reductions.pyi | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index 6046714fbcf..2b8fd1fae14 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -41,7 +41,6 @@ class encapsulates that paradigm.
         "min",
         "max",
         "count",
-        "size",
         "any",
         "all",
         "sum_of_squares",
diff --git a/python/cudf/cudf/core/reductions.pyi b/python/cudf/cudf/core/reductions.pyi
index aa1d0e46b71..19b2d7c9306 100644
--- a/python/cudf/cudf/core/reductions.pyi
+++ b/python/cudf/cudf/core/reductions.pyi
@@ -27,9 +27,6 @@ class Reducible:
     def count(self, *args, **kwargs):
         ...
 
-    def size(self, *args, **kwargs):
-        ...
-
     def any(self, *args, **kwargs):
         ...
 

From 4eaecaa24cc0f38f640919264d57b043b04c91f9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 18 Jan 2022 17:10:53 -0800
Subject: [PATCH 12/39] Write a factory for delegating mixins.

---
 python/cudf/cudf/core/reductions.py  | 256 ++++++++++++++++-----------
 python/cudf/cudf/core/reductions.pyi |   6 -
 2 files changed, 156 insertions(+), 106 deletions(-)

diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/reductions.py
index 2b8fd1fae14..e2b1ddaa152 100644
--- a/python/cudf/cudf/core/reductions.py
+++ b/python/cudf/cudf/core/reductions.py
@@ -1,41 +1,167 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
+import inspect
 
-# TODO: Implement a factory that generates classes encapsulating passthrough
-# operations like this. Then we can programmatically create suitable classes
-# for reductions, scans, and binops (and perhaps others).
 
+def _create_delegating_mixin(
+    mixin_name, docstring, category_name, operation_name, supported_operations
+):
+    """Factory for mixins defining collections of delegated operations.
+
+    This function generates mixins based on two common paradigms in cuDF:
+
+    1. Many classes implement similar operations (e.g. `sum`) via a sequence of
+       calls to lower-level APIs terminating in a libcudf C++ function call.
+    2. libcudf groups many operations into categories using a common API. These
+       APIs usually accept an enum to delinate the specific operation to
+       perform, e.g. binary operations use the `binary_operator` enum when
+       calling the `binary_operation` function. cuDF Python mimics this
+       structure by having operations within a category delegate to a common
+       internal function (e.g. DataFrame.__add__ calls DataFrame._binaryop).
+
+    This factory creates mixins for a category of operations implemented by via
+    this delegator pattern. Its usage is best demonstrated by example below.
+
+    Parameters
+    ----------
+    category_name : str
+        The category of operations for which a mixin is being created. This
+        name will be used to define the following attributes as shown in the
+        example below:
+            - f'_{category_name}_DOCSTRINGS'
+            - f'_VALID_{category_name}S'
+            - f'_SUPPORTED_{category_name}S'
+    operation_name : str
+        The name given to the core function implementing this category of
+        operations.  The corresponding function is the entrypoint for child
+        classes.
+    supported_ops : List[str]
+        The list of valid operations that subclasses of the resulting mixin may
+        request to be implemented.
+
+    Examples
+    --------
+    >>> MyFoo = _create_delegating_mixin(
+    ...     "MyFoo", "MyFoo's docstring", "FOO", "do_foo", {"foo1", "foo2"}
+    ... )
+
+    >>> # The above is equivalent to defining a class like so:
+    ... class MyFoo:
+    ...     # The set of valid foo operations.
+    ...     _VALID_FOOS = {"foo1", "foo2"}
+    ...
+    ...     # This is the method for child classes to override. Note that the
+    ...     # first parameter is always called "op".
+    ...     def do_foo(self, op, *args, **kwargs):
+    ...         raise NotImplementedError
+
+    >>> # MyFoo can be used as follows.
+    >>> class BarImplementsFoo(MyFoo):
+    ...     _SUPPORTED_FOOS = ("foo1",)
+    ...     _FOO_DOCSTRINGS = {"ret": "42"}
+    ...
+    ...     # This method's docstring will be formatted and used for all valid
+    ...     # operations. The `op` key is always automatically filled in, while
+    ...     # other keys are formatted using _FOO_DOCSTRINGS.
+    ...     def do_foo(self, op, *args, **kwargs):
+    ...         '''Perform the operation {op}, which returns {ret}.'''
+    ...         return 42
+
+    >>> bar = BarImplementsFoo()
+    >>> print(bar.foo1())
+    42
+
+    >>> # This will raise an AttributeError because foo2 is not supported by
+    >>> # the BarImplementsFoo subclass of MyFoo.
+    >>> # print(bar.foo2())
+
+    >>> # The docstring is formatted with the operation name as well as any
+    >>> # additional keys provided via the _FOO_DOCSTRINGS parameter.
+    >>> print(bar.foo1.__doc__)
+    Perform the operation foo1, which returns 42.
+    """
 
-# TODO: Consider using a pyi file to trick mypy into seeing the monkey-patched
-# methods.
+    docstring_attr = f"{category_name}_DOCSTRINGS"
+    validity_attr = f"_VALID_{category_name}S"
+
+    class OperationMixin:
+        @classmethod
+        def _add_operation(cls, operation):
+            # This function creates operations on-the-fly.
+
+            # Generate a signature without the `op` parameter.
+            base_operation = getattr(cls, operation_name)
+            signature = inspect.signature(base_operation)
+            new_params = signature.parameters.copy()
+            new_params.pop("op")
+            signature = signature.replace(parameters=new_params.values())
+
+            # Generate the list of arguments forwarded to _reduce.
+            arglist = ", ".join(
+                [
+                    f"{key}={key}"
+                    for key in signature.parameters
+                    if key not in ("self", "args", "kwargs")
+                ]
+            )
+            if arglist:
+                arglist += ", *args, **kwargs"
+            else:
+                arglist = "*args, **kwargs"
+
+            # Apply the formatted docstring of the base operation to the
+            # operation being created here.
+            docstring = base_operation.__doc__.format(
+                cls=cls.__name__,
+                op=operation,
+                **getattr(cls, docstring_attr, {}).get(operation, {}),
+            )
+
+            namespace = {}
+            out = f"""
+def {operation}{str(signature)}:
+    '''{docstring}
+    '''
+    return self.{operation_name}(op="{operation}", {arglist})
+"""
+            exec(out, namespace)
+            setattr(cls, operation, namespace[operation])
+
+        @classmethod
+        def __init_subclass__(cls):
+            # Only add the valid set of operations for a particular class.
+            valid_operations = set()
+            for base_cls in cls.__mro__:
+                valid_operations |= getattr(base_cls, validity_attr, set())
+
+            invalid_operations = valid_operations - supported_operations
+            assert len(invalid_operations) == 0, (
+                f"Invalid requested operations: {invalid_operations}"
+            )
+            for operation in valid_operations:
+                # Check if the operation is already defined so that subclasses
+                # can override the method if desired.
+                if not hasattr(cls, operation):
+                    cls._add_operation(operation)
+
+    def _operation(self, op: str, *args, **kwargs):
+        raise NotImplementedError
 
-# TODO: Add type annotations for the class variables.
+    OperationMixin.__name__ = mixin_name
+    OperationMixin.__doc__ = docstring
+    setattr(OperationMixin, operation_name, _operation)
+    setattr(OperationMixin, f"_SUPPORTED_{category_name}S",
+            supported_operations)
 
-import inspect
+    return OperationMixin
 
 
-class Reducible:
-    """Mixin encapsulating for reduction operations.
-
-    Various classes in cuDF support
-    `reductions <https://en.wikipedia.org/wiki/Reduction_Operator>`__.  In
-    practice the reductions are implemented via dispatch to a lower-level API.
-    For example, Frame-like objects dispatch to Columns, which in turn dispatch
-    to libcudf implementations. As a result, rather than encoding the logic for
-    different types of reductions, most classes can implement all reductions
-    in terms of a single function that performs necessary pre- and
-    post-processing of a result generated by calling to a lower-level API. This
-    class encapsulates that paradigm.
-
-    Notes
-    -----
-    The documentation for the reductions is generated by formatting the
-    docstring for _reduce via `cls._reduce.__doc__.format(op=reduction)`.
-    Therefore, subclasses are responsible for writing an appropriate docstring
-    for the _reduce method (one that omits documentation of the op parameter).
-    """
-
-    _SUPPORTED_REDUCTIONS = {
+Reducible = _create_delegating_mixin(
+    "Reducible",
+    "Mixin encapsulating for reduction operations.",
+    "REDUCTION",
+    "_reduce",
+    {
         "sum",
         "product",
         "min",
@@ -61,74 +187,4 @@ class encapsulates that paradigm.
         "first",
         "last",
     }
-
-    # TODO: Add a return type.
-    def _reduce(self, op: str, *args, **kwargs):
-        raise NotImplementedError
-
-    @classmethod
-    def _add_reduction(cls, reduction):
-        # This function creates reduction operations on-the-fly and assigns
-        # them to the class.
-
-        # Generate a signature without the `op` parameter.
-        signature = inspect.signature(cls._reduce)
-        new_params = signature.parameters.copy()
-        new_params.pop("op")
-        signature = signature.replace(parameters=new_params.values())
-
-        # Generate the list of arguments forwarded to _reduce.
-        arglist = ", ".join(
-            [
-                f"{key}={key}"
-                for key in signature.parameters
-                if key not in ("self", "args", "kwargs")
-            ]
-        )
-        if arglist:
-            arglist += ", *args, **kwargs"
-        else:
-            arglist = "*args, **kwargs"
-
-        # The default docstring is that of the _reduce method. Additional
-        # formatting arguments may be provided in a class-level dictionary
-        # of the form _REDUCTION_DOCSTRINGS
-        docstring = cls._reduce.__doc__.format(
-            cls=cls.__name__,
-            op=reduction,
-            **getattr(cls, "_REDUCTION_DOCSTRINGS", {}).get(reduction, {}),
-        )
-
-        # Create the desired function.
-        namespace = {}
-        out = """
-def {reduction}{signature}:
-    \"\"\"{docstring}
-    \"\"\"
-    return self._reduce(op="{reduction}", {arglist})
-        """.format(
-            reduction=reduction,
-            signature=str(signature),
-            arglist=arglist,
-            docstring=docstring,
-        )
-        exec(out, namespace)
-        setattr(cls, reduction, namespace[reduction])
-
-    @classmethod
-    def __init_subclass__(cls):
-        # Only add the set of reductions that are valid for a particular class.
-        # Subclasses may override the methods directly if they need special
-        # behavior.
-        valid_reductions = set()
-        for base_cls in cls.__mro__:
-            valid_reductions |= getattr(base_cls, "_VALID_REDUCTIONS", set())
-
-        assert len(valid_reductions - cls._SUPPORTED_REDUCTIONS) == 0, (
-            "Invalid requested reductions "
-            f"{valid_reductions - cls._SUPPORTED_REDUCTIONS}"
-        )
-        for reduction in valid_reductions:
-            # Allow overriding the operation in the classes.
-            if not hasattr(cls, reduction):
-                cls._add_reduction(reduction)
+)
diff --git a/python/cudf/cudf/core/reductions.pyi b/python/cudf/cudf/core/reductions.pyi
index 19b2d7c9306..360bf73482f 100644
--- a/python/cudf/cudf/core/reductions.pyi
+++ b/python/cudf/cudf/core/reductions.pyi
@@ -3,12 +3,6 @@
 from __future__ import annotations
 
 from typing import Sequence, Union
-#
-# from cudf._typing import Dtype, DtypeObj, ScalarLike
-# from cudf.core.buffer import Buffer
-# from cudf.core.column import ColumnBase
-#
-# T = TypeVar("T")
 
 
 class Reducible:

From c6c2e808f81627d79b6e3840773486c208d39596 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 19 Jan 2022 10:12:49 -0800
Subject: [PATCH 13/39] Move reducible to new subpackage.

---
 python/cudf/cudf/core/column/column.py            | 2 +-
 python/cudf/cudf/core/column/numerical_base.py    | 2 +-
 python/cudf/cudf/core/groupby/groupby.py          | 2 +-
 python/cudf/cudf/core/mixins/__init__.py          | 5 +++++
 python/cudf/cudf/core/{ => mixins}/reductions.py  | 0
 python/cudf/cudf/core/{ => mixins}/reductions.pyi | 0
 python/cudf/cudf/core/window/rolling.py           | 2 +-
 7 files changed, 9 insertions(+), 4 deletions(-)
 create mode 100644 python/cudf/cudf/core/mixins/__init__.py
 rename python/cudf/cudf/core/{ => mixins}/reductions.py (100%)
 rename python/cudf/cudf/core/{ => mixins}/reductions.pyi (100%)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 741a2b5a860..70883f717f4 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -68,7 +68,7 @@
     ListDtype,
     StructDtype,
 )
-from cudf.core.reductions import Reducible
+from cudf.core.mixins import Reducible
 from cudf.utils import utils
 from cudf.utils.dtypes import (
     cudf_dtype_from_pa_type,
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 56b56ad3c5c..5776363f16c 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -12,7 +12,7 @@
 from cudf import _lib as libcudf
 from cudf._typing import ScalarLike
 from cudf.core.column import ColumnBase
-from cudf.core.reductions import Reducible
+from cudf.core.mixins import Reducible
 
 
 class NumericalBaseColumn(ColumnBase, Reducible):
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index b2298f42fb1..b0bd3694850 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -16,7 +16,7 @@
 from cudf.core.abc import Serializable
 from cudf.core.column.column import arange, as_column
 from cudf.core.multiindex import MultiIndex
-from cudf.core.reductions import Reducible
+from cudf.core.mixins import Reducible
 from cudf.utils.utils import GetAttrGetItemMixin, cached_property
 
 
diff --git a/python/cudf/cudf/core/mixins/__init__.py b/python/cudf/cudf/core/mixins/__init__.py
new file mode 100644
index 00000000000..cecf4c1c7ed
--- /dev/null
+++ b/python/cudf/cudf/core/mixins/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+from .reductions import Reducible
+
+__all__ = ["Reducible"]
diff --git a/python/cudf/cudf/core/reductions.py b/python/cudf/cudf/core/mixins/reductions.py
similarity index 100%
rename from python/cudf/cudf/core/reductions.py
rename to python/cudf/cudf/core/mixins/reductions.py
diff --git a/python/cudf/cudf/core/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi
similarity index 100%
rename from python/cudf/cudf/core/reductions.pyi
rename to python/cudf/cudf/core/mixins/reductions.pyi
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index 059daf99ecd..ce57c2c1215 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -11,7 +11,7 @@
 from cudf.api.types import is_integer, is_number
 from cudf.core import column
 from cudf.core.column.column import as_column
-from cudf.core.reductions import Reducible
+from cudf.core.mixins import Reducible
 from cudf.utils import cudautils
 from cudf.utils.utils import GetAttrGetItemMixin
 

From c43a1a66e1a56c89a5086f977900d077e0765f66 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 19 Jan 2022 16:33:38 -0800
Subject: [PATCH 14/39] Move factory to a separate module.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 156 ++++++++++++++++++
 python/cudf/cudf/core/mixins/reductions.py    | 155 +----------------
 2 files changed, 157 insertions(+), 154 deletions(-)
 create mode 100644 python/cudf/cudf/core/mixins/mixin_factory.py

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
new file mode 100644
index 00000000000..3070af36f0f
--- /dev/null
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -0,0 +1,156 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import inspect
+
+
+def _create_delegating_mixin(
+    mixin_name, docstring, category_name, operation_name, supported_operations
+):
+    """Factory for mixins defining collections of delegated operations.
+
+    This function generates mixins based on two common paradigms in cuDF:
+
+    1. Many classes implement similar operations (e.g. `sum`) via a sequence of
+       calls to lower-level APIs terminating in a libcudf C++ function call.
+    2. libcudf groups many operations into categories using a common API. These
+       APIs usually accept an enum to delinate the specific operation to
+       perform, e.g. binary operations use the `binary_operator` enum when
+       calling the `binary_operation` function. cuDF Python mimics this
+       structure by having operations within a category delegate to a common
+       internal function (e.g. DataFrame.__add__ calls DataFrame._binaryop).
+
+    This factory creates mixins for a category of operations implemented by via
+    this delegator pattern. Its usage is best demonstrated by example below.
+
+    Parameters
+    ----------
+    category_name : str
+        The category of operations for which a mixin is being created. This
+        name will be used to define the following attributes as shown in the
+        example below:
+            - f'_{category_name}_DOCSTRINGS'
+            - f'_VALID_{category_name}S'
+            - f'_SUPPORTED_{category_name}S'
+    operation_name : str
+        The name given to the core function implementing this category of
+        operations.  The corresponding function is the entrypoint for child
+        classes.
+    supported_ops : List[str]
+        The list of valid operations that subclasses of the resulting mixin may
+        request to be implemented.
+
+    Examples
+    --------
+    >>> MyFoo = _create_delegating_mixin(
+    ...     "MyFoo", "MyFoo's docstring", "FOO", "do_foo", {"foo1", "foo2"}
+    ... )
+
+    >>> # The above is equivalent to defining a class like so:
+    ... class MyFoo:
+    ...     # The set of valid foo operations.
+    ...     _VALID_FOOS = {"foo1", "foo2"}
+    ...
+    ...     # This is the method for child classes to override. Note that the
+    ...     # first parameter is always called "op".
+    ...     def do_foo(self, op, *args, **kwargs):
+    ...         raise NotImplementedError
+
+    >>> # MyFoo can be used as follows.
+    >>> class BarImplementsFoo(MyFoo):
+    ...     _SUPPORTED_FOOS = ("foo1",)
+    ...     _FOO_DOCSTRINGS = {"ret": "42"}
+    ...
+    ...     # This method's docstring will be formatted and used for all valid
+    ...     # operations. The `op` key is always automatically filled in, while
+    ...     # other keys are formatted using _FOO_DOCSTRINGS.
+    ...     def do_foo(self, op, *args, **kwargs):
+    ...         '''Perform the operation {op}, which returns {ret}.'''
+    ...         return 42
+
+    >>> bar = BarImplementsFoo()
+    >>> print(bar.foo1())
+    42
+
+    >>> # This will raise an AttributeError because foo2 is not supported by
+    >>> # the BarImplementsFoo subclass of MyFoo.
+    >>> # print(bar.foo2())
+
+    >>> # The docstring is formatted with the operation name as well as any
+    >>> # additional keys provided via the _FOO_DOCSTRINGS parameter.
+    >>> print(bar.foo1.__doc__)
+    Perform the operation foo1, which returns 42.
+    """
+
+    docstring_attr = f"{category_name}_DOCSTRINGS"
+    validity_attr = f"_VALID_{category_name}S"
+
+    class OperationMixin:
+        @classmethod
+        def _add_operation(cls, operation):
+            # This function creates operations on-the-fly.
+
+            # Generate a signature without the `op` parameter.
+            base_operation = getattr(cls, operation_name)
+            signature = inspect.signature(base_operation)
+            new_params = signature.parameters.copy()
+            new_params.pop("op")
+            signature = signature.replace(parameters=new_params.values())
+
+            # Generate the list of arguments forwarded to _reduce.
+            arglist = ", ".join(
+                [
+                    f"{key}={key}"
+                    for key in signature.parameters
+                    if key not in ("self", "args", "kwargs")
+                ]
+            )
+            if arglist:
+                arglist += ", *args, **kwargs"
+            else:
+                arglist = "*args, **kwargs"
+
+            # Apply the formatted docstring of the base operation to the
+            # operation being created here.
+            docstring = base_operation.__doc__.format(
+                cls=cls.__name__,
+                op=operation,
+                **getattr(cls, docstring_attr, {}).get(operation, {}),
+            )
+
+            namespace = {}
+            out = f"""
+def {operation}{str(signature)}:
+    '''{docstring}
+    '''
+    return self.{operation_name}(op="{operation}", {arglist})
+"""
+            exec(out, namespace)
+            setattr(cls, operation, namespace[operation])
+
+        @classmethod
+        def __init_subclass__(cls):
+            # Only add the valid set of operations for a particular class.
+            valid_operations = set()
+            for base_cls in cls.__mro__:
+                valid_operations |= getattr(base_cls, validity_attr, set())
+
+            invalid_operations = valid_operations - supported_operations
+            assert len(invalid_operations) == 0, (
+                f"Invalid requested operations: {invalid_operations}"
+            )
+            for operation in valid_operations:
+                # Check if the operation is already defined so that subclasses
+                # can override the method if desired.
+                if not hasattr(cls, operation):
+                    cls._add_operation(operation)
+
+    def _operation(self, op: str, *args, **kwargs):
+        raise NotImplementedError
+
+    OperationMixin.__name__ = mixin_name
+    OperationMixin.__doc__ = docstring
+    setattr(OperationMixin, operation_name, _operation)
+    setattr(OperationMixin, f"_SUPPORTED_{category_name}S",
+            supported_operations)
+
+    return OperationMixin
diff --git a/python/cudf/cudf/core/mixins/reductions.py b/python/cudf/cudf/core/mixins/reductions.py
index e2b1ddaa152..8a3e436cb54 100644
--- a/python/cudf/cudf/core/mixins/reductions.py
+++ b/python/cudf/cudf/core/mixins/reductions.py
@@ -1,159 +1,6 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-import inspect
-
-
-def _create_delegating_mixin(
-    mixin_name, docstring, category_name, operation_name, supported_operations
-):
-    """Factory for mixins defining collections of delegated operations.
-
-    This function generates mixins based on two common paradigms in cuDF:
-
-    1. Many classes implement similar operations (e.g. `sum`) via a sequence of
-       calls to lower-level APIs terminating in a libcudf C++ function call.
-    2. libcudf groups many operations into categories using a common API. These
-       APIs usually accept an enum to delinate the specific operation to
-       perform, e.g. binary operations use the `binary_operator` enum when
-       calling the `binary_operation` function. cuDF Python mimics this
-       structure by having operations within a category delegate to a common
-       internal function (e.g. DataFrame.__add__ calls DataFrame._binaryop).
-
-    This factory creates mixins for a category of operations implemented by via
-    this delegator pattern. Its usage is best demonstrated by example below.
-
-    Parameters
-    ----------
-    category_name : str
-        The category of operations for which a mixin is being created. This
-        name will be used to define the following attributes as shown in the
-        example below:
-            - f'_{category_name}_DOCSTRINGS'
-            - f'_VALID_{category_name}S'
-            - f'_SUPPORTED_{category_name}S'
-    operation_name : str
-        The name given to the core function implementing this category of
-        operations.  The corresponding function is the entrypoint for child
-        classes.
-    supported_ops : List[str]
-        The list of valid operations that subclasses of the resulting mixin may
-        request to be implemented.
-
-    Examples
-    --------
-    >>> MyFoo = _create_delegating_mixin(
-    ...     "MyFoo", "MyFoo's docstring", "FOO", "do_foo", {"foo1", "foo2"}
-    ... )
-
-    >>> # The above is equivalent to defining a class like so:
-    ... class MyFoo:
-    ...     # The set of valid foo operations.
-    ...     _VALID_FOOS = {"foo1", "foo2"}
-    ...
-    ...     # This is the method for child classes to override. Note that the
-    ...     # first parameter is always called "op".
-    ...     def do_foo(self, op, *args, **kwargs):
-    ...         raise NotImplementedError
-
-    >>> # MyFoo can be used as follows.
-    >>> class BarImplementsFoo(MyFoo):
-    ...     _SUPPORTED_FOOS = ("foo1",)
-    ...     _FOO_DOCSTRINGS = {"ret": "42"}
-    ...
-    ...     # This method's docstring will be formatted and used for all valid
-    ...     # operations. The `op` key is always automatically filled in, while
-    ...     # other keys are formatted using _FOO_DOCSTRINGS.
-    ...     def do_foo(self, op, *args, **kwargs):
-    ...         '''Perform the operation {op}, which returns {ret}.'''
-    ...         return 42
-
-    >>> bar = BarImplementsFoo()
-    >>> print(bar.foo1())
-    42
-
-    >>> # This will raise an AttributeError because foo2 is not supported by
-    >>> # the BarImplementsFoo subclass of MyFoo.
-    >>> # print(bar.foo2())
-
-    >>> # The docstring is formatted with the operation name as well as any
-    >>> # additional keys provided via the _FOO_DOCSTRINGS parameter.
-    >>> print(bar.foo1.__doc__)
-    Perform the operation foo1, which returns 42.
-    """
-
-    docstring_attr = f"{category_name}_DOCSTRINGS"
-    validity_attr = f"_VALID_{category_name}S"
-
-    class OperationMixin:
-        @classmethod
-        def _add_operation(cls, operation):
-            # This function creates operations on-the-fly.
-
-            # Generate a signature without the `op` parameter.
-            base_operation = getattr(cls, operation_name)
-            signature = inspect.signature(base_operation)
-            new_params = signature.parameters.copy()
-            new_params.pop("op")
-            signature = signature.replace(parameters=new_params.values())
-
-            # Generate the list of arguments forwarded to _reduce.
-            arglist = ", ".join(
-                [
-                    f"{key}={key}"
-                    for key in signature.parameters
-                    if key not in ("self", "args", "kwargs")
-                ]
-            )
-            if arglist:
-                arglist += ", *args, **kwargs"
-            else:
-                arglist = "*args, **kwargs"
-
-            # Apply the formatted docstring of the base operation to the
-            # operation being created here.
-            docstring = base_operation.__doc__.format(
-                cls=cls.__name__,
-                op=operation,
-                **getattr(cls, docstring_attr, {}).get(operation, {}),
-            )
-
-            namespace = {}
-            out = f"""
-def {operation}{str(signature)}:
-    '''{docstring}
-    '''
-    return self.{operation_name}(op="{operation}", {arglist})
-"""
-            exec(out, namespace)
-            setattr(cls, operation, namespace[operation])
-
-        @classmethod
-        def __init_subclass__(cls):
-            # Only add the valid set of operations for a particular class.
-            valid_operations = set()
-            for base_cls in cls.__mro__:
-                valid_operations |= getattr(base_cls, validity_attr, set())
-
-            invalid_operations = valid_operations - supported_operations
-            assert len(invalid_operations) == 0, (
-                f"Invalid requested operations: {invalid_operations}"
-            )
-            for operation in valid_operations:
-                # Check if the operation is already defined so that subclasses
-                # can override the method if desired.
-                if not hasattr(cls, operation):
-                    cls._add_operation(operation)
-
-    def _operation(self, op: str, *args, **kwargs):
-        raise NotImplementedError
-
-    OperationMixin.__name__ = mixin_name
-    OperationMixin.__doc__ = docstring
-    setattr(OperationMixin, operation_name, _operation)
-    setattr(OperationMixin, f"_SUPPORTED_{category_name}S",
-            supported_operations)
-
-    return OperationMixin
+from .mixin_factory import _create_delegating_mixin
 
 
 Reducible = _create_delegating_mixin(

From 01d15743c1ca73f84974fbb0f0e5c1d02e6df843 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 19 Jan 2022 17:05:03 -0800
Subject: [PATCH 15/39] Some final cleanup and polishing of documentation.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 31 +++++++++++++------
 python/cudf/cudf/core/mixins/reductions.py    |  5 ++-
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 3070af36f0f..dc0554e0341 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -10,17 +10,24 @@ def _create_delegating_mixin(
 
     This function generates mixins based on two common paradigms in cuDF:
 
-    1. Many classes implement similar operations (e.g. `sum`) via a sequence of
-       calls to lower-level APIs terminating in a libcudf C++ function call.
-    2. libcudf groups many operations into categories using a common API. These
+    1. libcudf groups many operations into categories using a common API. These
        APIs usually accept an enum to delinate the specific operation to
        perform, e.g. binary operations use the `binary_operator` enum when
        calling the `binary_operation` function. cuDF Python mimics this
        structure by having operations within a category delegate to a common
        internal function (e.g. DataFrame.__add__ calls DataFrame._binaryop).
+    2. Many cuDF classes implement similar operations (e.g. `sum`) via
+       delegation to lower-level APIs before reaching a libcudf C++ function
+       call. As a result, many API function calls actually involve multiple
+       delegations to lower-level APIs that can look essentially identical. An
+       example of such a sequence would be DataFrame.sum -> DataFrame._reduce
+       -> Column.sum -> Column._reduce -> libcudf.
 
     This factory creates mixins for a category of operations implemented by via
-    this delegator pattern. Its usage is best demonstrated by example below.
+    this delegator pattern. The resulting mixins make it easy to share common
+    functions across various classes while also providing a common entrypoint
+    for implementing the centralized logic for a given category of operations.
+    Its usage is best demonstrated by example below.
 
     Parameters
     ----------
@@ -96,7 +103,7 @@ def _add_operation(cls, operation):
             new_params.pop("op")
             signature = signature.replace(parameters=new_params.values())
 
-            # Generate the list of arguments forwarded to _reduce.
+            # Generate the list of arguments forwarded to the base operation.
             arglist = ", ".join(
                 [
                     f"{key}={key}"
@@ -135,9 +142,9 @@ def __init_subclass__(cls):
                 valid_operations |= getattr(base_cls, validity_attr, set())
 
             invalid_operations = valid_operations - supported_operations
-            assert len(invalid_operations) == 0, (
-                f"Invalid requested operations: {invalid_operations}"
-            )
+            assert (
+                len(invalid_operations) == 0
+            ), f"Invalid requested operations: {invalid_operations}"
             for operation in valid_operations:
                 # Check if the operation is already defined so that subclasses
                 # can override the method if desired.
@@ -150,7 +157,11 @@ def _operation(self, op: str, *args, **kwargs):
     OperationMixin.__name__ = mixin_name
     OperationMixin.__doc__ = docstring
     setattr(OperationMixin, operation_name, _operation)
-    setattr(OperationMixin, f"_SUPPORTED_{category_name}S",
-            supported_operations)
+    # This attribute is set in case lookup is convenient at a later point, but
+    # it is not strictly necessary since `supported_operations` is part of the
+    # closure associated with the class's creation.
+    setattr(
+        OperationMixin, f"_SUPPORTED_{category_name}S", supported_operations
+    )
 
     return OperationMixin
diff --git a/python/cudf/cudf/core/mixins/reductions.py b/python/cudf/cudf/core/mixins/reductions.py
index 8a3e436cb54..24e5319b9a3 100644
--- a/python/cudf/cudf/core/mixins/reductions.py
+++ b/python/cudf/cudf/core/mixins/reductions.py
@@ -2,10 +2,9 @@
 
 from .mixin_factory import _create_delegating_mixin
 
-
 Reducible = _create_delegating_mixin(
     "Reducible",
-    "Mixin encapsulating for reduction operations.",
+    "Mixin encapsulating reduction operations.",
     "REDUCTION",
     "_reduce",
     {
@@ -33,5 +32,5 @@
         "idxmax",
         "first",
         "last",
-    }
+    },
 )

From ba6e98f5fa24ace3a2c5bb3bfbee91ff09f6ea1f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 24 Jan 2022 12:06:49 -0800
Subject: [PATCH 16/39] Fix style.

---
 python/cudf/cudf/core/column/column.py        |  4 ++-
 python/cudf/cudf/core/column/datetime.py      | 18 ++++++++++---
 .../cudf/cudf/core/column/numerical_base.py   | 11 ++++++--
 python/cudf/cudf/core/column/string.py        |  7 +++++-
 python/cudf/cudf/core/column/timedelta.py     | 25 ++++++++++++++++---
 python/cudf/cudf/core/groupby/groupby.py      |  2 +-
 python/cudf/cudf/core/mixins/reductions.pyi   |  1 -
 7 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 70883f717f4..6b95d9e99fe 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -687,7 +687,9 @@ def quantile(
         self,
         q: Union[float, Sequence[float]],
         interpolation: str,
-        exact: bool, *args, **kwargs,
+        exact: bool,
+        *args,
+        **kwargs,
     ) -> ColumnBase:
         raise TypeError(f"cannot perform quantile with type {self.dtype}")
 
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index a1431025cf1..f3a6da93cd0 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -350,14 +350,21 @@ def _default_na_value(self) -> DatetimeLikeScalar:
         """Returns the default NA value for this column"""
         return np.datetime64("nat", self.time_unit)
 
-    def mean(self, skipna=None, dtype=np.float64, *args, **kwargs) -> ScalarLike:
+    def mean(
+        self, skipna=None, dtype=np.float64, *args, **kwargs
+    ) -> ScalarLike:
         return pd.Timestamp(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
             unit=self.time_unit,
         )
 
     def std(
-        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64, *args, **kwargs
+        self,
+        skipna: bool = None,
+        ddof: int = 1,
+        dtype: Dtype = np.float64,
+        *args,
+        **kwargs,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype)
@@ -370,7 +377,12 @@ def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timestamp:
         )
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs
+        self,
+        q: Union[float, Sequence[float]],
+        interpolation: str,
+        exact: bool,
+        *args,
+        **kwargs,
     ) -> ColumnBase:
         result = self.as_numerical.quantile(
             q=q, interpolation=interpolation, exact=exact
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 5776363f16c..583eb59bb4b 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -85,7 +85,12 @@ def skew(self, skipna: bool = None) -> ScalarLike:
         return skew
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs
+        self,
+        q: Union[float, Sequence[float]],
+        interpolation: str,
+        exact: bool,
+        *args,
+        **kwargs,
     ) -> NumericalBaseColumn:
         if isinstance(q, Number) or cudf.api.types.is_list_like(q):
             np_array_q = np.asarray(q)
@@ -113,7 +118,9 @@ def var(self, dtype=np.float64, *args, **kwargs):
     def std(self, dtype=np.float64, *args, **kwargs):
         return self._reduce("std", dtype=dtype, *args, **kwargs)
 
-    def median(self, skipna: bool = None, *args, **kwargs) -> NumericalBaseColumn:
+    def median(
+        self, skipna: bool = None, *args, **kwargs
+    ) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
 
         if self._can_return_nan(skipna=skipna):
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index b286be78117..4c503ba5a0b 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5089,7 +5089,12 @@ def to_arrow(self) -> pa.Array:
             return super().to_arrow()
 
     def sum(
-        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0, *args, **kwargs
+        self,
+        skipna: bool = None,
+        dtype: Dtype = None,
+        min_count: int = 0,
+        *args,
+        **kwargs,
     ):
         result_col = self._process_for_reduction(
             skipna=skipna, min_count=min_count
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index e92347d42d1..cea261205f6 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -369,7 +369,9 @@ def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn:
             return self
         return libcudf.unary.cast(self, dtype=dtype)
 
-    def mean(self, skipna=None, dtype: Dtype = np.float64, *args, **kwargs) -> pd.Timedelta:
+    def mean(
+        self, skipna=None, dtype: Dtype = np.float64, *args, **kwargs
+    ) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
             unit=self.time_unit,
@@ -384,7 +386,12 @@ def isin(self, values: Sequence) -> ColumnBase:
         return cudf.core.tools.datetimes._isin_datetimelike(self, values)
 
     def quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs
+        self,
+        q: Union[float, Sequence[float]],
+        interpolation: str,
+        exact: bool,
+        *args,
+        **kwargs,
     ) -> "column.ColumnBase":
         result = self.as_numerical.quantile(
             q=q, interpolation=interpolation, exact=exact
@@ -394,7 +401,12 @@ def quantile(
         return result.astype(self.dtype)
 
     def sum(
-        self, skipna: bool = None, dtype: Dtype = None, min_count=0, *args, **kwargs
+        self,
+        skipna: bool = None,
+        dtype: Dtype = None,
+        min_count=0,
+        *args,
+        **kwargs,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             # TODO: mypy doesn't fully support monkey-patching, so the
@@ -407,7 +419,12 @@ def sum(
         )
 
     def std(
-        self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64, *args, **kwargs
+        self,
+        skipna: bool = None,
+        ddof: int = 1,
+        dtype: Dtype = np.float64,
+        *args,
+        **kwargs,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype),
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index b0bd3694850..ec53b4ed93a 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -15,8 +15,8 @@
 from cudf.api.types import is_list_like
 from cudf.core.abc import Serializable
 from cudf.core.column.column import arange, as_column
-from cudf.core.multiindex import MultiIndex
 from cudf.core.mixins import Reducible
+from cudf.core.multiindex import MultiIndex
 from cudf.utils.utils import GetAttrGetItemMixin, cached_property
 
 
diff --git a/python/cudf/cudf/core/mixins/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi
index 360bf73482f..ece904f5c2a 100644
--- a/python/cudf/cudf/core/mixins/reductions.pyi
+++ b/python/cudf/cudf/core/mixins/reductions.pyi
@@ -4,7 +4,6 @@ from __future__ import annotations
 
 from typing import Sequence, Union
 
-
 class Reducible:
     def sum(self, *args, **kwargs):
         ...

From 8fbcd73957bf1d318505fa8fa9cd950543ffeb98 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 24 Jan 2022 15:08:41 -0800
Subject: [PATCH 17/39] Some minor simplifications and clarifications.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index dc0554e0341..d7efe2674d3 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -98,10 +98,9 @@ def _add_operation(cls, operation):
 
             # Generate a signature without the `op` parameter.
             base_operation = getattr(cls, operation_name)
-            signature = inspect.signature(base_operation)
-            new_params = signature.parameters.copy()
-            new_params.pop("op")
-            signature = signature.replace(parameters=new_params.values())
+            base_params = inspect.signature(base_operation).parameters.values()
+            params = [p for p in base_params if p.name != "op"]
+            signature = inspect.Signature(parameters=params)
 
             # Generate the list of arguments forwarded to the base operation.
             arglist = ", ".join(
@@ -111,10 +110,7 @@ def _add_operation(cls, operation):
                     if key not in ("self", "args", "kwargs")
                 ]
             )
-            if arglist:
-                arglist += ", *args, **kwargs"
-            else:
-                arglist = "*args, **kwargs"
+            arglist = arglist + (", " if arglist else "") + "*args, **kwargs"
 
             # Apply the formatted docstring of the base operation to the
             # operation being created here.
@@ -125,13 +121,13 @@ def _add_operation(cls, operation):
             )
 
             namespace = {}
-            out = f"""
+            exec_str = f"""
 def {operation}{str(signature)}:
     '''{docstring}
     '''
     return self.{operation_name}(op="{operation}", {arglist})
 """
-            exec(out, namespace)
+            exec(exec_str, namespace)
             setattr(cls, operation, namespace[operation])
 
         @classmethod

From d2829cf35d0786720d800be0ed87c931e0f3e877 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 24 Jan 2022 15:25:17 -0800
Subject: [PATCH 18/39] Fix error messages and remove unnecessary comparison in
 tests.

---
 python/cudf/cudf/core/series.py      | 9 +++++++--
 python/cudf/cudf/tests/test_stats.py | 9 ++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f2c07d8b492..7e1b8549f06 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2755,7 +2755,10 @@ def cov(self, other, min_periods=None):
         try:
             return lhs._column.cov(rhs._column)
         except AttributeError:
-            raise TypeError(f"cannot perform cov with type {self.dtype}")
+            raise TypeError(
+                f"cannot perform covariance with types {self.dtype}, "
+                f"{other.dtype}"
+            )
 
     def transpose(self):
         """Return the transpose, which is by definition self.
@@ -2794,7 +2797,9 @@ def corr(self, other, method="pearson", min_periods=None):
         try:
             return lhs._column.corr(rhs._column)
         except AttributeError:
-            raise TypeError(f"cannot perform corr with type {self.dtype}")
+            raise TypeError(
+                f"cannot perform corr with types {self.dtype}, {other.dtype}"
+            )
 
     def autocorr(self, lag=1):
         """Compute the lag-N autocorrelation. This method computes the Pearson
diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py
index 142ca6c6831..8ae77936e46 100644
--- a/python/cudf/cudf/tests/test_stats.py
+++ b/python/cudf/cudf/tests/test_stats.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
-import re
 from concurrent.futures import ThreadPoolExecutor
 
 import numpy as np
@@ -510,9 +509,7 @@ def test_cov_corr_invalid_dtypes(gsr):
         rfunc=gsr.corr,
         lfunc_args_and_kwargs=([psr],),
         rfunc_args_and_kwargs=([gsr],),
-        expected_error_message=re.escape(
-            f"cannot perform corr with types {gsr.dtype}, {gsr.dtype}"
-        ),
+        compare_error_message=False,
     )
 
     assert_exceptions_equal(
@@ -520,7 +517,5 @@ def test_cov_corr_invalid_dtypes(gsr):
         rfunc=gsr.cov,
         lfunc_args_and_kwargs=([psr],),
         rfunc_args_and_kwargs=([gsr],),
-        expected_error_message=re.escape(
-            f"cannot perform covarience with types {gsr.dtype}, {gsr.dtype}"
-        ),
+        compare_error_message=False,
     )

From a63e0c3966b8cfc42a90d2899d211a0562d43b01 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 24 Jan 2022 15:29:52 -0800
Subject: [PATCH 19/39] Remove quantile from reductions since it doesn't
 strictly follow the paradigm (may result in the same dimensionality if
 multiple quantiles are requested) and it is always overridden anyway.

---
 python/cudf/cudf/core/column/column.py         | 7 +------
 python/cudf/cudf/core/column/datetime.py       | 7 +------
 python/cudf/cudf/core/column/numerical_base.py | 7 +------
 python/cudf/cudf/core/column/timedelta.py      | 7 +------
 python/cudf/cudf/core/mixins/reductions.py     | 1 -
 python/cudf/cudf/core/mixins/reductions.pyi    | 5 -----
 6 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 6b95d9e99fe..57767e019da 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -684,12 +684,7 @@ def append(self, other: ColumnBase) -> ColumnBase:
         return concat_columns([self, as_column(other)])
 
     def quantile(
-        self,
-        q: Union[float, Sequence[float]],
-        interpolation: str,
-        exact: bool,
-        *args,
-        **kwargs,
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
     ) -> ColumnBase:
         raise TypeError(f"cannot perform quantile with type {self.dtype}")
 
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index f3a6da93cd0..64f44763454 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -377,12 +377,7 @@ def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timestamp:
         )
 
     def quantile(
-        self,
-        q: Union[float, Sequence[float]],
-        interpolation: str,
-        exact: bool,
-        *args,
-        **kwargs,
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
     ) -> ColumnBase:
         result = self.as_numerical.quantile(
             q=q, interpolation=interpolation, exact=exact
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 583eb59bb4b..70f7fc4444d 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -85,12 +85,7 @@ def skew(self, skipna: bool = None) -> ScalarLike:
         return skew
 
     def quantile(
-        self,
-        q: Union[float, Sequence[float]],
-        interpolation: str,
-        exact: bool,
-        *args,
-        **kwargs,
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
     ) -> NumericalBaseColumn:
         if isinstance(q, Number) or cudf.api.types.is_list_like(q):
             np_array_q = np.asarray(q)
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index cea261205f6..42c294cfd0c 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -386,12 +386,7 @@ def isin(self, values: Sequence) -> ColumnBase:
         return cudf.core.tools.datetimes._isin_datetimelike(self, values)
 
     def quantile(
-        self,
-        q: Union[float, Sequence[float]],
-        interpolation: str,
-        exact: bool,
-        *args,
-        **kwargs,
+        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
     ) -> "column.ColumnBase":
         result = self.as_numerical.quantile(
             q=q, interpolation=interpolation, exact=exact
diff --git a/python/cudf/cudf/core/mixins/reductions.py b/python/cudf/cudf/core/mixins/reductions.py
index 24e5319b9a3..f73f0e8fbc6 100644
--- a/python/cudf/cudf/core/mixins/reductions.py
+++ b/python/cudf/cudf/core/mixins/reductions.py
@@ -20,7 +20,6 @@
         "var",
         "std",
         "median",
-        "quantile",
         "argmax",
         "argmin",
         "nunique",
diff --git a/python/cudf/cudf/core/mixins/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi
index ece904f5c2a..1ce2d8acdd6 100644
--- a/python/cudf/cudf/core/mixins/reductions.pyi
+++ b/python/cudf/cudf/core/mixins/reductions.pyi
@@ -2,8 +2,6 @@
 
 from __future__ import annotations
 
-from typing import Sequence, Union
-
 class Reducible:
     def sum(self, *args, **kwargs):
         ...
@@ -41,9 +39,6 @@ class Reducible:
     def median(self, *args, **kwargs):
         ...
 
-    def quantile(self, q: Union[float, Sequence[float]], interpolation: str, exact: bool, *args, **kwargs):
-        ...
-
     def argmax(self, *args, **kwargs):
         ...
 

From 2150f60b492f8d9c557648a36a0afc7252373b58 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 27 Jan 2022 11:35:25 -0800
Subject: [PATCH 20/39] Document missing parameters and remove unnecessary
 Reducible inheritance.

---
 python/cudf/cudf/core/column/numerical_base.py | 3 +--
 python/cudf/cudf/core/mixins/mixin_factory.py  | 6 ++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 70f7fc4444d..5e7d63693e9 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -12,10 +12,9 @@
 from cudf import _lib as libcudf
 from cudf._typing import ScalarLike
 from cudf.core.column import ColumnBase
-from cudf.core.mixins import Reducible
 
 
-class NumericalBaseColumn(ColumnBase, Reducible):
+class NumericalBaseColumn(ColumnBase):
     """A column composed of numerical data.
 
     This class encodes a standard interface for different types of columns
diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index d7efe2674d3..5377f0f1002 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -31,6 +31,12 @@ def _create_delegating_mixin(
 
     Parameters
     ----------
+    mixin_name : str
+        The name of the class. This argument should be the same as the object
+        that this function's output is assigned to, e.g.
+        :code:`Baz = _create_delegating_mixin("Baz", ...)`.
+    docstring : str
+        The documentation string for the mixin class.
     category_name : str
         The category of operations for which a mixin is being created. This
         name will be used to define the following attributes as shown in the

From c9395cb3be271c5782a0a2930b0368e64f24456d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 27 Jan 2022 12:14:47 -0800
Subject: [PATCH 21/39] Remove all superfluous args/kwargs parameters.

---
 python/cudf/cudf/core/column/column.py        |  9 ++--
 python/cudf/cudf/core/column/datetime.py      |  6 +--
 .../cudf/cudf/core/column/numerical_base.py   |  4 +-
 python/cudf/cudf/core/column/string.py        |  7 +--
 python/cudf/cudf/core/mixins/reductions.pyi   | 47 +++++++++----------
 5 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 57767e019da..8640eaa7432 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -187,7 +187,7 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool:
             return False
         return self.binary_operator("NULL_EQUALS", other).all()
 
-    def all(self, skipna: bool = True, *args, **kwargs) -> bool:
+    def all(self, skipna: bool = True) -> bool:
         # If all entries are null the result is True, including when the column
         # is empty.
         result_col = self.nans_to_nulls() if skipna else self
@@ -200,7 +200,7 @@ def all(self, skipna: bool = True, *args, **kwargs) -> bool:
 
         return result_col
 
-    def any(self, skipna: bool = True, *args, **kwargs) -> bool:
+    def any(self, skipna: bool = True) -> bool:
         # Early exit for fast cases.
         result_col = self.nans_to_nulls() if skipna else self
         if not skipna and result_col.has_nulls():
@@ -688,9 +688,6 @@ def quantile(
     ) -> ColumnBase:
         raise TypeError(f"cannot perform quantile with type {self.dtype}")
 
-    def median(self, skipna: bool = None, *args, **kwargs) -> ScalarLike:
-        raise TypeError(f"cannot perform median with type {self.dtype}")
-
     def take(
         self: T, indices: ColumnBase, nullify: bool = False, check_bounds=True
     ) -> T:
@@ -1103,7 +1100,7 @@ def searchsorted(
             values, side, ascending=ascending, na_position=na_position
         )
 
-    def unique(self, *args, **kwargs) -> ColumnBase:
+    def unique(self) -> ColumnBase:
         """
         Get unique values in the data
         """
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 64f44763454..e7442e1236b 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -350,9 +350,7 @@ def _default_na_value(self) -> DatetimeLikeScalar:
         """Returns the default NA value for this column"""
         return np.datetime64("nat", self.time_unit)
 
-    def mean(
-        self, skipna=None, dtype=np.float64, *args, **kwargs
-    ) -> ScalarLike:
+    def mean(self, skipna=None, dtype=np.float64) -> ScalarLike:
         return pd.Timestamp(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
             unit=self.time_unit,
@@ -371,7 +369,7 @@ def std(
             * _numpy_to_pandas_conversion[self.time_unit],
         )
 
-    def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timestamp:
+    def median(self, skipna: bool = None) -> pd.Timestamp:
         return pd.Timestamp(
             self.as_numerical.median(skipna=skipna), unit=self.time_unit
         )
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 5e7d63693e9..a6a124e1ac4 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -112,9 +112,7 @@ def var(self, dtype=np.float64, *args, **kwargs):
     def std(self, dtype=np.float64, *args, **kwargs):
         return self._reduce("std", dtype=dtype, *args, **kwargs)
 
-    def median(
-        self, skipna: bool = None, *args, **kwargs
-    ) -> NumericalBaseColumn:
+    def median(self, skipna: bool = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
 
         if self._can_return_nan(skipna=skipna):
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 4c503ba5a0b..e2c321381d8 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5089,12 +5089,7 @@ def to_arrow(self) -> pa.Array:
             return super().to_arrow()
 
     def sum(
-        self,
-        skipna: bool = None,
-        dtype: Dtype = None,
-        min_count: int = 0,
-        *args,
-        **kwargs,
+        self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0,
     ):
         result_col = self._process_for_reduction(
             skipna=skipna, min_count=min_count
diff --git a/python/cudf/cudf/core/mixins/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi
index 1ce2d8acdd6..600f30e9372 100644
--- a/python/cudf/cudf/core/mixins/reductions.pyi
+++ b/python/cudf/cudf/core/mixins/reductions.pyi
@@ -3,71 +3,68 @@
 from __future__ import annotations
 
 class Reducible:
-    def sum(self, *args, **kwargs):
+    def sum(self):
         ...
 
-    def product(self, *args, **kwargs):
+    def product(self):
         ...
 
-    def min(self, *args, **kwargs):
+    def min(self):
         ...
 
-    def max(self, *args, **kwargs):
+    def max(self):
         ...
 
-    def count(self, *args, **kwargs):
+    def count(self):
         ...
 
-    def any(self, *args, **kwargs):
+    def any(self):
         ...
 
-    def all(self, *args, **kwargs):
+    def all(self):
         ...
 
-    def sum_of_squares(self, *args, **kwargs):
+    def sum_of_squares(self):
         ...
 
-    def mean(self, *args, **kwargs):
+    def mean(self):
         ...
 
-    def var(self, *args, **kwargs):
+    def var(self):
         ...
 
-    def std(self, *args, **kwargs):
+    def std(self):
         ...
 
-    def median(self, *args, **kwargs):
+    def median(self):
         ...
 
-    def argmax(self, *args, **kwargs):
+    def argmax(self):
         ...
 
-    def argmin(self, *args, **kwargs):
+    def argmin(self):
         ...
 
-    def nunique(self, *args, **kwargs):
+    def nunique(self):
         ...
 
-    def nth(self, *args, **kwargs):
+    def nth(self):
         ...
 
-    def collect(self, *args, **kwargs):
+    def collect(self):
         ...
 
-    def unique(self, *args, **kwargs):
+    def prod(self):
         ...
 
-    def prod(self, *args, **kwargs):
+    def idxmin(self):
         ...
 
-    def idxmin(self, *args, **kwargs):
+    def idxmax(self):
         ...
 
-    def idxmax(self, *args, **kwargs):
+    def first(self):
         ...
 
-    def first(self, *args, **kwargs):
-        ...
-
-    def last(self, *args, **kwargs):
+    def last(self):
         ...

From 6f6cc6fbc7b50eb08625b2fdc8f9b2530b618094 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 27 Jan 2022 12:27:38 -0800
Subject: [PATCH 22/39] Fix signatures for methods of numerical columns called
 by datetime or timedelta columns.

---
 python/cudf/cudf/core/column/datetime.py      | 17 ++++++++-----
 .../cudf/cudf/core/column/numerical_base.py   | 22 ++++++++++++-----
 python/cudf/cudf/core/column/timedelta.py     | 24 ++++++++-----------
 3 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index e7442e1236b..d7da342bdcf 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -350,22 +350,27 @@ def _default_na_value(self) -> DatetimeLikeScalar:
         """Returns the default NA value for this column"""
         return np.datetime64("nat", self.time_unit)
 
-    def mean(self, skipna=None, dtype=np.float64) -> ScalarLike:
+    def mean(
+        self, skipna=None, min_count: int = 0, dtype=np.float64
+    ) -> ScalarLike:
         return pd.Timestamp(
-            self.as_numerical.mean(skipna=skipna, dtype=dtype),
+            self.as_numerical.mean(
+                skipna=skipna, min_count=min_count, dtype=dtype
+            ),
             unit=self.time_unit,
         )
 
     def std(
         self,
         skipna: bool = None,
-        ddof: int = 1,
+        min_count: int = 0,
         dtype: Dtype = np.float64,
-        *args,
-        **kwargs,
+        ddof: int = 1,
     ) -> pd.Timedelta:
         return pd.Timedelta(
-            self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype)
+            self.as_numerical.std(
+                skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
+            )
             * _numpy_to_pandas_conversion[self.time_unit],
         )
 
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index a6a124e1ac4..f491aae9fab 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -103,14 +103,24 @@ def quantile(
             )
         return result
 
-    def mean(self, dtype=np.float64, *args, **kwargs):
-        return self._reduce("mean", dtype=dtype, *args, **kwargs)
+    def mean(self, skipna: bool = None, min_count: int = 0, dtype=np.float64):
+        return self._reduce(
+            "mean", skipna=skipna, min_count=min_count, dtype=dtype
+        )
 
-    def var(self, dtype=np.float64, *args, **kwargs):
-        return self._reduce("var", dtype=dtype, *args, **kwargs)
+    def var(
+        self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1
+    ):
+        return self._reduce(
+            "var", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
+        )
 
-    def std(self, dtype=np.float64, *args, **kwargs):
-        return self._reduce("std", dtype=dtype, *args, **kwargs)
+    def std(
+        self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1
+    ):
+        return self._reduce(
+            "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
+        )
 
     def median(self, skipna: bool = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 42c294cfd0c..55adeb20a2a 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -396,19 +396,14 @@ def quantile(
         return result.astype(self.dtype)
 
     def sum(
-        self,
-        skipna: bool = None,
-        dtype: Dtype = None,
-        min_count=0,
-        *args,
-        **kwargs,
+        self, skipna: bool = None, min_count: int = 0, dtype: Dtype = None,
     ) -> pd.Timedelta:
         return pd.Timedelta(
-            # TODO: mypy doesn't fully support monkey-patching, so the
-            # monkey-patching of reductions creates some serious limitations.
-            # We'll have to see how bad the problem is.
+            # Since sum isn't overriden in Numerical[Base]Column, mypy only
+            # sees the signature from Reducible (which doesn't have the extra
+            # parameters from ColumnBase._reduce) so we have to ignore this.
             self.as_numerical.sum(  # type: ignore
-                skipna=skipna, dtype=dtype, min_count=min_count
+                skipna=skipna, min_count=min_count, dtype=dtype
             ),
             unit=self.time_unit,
         )
@@ -416,13 +411,14 @@ def sum(
     def std(
         self,
         skipna: bool = None,
-        ddof: int = 1,
+        min_count: int = 0,
         dtype: Dtype = np.float64,
-        *args,
-        **kwargs,
+        ddof: int = 1,
     ) -> pd.Timedelta:
         return pd.Timedelta(
-            self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype),
+            self.as_numerical.std(
+                skipna=skipna, min_count=min_count, ddof=ddof, dtype=dtype
+            ),
             unit=self.time_unit,
         )
 

From ac0a2452d32a87c4c7bfd16a5bee572f573bc7cf Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 27 Jan 2022 12:36:39 -0800
Subject: [PATCH 23/39] Remove more args/kwargs.

---
 python/cudf/cudf/core/column/categorical.py | 2 +-
 python/cudf/cudf/core/column/timedelta.py   | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index c210d631aff..de06e62cbb1 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1000,7 +1000,7 @@ def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase":
     def data_array_view(self) -> cuda.devicearray.DeviceNDArray:
         return self.codes.data_array_view
 
-    def unique(self, *args, **kwargs) -> CategoricalColumn:
+    def unique(self) -> CategoricalColumn:
         codes = self.as_numerical.unique()
         return column.build_categorical_column(
             categories=self.categories,
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 55adeb20a2a..0e5feb347aa 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -369,15 +369,13 @@ def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn:
             return self
         return libcudf.unary.cast(self, dtype=dtype)
 
-    def mean(
-        self, skipna=None, dtype: Dtype = np.float64, *args, **kwargs
-    ) -> pd.Timedelta:
+    def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.mean(skipna=skipna, dtype=dtype),
             unit=self.time_unit,
         )
 
-    def median(self, skipna: bool = None, *args, **kwargs) -> pd.Timedelta:
+    def median(self, skipna: bool = None) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.median(skipna=skipna), unit=self.time_unit
         )

From eb651bcd70ffc395fad981796563f090cf3f0d19 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 14 Feb 2022 11:38:42 -0500
Subject: [PATCH 24/39] Use a descriptor instead

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 91 ++++++++++---------
 1 file changed, 49 insertions(+), 42 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 5377f0f1002..67f49650d18 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -1,8 +1,17 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
+import functools
 import inspect
 
 
+def _partialmethod(method, *args1, **kwargs1):
+    @functools.wraps(method)
+    def wrapper(self, *args2, **kwargs2):
+        return method(self, *args1, *args2, **kwargs1, **kwargs2)
+
+    return wrapper
+
+
 def _create_delegating_mixin(
     mixin_name, docstring, category_name, operation_name, supported_operations
 ):
@@ -93,49 +102,46 @@ def _create_delegating_mixin(
     >>> print(bar.foo1.__doc__)
     Perform the operation foo1, which returns 42.
     """
-
     docstring_attr = f"{category_name}_DOCSTRINGS"
     validity_attr = f"_VALID_{category_name}S"
 
-    class OperationMixin:
-        @classmethod
-        def _add_operation(cls, operation):
-            # This function creates operations on-the-fly.
-
-            # Generate a signature without the `op` parameter.
-            base_operation = getattr(cls, operation_name)
-            base_params = inspect.signature(base_operation).parameters.values()
-            params = [p for p in base_params if p.name != "op"]
-            signature = inspect.Signature(parameters=params)
-
-            # Generate the list of arguments forwarded to the base operation.
-            arglist = ", ".join(
-                [
-                    f"{key}={key}"
-                    for key in signature.parameters
-                    if key not in ("self", "args", "kwargs")
-                ]
-            )
-            arglist = arglist + (", " if arglist else "") + "*args, **kwargs"
-
-            # Apply the formatted docstring of the base operation to the
-            # operation being created here.
-            docstring = base_operation.__doc__.format(
-                cls=cls.__name__,
-                op=operation,
-                **getattr(cls, docstring_attr, {}).get(operation, {}),
-            )
+    class Operation:
+        """
+        """
+
+        def __init__(self):
+            self._operation_name = operation_name
 
-            namespace = {}
-            exec_str = f"""
-def {operation}{str(signature)}:
-    '''{docstring}
-    '''
-    return self.{operation_name}(op="{operation}", {arglist})
-"""
-            exec(exec_str, namespace)
-            setattr(cls, operation, namespace[operation])
+        def __set_name__(self, owner, name):
+            self._name = name
 
+        def __get__(self, obj, owner=None):
+            base_operation = getattr(owner, self._operation_name)
+            retfunc = _partialmethod(base_operation, op=self._name)
+
+            retfunc.__doc__ = base_operation.__doc__.format(
+                cls=owner.__name__,
+                op=self._name,
+                **getattr(owner, docstring_attr, {}).get(self._name, {}),
+            )
+            retfunc.__name__ = self._name
+            retfunc_params = {
+                k: v
+                for k, v in inspect.signature(
+                    base_operation
+                ).parameters.items()
+                if k != "op"
+            }.values()
+            retfunc.__signature__ = inspect.Signature(retfunc_params)
+
+            setattr(owner, self._name, retfunc)
+
+            if obj is None:
+                return getattr(owner, self._name)
+            else:
+                return getattr(obj, self._name)
+
+    class OperationMixin:
         @classmethod
         def __init_subclass__(cls):
             # Only add the valid set of operations for a particular class.
@@ -147,11 +153,12 @@ def __init_subclass__(cls):
             assert (
                 len(invalid_operations) == 0
             ), f"Invalid requested operations: {invalid_operations}"
+
             for operation in valid_operations:
-                # Check if the operation is already defined so that subclasses
-                # can override the method if desired.
-                if not hasattr(cls, operation):
-                    cls._add_operation(operation)
+                if operation not in dir(cls):
+                    op_attr = Operation()
+                    setattr(cls, operation, op_attr)
+                    op_attr.__set_name__(cls, operation)
 
     def _operation(self, op: str, *args, **kwargs):
         raise NotImplementedError

From 7e2bc87c96b20c1f60c1b5ef9ea278f6776b6041 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 14 Feb 2022 11:39:49 -0500
Subject: [PATCH 25/39] Doc

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 67f49650d18..0018985ae83 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -4,6 +4,8 @@
 import inspect
 
 
+# A simple `partialmethod` that allows setting attributes such as
+# __doc__ on instances.
 def _partialmethod(method, *args1, **kwargs1):
     @functools.wraps(method)
     def wrapper(self, *args2, **kwargs2):

From 53a623de4c5f06e48787ce44a77eb422755c7acf Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Mon, 14 Feb 2022 18:52:00 -0500
Subject: [PATCH 26/39] Update python/cudf/cudf/core/mixins/mixin_factory.py

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/cudf/cudf/core/mixins/mixin_factory.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 0018985ae83..ac89584b7f5 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -127,13 +127,13 @@ def __get__(self, obj, owner=None):
                 **getattr(owner, docstring_attr, {}).get(self._name, {}),
             )
             retfunc.__name__ = self._name
-            retfunc_params = {
-                k: v
+            retfunc_params = [
+                v
                 for k, v in inspect.signature(
                     base_operation
                 ).parameters.items()
                 if k != "op"
-            }.values()
+            ]
             retfunc.__signature__ = inspect.Signature(retfunc_params)
 
             setattr(owner, self._name, retfunc)

From 9b78852fec19b0742384bdffe2e36890f3d8de32 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 14 Feb 2022 18:54:28 -0500
Subject: [PATCH 27/39] Merge __set_name__ into __init__

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 0018985ae83..920f273bdf0 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -108,17 +108,11 @@ def _create_delegating_mixin(
     validity_attr = f"_VALID_{category_name}S"
 
     class Operation:
-        """
-        """
-
-        def __init__(self):
-            self._operation_name = operation_name
-
-        def __set_name__(self, owner, name):
+        def __init__(self, name):
             self._name = name
 
         def __get__(self, obj, owner=None):
-            base_operation = getattr(owner, self._operation_name)
+            base_operation = getattr(owner, operation_name)
             retfunc = _partialmethod(base_operation, op=self._name)
 
             retfunc.__doc__ = base_operation.__doc__.format(
@@ -158,9 +152,8 @@ def __init_subclass__(cls):
 
             for operation in valid_operations:
                 if operation not in dir(cls):
-                    op_attr = Operation()
+                    op_attr = Operation(operation)
                     setattr(cls, operation, op_attr)
-                    op_attr.__set_name__(cls, operation)
 
     def _operation(self, op: str, *args, **kwargs):
         raise NotImplementedError

From 1e580f71b6cdeac9a146433005b1cd7cf7d30405 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Feb 2022 10:34:48 -0800
Subject: [PATCH 28/39] Replace functools with explicitly patching attributes
 (necessary for correctness).

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index e8b5e128dc9..94cbfbc9053 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -1,13 +1,11 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-import functools
 import inspect
 
 
 # A simple `partialmethod` that allows setting attributes such as
 # __doc__ on instances.
 def _partialmethod(method, *args1, **kwargs1):
-    @functools.wraps(method)
     def wrapper(self, *args2, **kwargs2):
         return method(self, *args1, *args2, **kwargs1, **kwargs2)
 
@@ -121,6 +119,10 @@ def __get__(self, obj, owner=None):
                 **getattr(owner, docstring_attr, {}).get(self._name, {}),
             )
             retfunc.__name__ = self._name
+            retfunc.__qualname__ = ".".join([owner.__name__, self._name])
+            retfunc.__module__ = base_operation.__module__
+            retfunc.__annotations__ = base_operation.__annotations__.copy()
+            retfunc.__annotations__.pop("op")
             retfunc_params = [
                 v
                 for k, v in inspect.signature(

From f5ec1d672259e4737219dc1e13131b1be704d454 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Feb 2022 10:36:23 -0800
Subject: [PATCH 29/39] Fix copyrights.

---
 ci/checks/copyright.py                         | 2 +-
 python/cudf/cudf/core/column/datetime.py       | 2 +-
 python/cudf/cudf/core/column/numerical_base.py | 2 +-
 python/cudf/cudf/core/column/timedelta.py      | 2 +-
 python/cudf/cudf/core/window/rolling.py        | 2 +-
 python/cudf/cudf/tests/test_stats.py           | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
index d72fd95fea3..9c37a6ab041 100644
--- a/ci/checks/copyright.py
+++ b/ci/checks/copyright.py
@@ -230,4 +230,4 @@ def checkCopyright_main():
 
 if __name__ == "__main__":
     import sys
-    sys.exit(checkCopyright_main())
\ No newline at end of file
+    sys.exit(checkCopyright_main())
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 86bf30170bd..44e5c6f62f1 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index f491aae9fab..db333328692 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 """Define an interface for columns that can perform numerical operations."""
 
 from __future__ import annotations
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index f620135eecf..7a5f777e88e 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index ce57c2c1215..578a6c33c2c 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION
+# Copyright (c) 2020-2022, NVIDIA CORPORATION
 
 import itertools
 
diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py
index bed5fe2ca99..7bf339d6ab7 100644
--- a/python/cudf/cudf/tests/test_stats.py
+++ b/python/cudf/cudf/tests/test_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 from concurrent.futures import ThreadPoolExecutor
 

From b8884f18e28d0ce439a48f171ffdc7e6b29f27ae Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Feb 2022 10:43:16 -0800
Subject: [PATCH 30/39] Address PR comments.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 94cbfbc9053..45375bd2daa 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -13,14 +13,18 @@ def wrapper(self, *args2, **kwargs2):
 
 
 def _create_delegating_mixin(
-    mixin_name, docstring, category_name, operation_name, supported_operations
+    mixin_name,
+    docstring,
+    category_name,
+    category_operation_name,
+    supported_operations,
 ):
     """Factory for mixins defining collections of delegated operations.
 
     This function generates mixins based on two common paradigms in cuDF:
 
     1. libcudf groups many operations into categories using a common API. These
-       APIs usually accept an enum to delinate the specific operation to
+       APIs usually accept an enum to delineate the specific operation to
        perform, e.g. binary operations use the `binary_operator` enum when
        calling the `binary_operation` function. cuDF Python mimics this
        structure by having operations within a category delegate to a common
@@ -53,7 +57,7 @@ def _create_delegating_mixin(
             - f'_{category_name}_DOCSTRINGS'
             - f'_VALID_{category_name}S'
             - f'_SUPPORTED_{category_name}S'
-    operation_name : str
+    category_operation_name : str
         The name given to the core function implementing this category of
         operations.  The corresponding function is the entrypoint for child
         classes.
@@ -110,7 +114,7 @@ def __init__(self, name):
             self._name = name
 
         def __get__(self, obj, owner=None):
-            base_operation = getattr(owner, operation_name)
+            base_operation = getattr(owner, category_operation_name)
             retfunc = _partialmethod(base_operation, op=self._name)
 
             retfunc.__doc__ = base_operation.__doc__.format(
@@ -162,7 +166,7 @@ def _operation(self, op: str, *args, **kwargs):
 
     OperationMixin.__name__ = mixin_name
     OperationMixin.__doc__ = docstring
-    setattr(OperationMixin, operation_name, _operation)
+    setattr(OperationMixin, category_operation_name, _operation)
     # This attribute is set in case lookup is convenient at a later point, but
     # it is not strictly necessary since `supported_operations` is part of the
     # closure associated with the class's creation.

From f88f78453a9e3a95f14e392d936ff28374c9bc69 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Feb 2022 10:55:58 -0800
Subject: [PATCH 31/39] Move Operation out of the function to reduce local
 state.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 76 ++++++++++---------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 45375bd2daa..3be31fe55b4 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -12,6 +12,42 @@ def wrapper(self, *args2, **kwargs2):
     return wrapper
 
 
+class Operation:
+    def __init__(self, name, category_name, base_operation):
+        self._name = name
+        self._docstring_attr = f"{category_name}_DOCSTRINGS"
+        self._base_operation = base_operation
+
+    def __get__(self, obj, owner=None):
+        retfunc = _partialmethod(self._base_operation, op=self._name)
+
+        retfunc.__doc__ = self._base_operation.__doc__.format(
+            cls=owner.__name__,
+            op=self._name,
+            **getattr(owner, self._docstring_attr, {}).get(self._name, {}),
+        )
+        retfunc.__name__ = self._name
+        retfunc.__qualname__ = ".".join([owner.__name__, self._name])
+        retfunc.__module__ = self._base_operation.__module__
+        retfunc.__annotations__ = self._base_operation.__annotations__.copy()
+        retfunc.__annotations__.pop("op")
+        retfunc_params = [
+            v
+            for k, v in inspect.signature(
+                self._base_operation
+            ).parameters.items()
+            if k != "op"
+        ]
+        retfunc.__signature__ = inspect.Signature(retfunc_params)
+
+        setattr(owner, self._name, retfunc)
+
+        if obj is None:
+            return getattr(owner, self._name)
+        else:
+            return getattr(obj, self._name)
+
+
 def _create_delegating_mixin(
     mixin_name,
     docstring,
@@ -106,43 +142,8 @@ def _create_delegating_mixin(
     >>> print(bar.foo1.__doc__)
     Perform the operation foo1, which returns 42.
     """
-    docstring_attr = f"{category_name}_DOCSTRINGS"
     validity_attr = f"_VALID_{category_name}S"
 
-    class Operation:
-        def __init__(self, name):
-            self._name = name
-
-        def __get__(self, obj, owner=None):
-            base_operation = getattr(owner, category_operation_name)
-            retfunc = _partialmethod(base_operation, op=self._name)
-
-            retfunc.__doc__ = base_operation.__doc__.format(
-                cls=owner.__name__,
-                op=self._name,
-                **getattr(owner, docstring_attr, {}).get(self._name, {}),
-            )
-            retfunc.__name__ = self._name
-            retfunc.__qualname__ = ".".join([owner.__name__, self._name])
-            retfunc.__module__ = base_operation.__module__
-            retfunc.__annotations__ = base_operation.__annotations__.copy()
-            retfunc.__annotations__.pop("op")
-            retfunc_params = [
-                v
-                for k, v in inspect.signature(
-                    base_operation
-                ).parameters.items()
-                if k != "op"
-            ]
-            retfunc.__signature__ = inspect.Signature(retfunc_params)
-
-            setattr(owner, self._name, retfunc)
-
-            if obj is None:
-                return getattr(owner, self._name)
-            else:
-                return getattr(obj, self._name)
-
     class OperationMixin:
         @classmethod
         def __init_subclass__(cls):
@@ -156,9 +157,12 @@ def __init_subclass__(cls):
                 len(invalid_operations) == 0
             ), f"Invalid requested operations: {invalid_operations}"
 
+            base_operation = getattr(cls, category_operation_name)
             for operation in valid_operations:
                 if operation not in dir(cls):
-                    op_attr = Operation(operation)
+                    op_attr = Operation(
+                        operation, category_name, base_operation
+                    )
                     setattr(cls, operation, op_attr)
 
     def _operation(self, op: str, *args, **kwargs):

From 502d07e0ddced05c6bd3fbb2d087a4d3a9a1d23a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Feb 2022 11:21:34 -0800
Subject: [PATCH 32/39] Some cleanup.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 62 +++++++++++++++----
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 3be31fe55b4..6281bce039a 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -13,18 +13,37 @@ def wrapper(self, *args2, **kwargs2):
 
 
 class Operation:
-    def __init__(self, name, category_name, base_operation):
+    """Descriptor used to define operations for delegating mixins.
+
+    This class is designed to be assigned to the attributes (the delegating
+    methods) defined by the OperationMixin. This class will create the method
+    and mimic all the expected attributes for that method to appear as though
+    it was originally designed on the class. The use of the descriptor pattern
+    ensures that the method is only created the first time it is invoked, after
+    which all further calls use the callable generated on the first invocation.
+
+    Parameters
+    ----------
+    name : str
+        The name of the operation.
+    docstring_format_args : str
+        The attribute of the owning class from which to pull format parameters
+        for this operation's docstring.
+    base_operation : str
+        The underlying operation function to be invoked when operation `name`
+        is called on the owning class.
+    """
+
+    def __init__(self, name, docstring_format_args, base_operation):
         self._name = name
-        self._docstring_attr = f"{category_name}_DOCSTRINGS"
+        self._docstring_format_args = docstring_format_args
         self._base_operation = base_operation
 
     def __get__(self, obj, owner=None):
         retfunc = _partialmethod(self._base_operation, op=self._name)
 
         retfunc.__doc__ = self._base_operation.__doc__.format(
-            cls=owner.__name__,
-            op=self._name,
-            **getattr(owner, self._docstring_attr, {}).get(self._name, {}),
+            cls=owner.__name__, op=self._name, **self._docstring_format_args,
         )
         retfunc.__name__ = self._name
         retfunc.__qualname__ = ".".join([owner.__name__, self._name])
@@ -88,8 +107,8 @@ def _create_delegating_mixin(
         The documentation string for the mixin class.
     category_name : str
         The category of operations for which a mixin is being created. This
-        name will be used to define the following attributes as shown in the
-        example below:
+        name will be used to define or access the following attributes as shown
+        in the example below:
             - f'_{category_name}_DOCSTRINGS'
             - f'_VALID_{category_name}S'
             - f'_SUPPORTED_{category_name}S'
@@ -142,7 +161,14 @@ def _create_delegating_mixin(
     >>> print(bar.foo1.__doc__)
     Perform the operation foo1, which returns 42.
     """
+    # The first two attributes may be defined on subclasses of the generated
+    # OperationMixin to indicate valid attributes and parameters to use when
+    # formatting docstrings. The supported_attr will be defined on the
+    # OperationMixin itself to indicate what operations its subclass may
+    # inherit from it.
     validity_attr = f"_VALID_{category_name}S"
+    docstring_attr = f"{category_name}_DOCSTRINGS"
+    supported_attr = f"_SUPPORTED_{category_name}S"
 
     class OperationMixin:
         @classmethod
@@ -160,22 +186,32 @@ def __init_subclass__(cls):
             base_operation = getattr(cls, category_operation_name)
             for operation in valid_operations:
                 if operation not in dir(cls):
+                    docstring_format_args = getattr(
+                        cls, docstring_attr, {}
+                    ).get(operation, {})
                     op_attr = Operation(
-                        operation, category_name, base_operation
+                        operation, docstring_format_args, base_operation
                     )
                     setattr(cls, operation, op_attr)
 
+    OperationMixin.__name__ = mixin_name
+    OperationMixin.__qualname__ = mixin_name
+    OperationMixin.__doc__ = docstring
+
     def _operation(self, op: str, *args, **kwargs):
         raise NotImplementedError
 
-    OperationMixin.__name__ = mixin_name
-    OperationMixin.__doc__ = docstring
+    _operation.__name__ = category_operation_name
+    _operation.__qualname__ = ".".join([mixin_name, category_operation_name])
+    _operation.__doc__ = (
+        f"The core {category_name.lower()} function. Must be overridden by "
+        "subclasses, the default implementation raises a NotImplementedError."
+    )
+
     setattr(OperationMixin, category_operation_name, _operation)
     # This attribute is set in case lookup is convenient at a later point, but
     # it is not strictly necessary since `supported_operations` is part of the
     # closure associated with the class's creation.
-    setattr(
-        OperationMixin, f"_SUPPORTED_{category_name}S", supported_operations
-    )
+    setattr(OperationMixin, supported_attr, supported_operations)
 
     return OperationMixin

From 6ed61aaf56553aa4212b90225cfd241e8bf3ab6c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Feb 2022 11:28:29 -0800
Subject: [PATCH 33/39] Revert changes to copyright file.

---
 ci/checks/copyright.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
index 9c37a6ab041..d72fd95fea3 100644
--- a/ci/checks/copyright.py
+++ b/ci/checks/copyright.py
@@ -230,4 +230,4 @@ def checkCopyright_main():
 
 if __name__ == "__main__":
     import sys
-    sys.exit(checkCopyright_main())
+    sys.exit(checkCopyright_main())
\ No newline at end of file

From b7859dd44b59001609e47dc22ab53b63fa764c4c Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 16 Feb 2022 11:23:06 -0500
Subject: [PATCH 34/39] Better explain the need for a custom partialmethod

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 6281bce039a..93a4d394966 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -3,8 +3,9 @@
 import inspect
 
 
-# A simple `partialmethod` that allows setting attributes such as
-# __doc__ on instances.
+# `functools.partialmethod` does not allow setting attributes such as
+# __doc__ on the resulting method. So we use a simple alternative to
+# it here:
 def _partialmethod(method, *args1, **kwargs1):
     def wrapper(self, *args2, **kwargs2):
         return method(self, *args1, *args2, **kwargs1, **kwargs2)

From 764eb8106afe73251c2c82e0d961db1a4e455a26 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 17 Feb 2022 12:35:45 -0800
Subject: [PATCH 35/39] Address most PR comments.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 36 +++++++++++--------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 93a4d394966..2e9dce0633c 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -43,14 +43,20 @@ def __init__(self, name, docstring_format_args, base_operation):
     def __get__(self, obj, owner=None):
         retfunc = _partialmethod(self._base_operation, op=self._name)
 
-        retfunc.__doc__ = self._base_operation.__doc__.format(
-            cls=owner.__name__, op=self._name, **self._docstring_format_args,
-        )
+        # Required attributes that will exist.
         retfunc.__name__ = self._name
         retfunc.__qualname__ = ".".join([owner.__name__, self._name])
         retfunc.__module__ = self._base_operation.__module__
+
+        if self._base_operation.__doc__ is not None:
+            retfunc.__doc__ = self._base_operation.__doc__.format(
+                cls=owner.__name__,
+                op=self._name,
+                **self._docstring_format_args,
+            )
+
         retfunc.__annotations__ = self._base_operation.__annotations__.copy()
-        retfunc.__annotations__.pop("op")
+        retfunc.__annotations__.pop("op", None)
         retfunc_params = [
             v
             for k, v in inspect.signature(
@@ -72,7 +78,7 @@ def _create_delegating_mixin(
     mixin_name,
     docstring,
     category_name,
-    category_operation_name,
+    base_operation_name,
     supported_operations,
 ):
     """Factory for mixins defining collections of delegated operations.
@@ -111,9 +117,9 @@ def _create_delegating_mixin(
         name will be used to define or access the following attributes as shown
         in the example below:
             - f'_{category_name}_DOCSTRINGS'
-            - f'_VALID_{category_name}S'
-            - f'_SUPPORTED_{category_name}S'
-    category_operation_name : str
+            - f'_VALID_{category_name}S'  # The subset of ops a subclass allows
+            - f'_SUPPORTED_{category_name}S'  # The ops supported by the mixin
+    base_operation_name : str
         The name given to the core function implementing this category of
         operations.  The corresponding function is the entrypoint for child
         classes.
@@ -130,7 +136,7 @@ def _create_delegating_mixin(
     >>> # The above is equivalent to defining a class like so:
     ... class MyFoo:
     ...     # The set of valid foo operations.
-    ...     _VALID_FOOS = {"foo1", "foo2"}
+    ...     _SUPPORTED_FOOS = {"foo1", "foo2"}
     ...
     ...     # This is the method for child classes to override. Note that the
     ...     # first parameter is always called "op".
@@ -139,7 +145,7 @@ def _create_delegating_mixin(
 
     >>> # MyFoo can be used as follows.
     >>> class BarImplementsFoo(MyFoo):
-    ...     _SUPPORTED_FOOS = ("foo1",)
+    ...     _VALID_FOOS = ("foo1",)
     ...     _FOO_DOCSTRINGS = {"ret": "42"}
     ...
     ...     # This method's docstring will be formatted and used for all valid
@@ -168,7 +174,7 @@ def _create_delegating_mixin(
     # OperationMixin itself to indicate what operations its subclass may
     # inherit from it.
     validity_attr = f"_VALID_{category_name}S"
-    docstring_attr = f"{category_name}_DOCSTRINGS"
+    docstring_attr = f"_{category_name}_DOCSTRINGS"
     supported_attr = f"_SUPPORTED_{category_name}S"
 
     class OperationMixin:
@@ -184,7 +190,7 @@ def __init_subclass__(cls):
                 len(invalid_operations) == 0
             ), f"Invalid requested operations: {invalid_operations}"
 
-            base_operation = getattr(cls, category_operation_name)
+            base_operation = getattr(cls, base_operation_name)
             for operation in valid_operations:
                 if operation not in dir(cls):
                     docstring_format_args = getattr(
@@ -202,14 +208,14 @@ def __init_subclass__(cls):
     def _operation(self, op: str, *args, **kwargs):
         raise NotImplementedError
 
-    _operation.__name__ = category_operation_name
-    _operation.__qualname__ = ".".join([mixin_name, category_operation_name])
+    _operation.__name__ = base_operation_name
+    _operation.__qualname__ = ".".join([mixin_name, base_operation_name])
     _operation.__doc__ = (
         f"The core {category_name.lower()} function. Must be overridden by "
         "subclasses, the default implementation raises a NotImplementedError."
     )
 
-    setattr(OperationMixin, category_operation_name, _operation)
+    setattr(OperationMixin, base_operation_name, _operation)
     # This attribute is set in case lookup is convenient at a later point, but
     # it is not strictly necessary since `supported_operations` is part of the
     # closure associated with the class's creation.

From 1303bb7a773d4ef94016762cdf35c79430ac8f3c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 18 Feb 2022 12:08:22 -0800
Subject: [PATCH 36/39] Enable overriding of the base operation in child
 classes.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 2e9dce0633c..8c88e886a54 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -74,6 +74,30 @@ def __get__(self, obj, owner=None):
             return getattr(obj, self._name)
 
 
+def _should_define_operation(cls, operation, base_operation_name):
+    if operation not in dir(cls):
+        return True
+
+    # If the class doesn't override the base operation we stick to whatever
+    # parent implementation exists.
+    if base_operation_name not in cls.__dict__:
+        return False
+
+    # There are two possibilities here:
+    # 1. A parent class manually overrode the operation. That override takes
+    #    precedence even if the current class overrode the base_operation.
+    # 2. A parent class has an auto-generated operation. The current class must
+    #    override it so that its base operation is used rather than the parent.
+    for base_cls in cls.__mro__:
+        # The first attribute in the MRO is the one that will be used.
+        if operation in base_cls.__dict__:
+            return isinstance(base_cls.__dict__[operation], Operation)
+
+    # This line should be unreachable since we know the attribute exists
+    # somewhere in the MRO if the for loop was entered.
+    assert False, "Operation attribute not found in hierarchy."
+
+
 def _create_delegating_mixin(
     mixin_name,
     docstring,
@@ -192,7 +216,9 @@ def __init_subclass__(cls):
 
             base_operation = getattr(cls, base_operation_name)
             for operation in valid_operations:
-                if operation not in dir(cls):
+                if _should_define_operation(
+                    cls, operation, base_operation_name
+                ):
                     docstring_format_args = getattr(
                         cls, docstring_attr, {}
                     ).get(operation, {})

From 9f7f207b0b0a93717b7fcb9876c0b57835136a5c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 23 Feb 2022 16:12:41 -0800
Subject: [PATCH 37/39] Properly support composition of OperationMixins.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index 8c88e886a54..ae79941baf9 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -204,6 +204,12 @@ def _create_delegating_mixin(
     class OperationMixin:
         @classmethod
         def __init_subclass__(cls):
+            # Support composition of various OperationMixins. Note that since
+            # this __init_subclass__ is defined on mixins, it does not prohibit
+            # classes that inherit it from implementing this method as well as
+            # long as those implementations also include this super call.
+            super().__init_subclass__()
+
             # Only add the valid set of operations for a particular class.
             valid_operations = set()
             for base_cls in cls.__mro__:

From 8eb72debc3853ac10d44b3aa5ce4fa45bd4cc2fa Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 24 Feb 2022 17:36:19 -0800
Subject: [PATCH 38/39] Add a slightly extended comment.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index ae79941baf9..b44a7d6a8c6 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -83,11 +83,16 @@ def _should_define_operation(cls, operation, base_operation_name):
     if base_operation_name not in cls.__dict__:
         return False
 
-    # There are two possibilities here:
-    # 1. A parent class manually overrode the operation. That override takes
-    #    precedence even if the current class overrode the base_operation.
-    # 2. A parent class has an auto-generated operation. The current class must
-    #    override it so that its base operation is used rather than the parent.
+    # At this point we know that the class has the operation defined but it
+    # also overrides the base operation. Since this function is called before
+    # the operation is defined on the current class, we know that it inherited
+    # the operation from a parent. We therefore have two possibilities:
+    # 1. A parent class manually defined the operation. That override takes
+    #    precedence even if the current class defined the base operation.
+    # 2. A parent class has an auto-generated operation, i.e. it is of type
+    #    Operation and was created by OperationMixin.__init_subclass__. The
+    #    current class must override it so that its base operation is used
+    #    rather than the parent's base operation.
     for base_cls in cls.__mro__:
         # The first attribute in the MRO is the one that will be used.
         if operation in base_cls.__dict__:

From 74e7445bb9d4277bdf59f975967201eb71eb2cb3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 25 Feb 2022 10:23:21 -0800
Subject: [PATCH 39/39] Update docstring with concrete example.

---
 python/cudf/cudf/core/mixins/mixin_factory.py | 70 +++++++++----------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py
index b44a7d6a8c6..ecb18f61830 100644
--- a/python/cudf/cudf/core/mixins/mixin_factory.py
+++ b/python/cudf/cudf/core/mixins/mixin_factory.py
@@ -158,44 +158,42 @@ def _create_delegating_mixin(
 
     Examples
     --------
-    >>> MyFoo = _create_delegating_mixin(
-    ...     "MyFoo", "MyFoo's docstring", "FOO", "do_foo", {"foo1", "foo2"}
-    ... )
-
-    >>> # The above is equivalent to defining a class like so:
-    ... class MyFoo:
-    ...     # The set of valid foo operations.
-    ...     _SUPPORTED_FOOS = {"foo1", "foo2"}
+    >>> # The class below:
+    >>> class Person:
+    ...     def _greet(self, op):
+    ...         print(op)
     ...
-    ...     # This is the method for child classes to override. Note that the
-    ...     # first parameter is always called "op".
-    ...     def do_foo(self, op, *args, **kwargs):
-    ...         raise NotImplementedError
-
-    >>> # MyFoo can be used as follows.
-    >>> class BarImplementsFoo(MyFoo):
-    ...     _VALID_FOOS = ("foo1",)
-    ...     _FOO_DOCSTRINGS = {"ret": "42"}
+    ...     def hello(self):
+    ...         self._greet("hello")
+    ...
+    ...     def goodbye(self):
+    ...         self._greet("goodbye")
+    >>> # can  be rewritten using a delegating mixin as follows:
+    >>> Greeter = _create_delegating_mixin(
+    ...     "Greeter", "", "GREETING", "_greet", {"hello", "goodbye", "hey"}
+    ... )
+    >>> # The `hello` and `goodbye` methods will now be automatically generated
+    >>> # for the Person class below.
+    >>> class Person(Greeter):
+    ...     _VALID_GREETINGS = {"hello", "goodbye"}
     ...
-    ...     # This method's docstring will be formatted and used for all valid
-    ...     # operations. The `op` key is always automatically filled in, while
-    ...     # other keys are formatted using _FOO_DOCSTRINGS.
-    ...     def do_foo(self, op, *args, **kwargs):
-    ...         '''Perform the operation {op}, which returns {ret}.'''
-    ...         return 42
-
-    >>> bar = BarImplementsFoo()
-    >>> print(bar.foo1())
-    42
-
-    >>> # This will raise an AttributeError because foo2 is not supported by
-    >>> # the BarImplementsFoo subclass of MyFoo.
-    >>> # print(bar.foo2())
-
-    >>> # The docstring is formatted with the operation name as well as any
-    >>> # additional keys provided via the _FOO_DOCSTRINGS parameter.
-    >>> print(bar.foo1.__doc__)
-    Perform the operation foo1, which returns 42.
+    ...     def _greet(self, op: str):
+    ...         '''Say {op}.'''
+    ...         print(op)
+    >>> mom = Person()
+    >>> mom.hello()
+    hello
+    >>> # The Greeter class could also enable the `hey` method, but Person did
+    >>> # not include it in the _VALID_GREETINGS set so it will not exist.
+    >>> mom.hey()
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'Person' object has no attribute 'hey'
+    >>> # The docstrings for each method are generated by formatting the _greet
+    >>> # docstring with the operation name as well as any additional keys
+    >>> # provided via the _GREETING_DOCSTRINGS parameter.
+    >>> print(mom.hello.__doc__)
+    Say hello.
     """
     # The first two attributes may be defined on subclasses of the generated
     # OperationMixin to indicate valid attributes and parameters to use when