From 75c357053fa10d034cc531d3f7ad6821ae5af4d3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 10 Aug 2021 12:52:20 -0700
Subject: [PATCH 01/13] Enable axis=1 for scans.

---
 python/cudf/cudf/core/dataframe.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0aafae0a85b..a068e992a7d 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6355,10 +6355,6 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         -------
         DataFrame
 
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
         Examples
         --------
         >>> import cudf
@@ -6370,9 +6366,6 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         2  1  7
         3  1  7
         """
-        if axis not in (0, "index", None):
-            raise NotImplementedError("Only axis=0 is currently supported.")
-
         return self._apply_support_method(
             "cummin", axis=axis, skipna=skipna, *args, **kwargs
         )
@@ -6392,10 +6385,6 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs):
         -------
         DataFrame
 
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
         Examples
         --------
         >>> import cudf
@@ -6407,9 +6396,6 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs):
         2  3   9
         3  4  10
         """
-        if axis not in (0, "index", None):
-            raise NotImplementedError("Only axis=0 is currently supported.")
-
         return self._apply_support_method(
             "cummax", axis=axis, skipna=skipna, *args, **kwargs
         )
@@ -6430,10 +6416,6 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         -------
         DataFrame
 
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
         Examples
         --------
         >>> import cudf
@@ -6445,9 +6427,6 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         2   6  24
         3  10  34
         """
-        if axis not in (0, "index", None):
-            raise NotImplementedError("Only axis=0 is currently supported.")
-
         return self._apply_support_method(
             "cumsum", axis=axis, skipna=skipna, *args, **kwargs
         )
@@ -6467,10 +6446,6 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         -------
         DataFrame
 
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
         Examples
         --------
         >>> import cudf
@@ -6482,9 +6457,6 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         2   6   504
         3  24  5040
         """
-        if axis not in (0, "index", None):
-            raise NotImplementedError("Only axis=0 is currently supported.")
-
         return self._apply_support_method(
             "cumprod", axis=axis, skipna=skipna, *args, **kwargs
         )

From 5ed37091fa4061475a4461935850518c18d13d1b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 10 Aug 2021 13:34:01 -0700
Subject: [PATCH 02/13] Document axis, initial implementation of
 DataFrame._scan.

---
 python/cudf/cudf/core/dataframe.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a068e992a7d..5d7cb8eb898 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6340,6 +6340,24 @@ def _reduce(
         elif axis == 1:
             return self._apply_support_method_axis_1(op, **kwargs)
 
+    def _scan(
+        self, op, axis=None, *args, **kwargs,
+    ):
+        axis = self._get_axis_from_axis_arg(axis)
+
+        if axis == 0:
+            result = [
+                getattr(self[col], op)(*args, **kwargs)
+                for col in self._data.names
+            ]
+
+            return DataFrame._from_data(
+                {col: result[i] for i, col in enumerate(self._data.names)},
+                index=result[0].index,
+            )
+        elif axis == 1:
+            return self._apply_support_method_axis_1(op, **kwargs)
+
     def cummin(self, axis=None, skipna=True, *args, **kwargs):
         """
         Return cumulative minimum of the DataFrame.
@@ -6347,6 +6365,8 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         Parameters
         ----------
 
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
         skipna: bool, default True
             Exclude NA/null values. If an entire row/column is NA,
             the result will be NA.
@@ -6366,9 +6386,7 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         2  1  7
         3  1  7
         """
-        return self._apply_support_method(
-            "cummin", axis=axis, skipna=skipna, *args, **kwargs
-        )
+        return self._scan("cummin", axis=axis, skipna=skipna, *args, **kwargs)
 
     def cummax(self, axis=None, skipna=True, *args, **kwargs):
         """
@@ -6377,6 +6395,8 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs):
         Parameters
         ----------
 
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
         skipna: bool, default True
             Exclude NA/null values. If an entire row/column is NA,
             the result will be NA.
@@ -6407,6 +6427,8 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         Parameters
         ----------
 
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
         skipna: bool, default True
             Exclude NA/null values. If an entire row/column is NA,
             the result will be NA.
@@ -6438,6 +6460,8 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         Parameters
         ----------
 
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
         skipna: bool, default True
             Exclude NA/null values. If an entire row/column is NA,
             the result will be NA.

From 5b5502afec8e0e09397d1ca8dde03e15f984493d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 10 Aug 2021 15:07:20 -0700
Subject: [PATCH 03/13] Unify scan implementations for Series.

---
 python/cudf/cudf/core/series.py | 146 +++++++++-----------------------
 1 file changed, 39 insertions(+), 107 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 177208fa921..3381de88182 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3933,6 +3933,39 @@ def count(self, level=None, **kwargs):
 
         return self.valid_count
 
+    def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
+        if axis not in (None, 0):
+            raise NotImplementedError("axis parameter is not implemented yet")
+
+        skipna = True if skipna is None else skipna
+
+        if skipna:
+            result_col = self.nans_to_nulls()._column
+        else:
+            result_col = self._column.copy()
+            if result_col.has_nulls:
+                # Workaround as find_first_value doesn't seem to work
+                # incase of bools.
+                first_index = int(
+                    result_col.isnull().astype("int8").find_first_value(1)
+                )
+                result_col[first_index:] = None
+
+        if (
+            cast_to_int
+            and not is_decimal_dtype(result_col.dtype)
+            and (
+                np.issubdtype(result_col.dtype, np.integer)
+                or np.issubdtype(result_col.dtype, np.bool_)
+            )
+        ):
+            # For reductions that accumulate a value (e.g. sum, not max) pandas
+            # returns an int64 dtype for all input int or bool dtypes.
+            result_col = result_col.astype(np.int64)
+        return Series._from_data(
+            {self.name: result_col._apply_scan_op(op)}, index=self.index,
+        )
+
     def cummin(self, axis=None, skipna=True, *args, **kwargs):
         """
         Return cumulative minimum of the Series.
@@ -3963,27 +3996,7 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         3    1
         4    1
         """
-
-        if axis not in (None, 0):
-            raise NotImplementedError("axis parameter is not implemented yet")
-
-        skipna = True if skipna is None else skipna
-
-        if skipna:
-            result_col = self.nans_to_nulls()._column
-        else:
-            result_col = self._column.copy()
-            if result_col.has_nulls:
-                # Workaround as find_first_value doesn't seem to work
-                # incase of bools.
-                first_index = int(
-                    result_col.isnull().astype("int8").find_first_value(1)
-                )
-                result_col[first_index:] = None
-
-        return Series(
-            result_col._apply_scan_op("min"), name=self.name, index=self.index,
-        )
+        return self._scan("min", axis=axis, skipna=skipna)
 
     def cummax(self, axis=0, skipna=True, *args, **kwargs):
         """
@@ -4015,24 +4028,7 @@ def cummax(self, axis=0, skipna=True, *args, **kwargs):
         3    5
         4    5
         """
-        if axis not in (None, 0):
-            raise NotImplementedError("axis parameter is not implemented yet")
-
-        skipna = True if skipna is None else skipna
-
-        if skipna:
-            result_col = self.nans_to_nulls()._column
-        else:
-            result_col = self._column.copy()
-            if result_col.has_nulls:
-                first_index = int(
-                    result_col.isnull().astype("int8").find_first_value(1)
-                )
-                result_col[first_index:] = None
-
-        return Series(
-            result_col._apply_scan_op("max"), name=self.name, index=self.index,
-        )
+        return self._scan("max", axis=axis, skipna=skipna)
 
     def cumsum(self, axis=0, skipna=True, *args, **kwargs):
         """
@@ -4065,38 +4061,7 @@ def cumsum(self, axis=0, skipna=True, *args, **kwargs):
         3    12
         4    15
         """
-
-        if axis not in (None, 0):
-            raise NotImplementedError("axis parameter is not implemented yet")
-
-        skipna = True if skipna is None else skipna
-
-        if skipna:
-            result_col = self.nans_to_nulls()._column
-        else:
-            result_col = self._column.copy()
-            if result_col.has_nulls:
-                first_index = int(
-                    result_col.isnull().astype("int8").find_first_value(1)
-                )
-                result_col[first_index:] = None
-
-        # pandas always returns int64 dtype if original dtype is int or `bool`
-        if not is_decimal_dtype(result_col.dtype) and (
-            np.issubdtype(result_col.dtype, np.integer)
-            or np.issubdtype(result_col.dtype, np.bool_)
-        ):
-            return Series(
-                result_col.astype(np.int64)._apply_scan_op("sum"),
-                name=self.name,
-                index=self.index,
-            )
-        else:
-            return Series(
-                result_col._apply_scan_op("sum"),
-                name=self.name,
-                index=self.index,
-            )
+        return self._scan("sum", axis=axis, skipna=skipna, cast_to_int=True)
 
     def cumprod(self, axis=0, skipna=True, *args, **kwargs):
         """
@@ -4128,42 +4093,9 @@ def cumprod(self, axis=0, skipna=True, *args, **kwargs):
         3    40
         4    120
         """
-
-        if axis not in (None, 0):
-            raise NotImplementedError("axis parameter is not implemented yet")
-
-        if is_decimal_dtype(self.dtype):
-            raise NotImplementedError(
-                "cumprod does not currently support decimal types"
-            )
-
-        skipna = True if skipna is None else skipna
-
-        if skipna:
-            result_col = self.nans_to_nulls()._column
-        else:
-            result_col = self._column.copy()
-            if result_col.has_nulls:
-                first_index = int(
-                    result_col.isnull().astype("int8").find_first_value(1)
-                )
-                result_col[first_index:] = None
-
-        # pandas always returns int64 dtype if original dtype is int or `bool`
-        if np.issubdtype(result_col.dtype, np.integer) or np.issubdtype(
-            result_col.dtype, np.bool_
-        ):
-            return Series(
-                result_col.astype(np.int64)._apply_scan_op("product"),
-                name=self.name,
-                index=self.index,
-            )
-        else:
-            return Series(
-                result_col._apply_scan_op("product"),
-                name=self.name,
-                index=self.index,
-            )
+        return self._scan(
+            "product", axis=axis, skipna=skipna, cast_to_int=True
+        )
 
     def mode(self, dropna=True):
         """

From 37473277af2d013dce7d8e3ab27559e82a0bed1c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 10 Aug 2021 16:57:58 -0700
Subject: [PATCH 04/13] Stop expecting axis=1 to fail.

---
 python/cudf/cudf/tests/test_dataframe.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 8744238a062..0da7da7305c 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8107,17 +8107,7 @@ def custom_func(df, column):
 
 
 @pytest.mark.parametrize(
-    "op",
-    [
-        "count",
-        "cummin",
-        "cummax",
-        "cummax",
-        "cumprod",
-        "kurt",
-        "kurtosis",
-        "skew",
-    ],
+    "op", ["count", "kurt", "kurtosis", "skew"],
 )
 def test_dataframe_axis1_unsupported_ops(op):
     df = cudf.DataFrame({"a": [1, 2, 3], "b": [8, 9, 10]})

From 2804f5c1f002016081a17ccdbeed7df5eb1a1f4c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 10 Aug 2021 17:10:27 -0700
Subject: [PATCH 05/13] Move scan implementation up to Frame and reuse for
 DataFrame.

---
 python/cudf/cudf/core/dataframe.py | 24 ++++--------------
 python/cudf/cudf/core/frame.py     | 40 ++++++++++++++++++++++++------
 python/cudf/cudf/core/series.py    | 29 +---------------------
 3 files changed, 38 insertions(+), 55 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 5d7cb8eb898..4da29c6ac4c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6346,15 +6346,7 @@ def _scan(
         axis = self._get_axis_from_axis_arg(axis)
 
         if axis == 0:
-            result = [
-                getattr(self[col], op)(*args, **kwargs)
-                for col in self._data.names
-            ]
-
-            return DataFrame._from_data(
-                {col: result[i] for i, col in enumerate(self._data.names)},
-                index=result[0].index,
-            )
+            return super()._scan(op, axis, *args, **kwargs)
         elif axis == 1:
             return self._apply_support_method_axis_1(op, **kwargs)
 
@@ -6386,7 +6378,7 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         2  1  7
         3  1  7
         """
-        return self._scan("cummin", axis=axis, skipna=skipna, *args, **kwargs)
+        return self._scan("min", axis=axis, skipna=skipna, *args, **kwargs)
 
     def cummax(self, axis=None, skipna=True, *args, **kwargs):
         """
@@ -6416,9 +6408,7 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs):
         2  3   9
         3  4  10
         """
-        return self._apply_support_method(
-            "cummax", axis=axis, skipna=skipna, *args, **kwargs
-        )
+        return self._scan("max", axis=axis, skipna=skipna, *args, **kwargs)
 
     def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         """
@@ -6449,9 +6439,7 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         2   6  24
         3  10  34
         """
-        return self._apply_support_method(
-            "cumsum", axis=axis, skipna=skipna, *args, **kwargs
-        )
+        return self._scan("sum", axis=axis, skipna=skipna, *args, **kwargs)
 
     def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         """
@@ -6481,9 +6469,7 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         2   6   504
         3  24  5040
         """
-        return self._apply_support_method(
-            "cumprod", axis=axis, skipna=skipna, *args, **kwargs
-        )
+        return self._scan("prod", axis=axis, skipna=skipna, *args, **kwargs)
 
     def mode(self, axis=0, numeric_only=False, dropna=True):
         """
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 3c6bc057af1..240858dbb6b 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3681,20 +3681,44 @@ def _get_axis_from_axis_arg(cls, axis):
         try:
             return cls._SUPPORT_AXIS_LOOKUP[axis]
         except KeyError:
-            valid_axes = ", ".join(
-                (
-                    ax
-                    for ax in cls._SUPPORT_AXIS_LOOKUP.keys()
-                    if ax is not None
-                )
-            )
-            raise ValueError(f"Invalid axis, must be one of {valid_axes}.")
+            raise ValueError(f"No axis named {axis} for object type {cls}")
 
     def _reduce(self, *args, **kwargs):
         raise NotImplementedError(
             f"Reductions are not supported for objects of type {type(self)}."
         )
 
+    def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
+        skipna = True if skipna is None else skipna
+
+        results = {}
+        for name, col in self._data.items():
+            if skipna:
+                result_col = self._data[name].nans_to_nulls()
+            else:
+                result_col = self._data[name].copy()
+                if result_col.has_nulls:
+                    # Workaround as find_first_value doesn't seem to work
+                    # incase of bools.
+                    first_index = int(
+                        result_col.isnull().astype("int8").find_first_value(1)
+                    )
+                    result_col[first_index:] = None
+
+            if (
+                cast_to_int
+                and not is_decimal_dtype(result_col.dtype)
+                and (
+                    np.issubdtype(result_col.dtype, np.integer)
+                    or np.issubdtype(result_col.dtype, np.bool_)
+                )
+            ):
+                # For reductions that accumulate a value (e.g. sum, not max)
+                # pandas returns an int64 dtype for all int or bool dtypes.
+                result_col = result_col.astype(np.int64)
+            results[name] = result_col._apply_scan_op(op)
+        return self._from_data(results, index=self.index)
+
     def min(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs,
     ):
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 3381de88182..c4d19540958 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3937,34 +3937,7 @@ def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
         if axis not in (None, 0):
             raise NotImplementedError("axis parameter is not implemented yet")
 
-        skipna = True if skipna is None else skipna
-
-        if skipna:
-            result_col = self.nans_to_nulls()._column
-        else:
-            result_col = self._column.copy()
-            if result_col.has_nulls:
-                # Workaround as find_first_value doesn't seem to work
-                # incase of bools.
-                first_index = int(
-                    result_col.isnull().astype("int8").find_first_value(1)
-                )
-                result_col[first_index:] = None
-
-        if (
-            cast_to_int
-            and not is_decimal_dtype(result_col.dtype)
-            and (
-                np.issubdtype(result_col.dtype, np.integer)
-                or np.issubdtype(result_col.dtype, np.bool_)
-            )
-        ):
-            # For reductions that accumulate a value (e.g. sum, not max) pandas
-            # returns an int64 dtype for all input int or bool dtypes.
-            result_col = result_col.astype(np.int64)
-        return Series._from_data(
-            {self.name: result_col._apply_scan_op(op)}, index=self.index,
-        )
+        return super()._scan(op, axis, skipna, cast_to_int)
 
     def cummin(self, axis=None, skipna=True, *args, **kwargs):
         """

From a190e6fa36f3455df71b447d8690e57875a409bb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 11 Aug 2021 14:30:20 -0700
Subject: [PATCH 06/13] Move all scan implementations into Frame.

---
 python/cudf/cudf/core/dataframe.py | 123 +------------------
 python/cudf/cudf/core/frame.py     | 184 ++++++++++++++++++++++++-----
 python/cudf/cudf/core/series.py    | 135 +--------------------
 3 files changed, 156 insertions(+), 286 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 4da29c6ac4c..a6220038cb2 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6346,131 +6346,10 @@ def _scan(
         axis = self._get_axis_from_axis_arg(axis)
 
         if axis == 0:
-            return super()._scan(op, axis, *args, **kwargs)
+            return super()._scan(op, axis=axis, *args, **kwargs)
         elif axis == 1:
             return self._apply_support_method_axis_1(op, **kwargs)
 
-    def cummin(self, axis=None, skipna=True, *args, **kwargs):
-        """
-        Return cumulative minimum of the DataFrame.
-
-        Parameters
-        ----------
-
-        axis: {index (0), columns(1)}
-            Axis for the function to be applied on.
-        skipna: bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-        Returns
-        -------
-        DataFrame
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
-        >>> df.cummin()
-           a  b
-        0  1  7
-        1  1  7
-        2  1  7
-        3  1  7
-        """
-        return self._scan("min", axis=axis, skipna=skipna, *args, **kwargs)
-
-    def cummax(self, axis=None, skipna=True, *args, **kwargs):
-        """
-        Return cumulative maximum of the DataFrame.
-
-        Parameters
-        ----------
-
-        axis: {index (0), columns(1)}
-            Axis for the function to be applied on.
-        skipna: bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-        Returns
-        -------
-        DataFrame
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
-        >>> df.cummax()
-           a   b
-        0  1   7
-        1  2   8
-        2  3   9
-        3  4  10
-        """
-        return self._scan("max", axis=axis, skipna=skipna, *args, **kwargs)
-
-    def cumsum(self, axis=None, skipna=True, *args, **kwargs):
-        """
-        Return cumulative sum of the DataFrame.
-
-        Parameters
-        ----------
-
-        axis: {index (0), columns(1)}
-            Axis for the function to be applied on.
-        skipna: bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-
-        Returns
-        -------
-        DataFrame
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
-        >>> s.cumsum()
-            a   b
-        0   1   7
-        1   3  15
-        2   6  24
-        3  10  34
-        """
-        return self._scan("sum", axis=axis, skipna=skipna, *args, **kwargs)
-
-    def cumprod(self, axis=None, skipna=True, *args, **kwargs):
-        """
-        Return cumulative product of the DataFrame.
-
-        Parameters
-        ----------
-
-        axis: {index (0), columns(1)}
-            Axis for the function to be applied on.
-        skipna: bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-        Returns
-        -------
-        DataFrame
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
-        >>> s.cumprod()
-            a     b
-        0   1     7
-        1   2    56
-        2   6   504
-        3  24  5040
-        """
-        return self._scan("prod", axis=axis, skipna=skipna, *args, **kwargs)
-
     def mode(self, axis=0, numeric_only=False, dropna=True):
         """
         Get the mode(s) of each element along the selected axis.
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 240858dbb6b..12376ea58e8 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3688,37 +3688,6 @@ def _reduce(self, *args, **kwargs):
             f"Reductions are not supported for objects of type {type(self)}."
         )
 
-    def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
-        skipna = True if skipna is None else skipna
-
-        results = {}
-        for name, col in self._data.items():
-            if skipna:
-                result_col = self._data[name].nans_to_nulls()
-            else:
-                result_col = self._data[name].copy()
-                if result_col.has_nulls:
-                    # Workaround as find_first_value doesn't seem to work
-                    # incase of bools.
-                    first_index = int(
-                        result_col.isnull().astype("int8").find_first_value(1)
-                    )
-                    result_col[first_index:] = None
-
-            if (
-                cast_to_int
-                and not is_decimal_dtype(result_col.dtype)
-                and (
-                    np.issubdtype(result_col.dtype, np.integer)
-                    or np.issubdtype(result_col.dtype, np.bool_)
-                )
-            ):
-                # For reductions that accumulate a value (e.g. sum, not max)
-                # pandas returns an int64 dtype for all int or bool dtypes.
-                result_col = result_col.astype(np.int64)
-            results[name] = result_col._apply_scan_op(op)
-        return self._from_data(results, index=self.index)
-
     def min(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs,
     ):
@@ -4223,6 +4192,159 @@ def median(
             **kwargs,
         )
 
+    # Scans
+    def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
+        skipna = True if skipna is None else skipna
+
+        results = {}
+        for name, col in self._data.items():
+            if skipna:
+                result_col = self._data[name].nans_to_nulls()
+            else:
+                result_col = self._data[name].copy()
+                if result_col.has_nulls:
+                    # Workaround as find_first_value doesn't seem to work
+                    # in case of bools.
+                    first_index = int(
+                        result_col.isnull().astype("int8").find_first_value(1)
+                    )
+                    result_col[first_index:] = None
+
+            if (
+                cast_to_int
+                and not is_decimal_dtype(result_col.dtype)
+                and (
+                    np.issubdtype(result_col.dtype, np.integer)
+                    or np.issubdtype(result_col.dtype, np.bool_)
+                )
+            ):
+                # For reductions that accumulate a value (e.g. sum, not max)
+                # pandas returns an int64 dtype for all int or bool dtypes.
+                result_col = result_col.astype(np.int64)
+            results[name] = result_col._apply_scan_op(op)
+        return self._from_data(results, index=self.index)
+
+    def cummin(self, axis=None, skipna=True, *args, **kwargs):
+        """
+        Return cumulative minimum of the Series or DataFrame.
+
+        Parameters
+        ----------
+
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
+        skipna: bool, default True
+            Exclude NA/null values. If an entire row/column is NA,
+            the result will be NA.
+
+        Returns
+        -------
+        Series or DataFrame
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
+        >>> df.cummin()
+           a  b
+        0  1  7
+        1  1  7
+        2  1  7
+        3  1  7
+        """
+        return self._scan("min", axis=axis, skipna=skipna, *args, **kwargs)
+
+    def cummax(self, axis=None, skipna=True, *args, **kwargs):
+        """
+        Return cumulative maximum of the Series or DataFrame.
+
+        Parameters
+        ----------
+
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
+        skipna: bool, default True
+            Exclude NA/null values. If an entire row/column is NA,
+            the result will be NA.
+
+        Returns
+        -------
+        Series or DataFrame
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
+        >>> df.cummax()
+           a   b
+        0  1   7
+        1  2   8
+        2  3   9
+        3  4  10
+        """
+        return self._scan("max", axis=axis, skipna=skipna, *args, **kwargs)
+
+    def cumsum(self, axis=None, skipna=True, *args, **kwargs):
+        """
+        Return cumulative sum of the Series or DataFrame.
+
+        Parameters
+        ----------
+
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
+        skipna: bool, default True
+            Exclude NA/null values. If an entire row/column is NA,
+            the result will be NA.
+
+
+        Returns
+        -------
+        Series or DataFrame
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
+        >>> df.cumsum()
+            a   b
+        0   1   7
+        1   3  15
+        2   6  24
+        3  10  34
+        """
+        return self._scan("sum", axis=axis, skipna=skipna, *args, **kwargs)
+
+    def cumprod(self, axis=None, skipna=True, *args, **kwargs):
+        """
+        Return cumulative product of the Series or DataFrame.
+
+        Parameters
+        ----------
+
+        axis: {index (0), columns(1)}
+            Axis for the function to be applied on.
+        skipna: bool, default True
+            Exclude NA/null values. If an entire row/column is NA,
+            the result will be NA.
+
+        Returns
+        -------
+        Series or DataFrame
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
+        >>> df.cumprod()
+            a     b
+        0   1     7
+        1   2    56
+        2   6   504
+        3  24  5040
+        """
+        return self._scan("prod", axis=axis, skipna=skipna, *args, **kwargs)
+
 
 class SingleColumnFrame(Frame):
     """A one-dimensional frame.
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index c4d19540958..405cd96b49a 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3933,142 +3933,11 @@ def count(self, level=None, **kwargs):
 
         return self.valid_count
 
-    def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
+    def _scan(self, op, axis=None, *args, **kwargs):
         if axis not in (None, 0):
             raise NotImplementedError("axis parameter is not implemented yet")
 
-        return super()._scan(op, axis, skipna, cast_to_int)
-
-    def cummin(self, axis=None, skipna=True, *args, **kwargs):
-        """
-        Return cumulative minimum of the Series.
-
-        Parameters
-        ----------
-
-        skipna : bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-        Returns
-        -------
-        Series
-
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
-        Examples
-        --------
-        >>> import cudf
-        >>> ser = cudf.Series([1, 5, 2, 4, 3])
-        >>> ser.cummin()
-        0    1
-        1    1
-        2    1
-        3    1
-        4    1
-        """
-        return self._scan("min", axis=axis, skipna=skipna)
-
-    def cummax(self, axis=0, skipna=True, *args, **kwargs):
-        """
-        Return cumulative maximum of the Series.
-
-        Parameters
-        ----------
-
-        skipna : bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-        Returns
-        -------
-        Series
-
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
-        Examples
-        --------
-        >>> import cudf
-        >>> ser = cudf.Series([1, 5, 2, 4, 3])
-        >>> ser.cummax()
-        0    1
-        1    5
-        2    5
-        3    5
-        4    5
-        """
-        return self._scan("max", axis=axis, skipna=skipna)
-
-    def cumsum(self, axis=0, skipna=True, *args, **kwargs):
-        """
-        Return cumulative sum of the Series.
-
-        Parameters
-        ----------
-
-        skipna : bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-
-        Returns
-        -------
-        Series
-
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
-        Examples
-        --------
-        >>> import cudf
-        >>> ser = cudf.Series([1, 5, 2, 4, 3])
-        >>> ser.cumsum()
-        0    1
-        1    6
-        2    8
-        3    12
-        4    15
-        """
-        return self._scan("sum", axis=axis, skipna=skipna, cast_to_int=True)
-
-    def cumprod(self, axis=0, skipna=True, *args, **kwargs):
-        """
-        Return cumulative product of the Series.
-
-        Parameters
-        ----------
-
-        skipna : bool, default True
-            Exclude NA/null values. If an entire row/column is NA,
-            the result will be NA.
-
-        Returns
-        -------
-        Series
-
-        Notes
-        -----
-        Parameters currently not supported is `axis`
-
-        Examples
-        --------
-        >>> import cudf
-        >>> ser = cudf.Series([1, 5, 2, 4, 3])
-        >>> ser.cumprod()
-        0    1
-        1    5
-        2    10
-        3    40
-        4    120
-        """
-        return self._scan(
-            "product", axis=axis, skipna=skipna, cast_to_int=True
-        )
+        return super()._scan(op, axis=axis, *args, **kwargs)
 
     def mode(self, dropna=True):
         """

From 9f96b78df38544ac78255a0b428819d276ee0c1f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 11 Aug 2021 14:42:52 -0700
Subject: [PATCH 07/13] Move Series scan impl to SingleColumnFrame to enable
 for Index types.

---
 python/cudf/cudf/core/frame.py  | 11 ++++++++++-
 python/cudf/cudf/core/series.py |  6 ------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 12376ea58e8..9122f5b854f 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -4222,7 +4222,10 @@ def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
                 # pandas returns an int64 dtype for all int or bool dtypes.
                 result_col = result_col.astype(np.int64)
             results[name] = result_col._apply_scan_op(op)
-        return self._from_data(results, index=self.index)
+        # TODO: This will work for Index because it's passing self._index
+        # (which is None), but eventually we may want to remove that parameter
+        # for Index._from_data and simplify.
+        return self._from_data(results, index=self._index)
 
     def cummin(self, axis=None, skipna=True, *args, **kwargs):
         """
@@ -4374,6 +4377,12 @@ def _reduce(
             )
         return getattr(self._column, op)(**kwargs)
 
+    def _scan(self, op, axis=None, *args, **kwargs):
+        if axis not in (None, 0):
+            raise NotImplementedError("axis parameter is not implemented yet")
+
+        return super()._scan(op, axis=axis, *args, **kwargs)
+
     @classmethod
     def _from_data(
         cls,
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 405cd96b49a..75ac9941931 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3933,12 +3933,6 @@ def count(self, level=None, **kwargs):
 
         return self.valid_count
 
-    def _scan(self, op, axis=None, *args, **kwargs):
-        if axis not in (None, 0):
-            raise NotImplementedError("axis parameter is not implemented yet")
-
-        return super()._scan(op, axis=axis, *args, **kwargs)
-
     def mode(self, dropna=True):
         """
         Return the mode(s) of the dataset.

From 1fa2e63c63fae458fc8b4c6bcaeab1923eb9bb0c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 11 Aug 2021 14:49:05 -0700
Subject: [PATCH 08/13] Reenable cumulative ops for axis 1 correctly.

---
 python/cudf/cudf/core/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a6220038cb2..b45d6510674 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6348,7 +6348,7 @@ def _scan(
         if axis == 0:
             return super()._scan(op, axis=axis, *args, **kwargs)
         elif axis == 1:
-            return self._apply_support_method_axis_1(op, **kwargs)
+            return self._apply_support_method_axis_1(f"cum{op}", **kwargs)
 
     def mode(self, axis=0, numeric_only=False, dropna=True):
         """

From a477d1f8b0313daaca4d231c86d7ae9657537dca Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 11 Aug 2021 16:10:08 -0700
Subject: [PATCH 09/13] Add axis=1 tests and get most of them working.

---
 python/cudf/cudf/core/dataframe.py       |  5 +++--
 python/cudf/cudf/tests/test_dataframe.py | 27 ++++++++++++++++++------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index b45d6510674..a1ff6d16a0f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5851,7 +5851,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
             )
 
         if data.ndim == 2:
-            num_cols = len(data[0])
+            num_cols = data.shape[1]
         else:
             # Since we validate ndim to be either 1 or 2 above,
             # this case can be assumed to be ndim == 1.
@@ -6576,13 +6576,14 @@ def _apply_support_method_axis_0(self, method, *args, **kwargs):
     def _apply_support_method_axis_1(self, method, *args, **kwargs):
         # for dask metadata compatibility
         skipna = kwargs.pop("skipna", None)
+        skipna = True if skipna is None else skipna
         if method not in _cupy_nan_methods_map and skipna not in (
             None,
             True,
             1,
         ):
             raise NotImplementedError(
-                f"Row-wise operation to calculate '{method}'"
+                f"Row-wise operations to calculate '{method}'"
                 f" currently do not support `skipna=False`."
             )
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 0da7da7305c..32c2e9f9fbf 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1835,6 +1835,8 @@ def gdf(pdf):
         {"x": []},
     ],
 )
+# @pytest.mark.parametrize("axis", [0, 1])
+@pytest.mark.parametrize("axis", [1])
 @pytest.mark.parametrize(
     "func",
     [
@@ -1852,19 +1854,32 @@ def gdf(pdf):
         lambda df, **kwargs: df.max(**kwargs),
         lambda df, **kwargs: df.std(ddof=1, **kwargs),
         lambda df, **kwargs: df.var(ddof=1, **kwargs),
-        lambda df, **kwargs: df.std(ddof=2, **kwargs),
-        lambda df, **kwargs: df.var(ddof=2, **kwargs),
-        lambda df, **kwargs: df.kurt(**kwargs),
-        lambda df, **kwargs: df.skew(**kwargs),
+        # lambda df, **kwargs: df.std(ddof=2, **kwargs),
+        # lambda df, **kwargs: df.var(ddof=2, **kwargs),
+        # lambda df, **kwargs: df.kurt(**kwargs),
+        # lambda df, **kwargs: df.skew(**kwargs),
         lambda df, **kwargs: df.all(**kwargs),
         lambda df, **kwargs: df.any(**kwargs),
     ],
 )
 @pytest.mark.parametrize("skipna", [True, False, None])
-def test_dataframe_reductions(data, func, skipna):
+def test_dataframe_reductions(data, axis, func, skipna):
     pdf = pd.DataFrame(data=data)
     gdf = cudf.DataFrame.from_pandas(pdf)
-    assert_eq(func(pdf, skipna=skipna), func(gdf, skipna=skipna))
+    try:
+        assert_eq(
+            func(pdf, axis=axis, skipna=skipna),
+            func(gdf, axis=axis, skipna=skipna),
+            check_dtype=False,
+        )
+    except Exception as e:
+        acceptable_errors = (
+            "Row-wise operations to calculate",
+            "module 'cupy' has no attribute",
+        )
+        if any(a in str(e) for a in acceptable_errors):
+            return
+        raise e
 
 
 @pytest.mark.parametrize(

From 745684f952a7dcab5a61245663e3bfd93bbe26c0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 11 Aug 2021 16:53:00 -0700
Subject: [PATCH 10/13] Get all possible axis=1 tests working and filter
 properly.

---
 python/cudf/cudf/core/dataframe.py       |  2 +-
 python/cudf/cudf/tests/test_dataframe.py | 82 +++++++++++++-----------
 2 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a1ff6d16a0f..1b41703ba3f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6225,7 +6225,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
             col.nullable for col in self._columns
         ):
             msg = (
-                f"Row-wise operations to calculate '{method}' is not "
+                f"Row-wise operations to calculate '{method}' do not "
                 f"currently support columns with null values. "
                 f"Consider removing them with .dropna() "
                 f"or using .fillna()."
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 32c2e9f9fbf..412b9fccb6b 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1828,58 +1828,66 @@ def gdf(pdf):
 @pytest.mark.parametrize(
     "data",
     [
-        {"x": [np.nan, 2, 3, 4, 100, np.nan], "y": [4, 5, 6, 88, 99, np.nan]},
-        {"x": [1, 2, 3], "y": [4, 5, 6]},
-        {"x": [np.nan, np.nan, np.nan], "y": [np.nan, np.nan, np.nan]},
-        {"x": [], "y": []},
+        {
+            "x": [np.nan, 2, 3, 4, 100, np.nan],
+            "y": [4, 5, 6, 88, 99, np.nan],
+            "z": [7, 8, 9, 66, np.nan, 77],
+        },
+        {"x": [1, 2, 3], "y": [4, 5, 6], "z": [7, 8, 9]},
+        {
+            "x": [np.nan, np.nan, np.nan],
+            "y": [np.nan, np.nan, np.nan],
+            "z": [np.nan, np.nan, np.nan],
+        },
+        {"x": [], "y": [], "z": []},
         {"x": []},
     ],
 )
-# @pytest.mark.parametrize("axis", [0, 1])
-@pytest.mark.parametrize("axis", [1])
+@pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.parametrize(
     "func",
     [
-        lambda df, **kwargs: df.min(**kwargs),
-        lambda df, **kwargs: df.max(**kwargs),
-        lambda df, **kwargs: df.sum(**kwargs),
-        lambda df, **kwargs: df.product(**kwargs),
-        lambda df, **kwargs: df.cummin(**kwargs),
-        lambda df, **kwargs: df.cummax(**kwargs),
-        lambda df, **kwargs: df.cumsum(**kwargs),
-        lambda df, **kwargs: df.cumprod(**kwargs),
-        lambda df, **kwargs: df.mean(**kwargs),
-        lambda df, **kwargs: df.median(**kwargs),
-        lambda df, **kwargs: df.sum(**kwargs),
-        lambda df, **kwargs: df.max(**kwargs),
-        lambda df, **kwargs: df.std(ddof=1, **kwargs),
-        lambda df, **kwargs: df.var(ddof=1, **kwargs),
-        # lambda df, **kwargs: df.std(ddof=2, **kwargs),
-        # lambda df, **kwargs: df.var(ddof=2, **kwargs),
-        # lambda df, **kwargs: df.kurt(**kwargs),
-        # lambda df, **kwargs: df.skew(**kwargs),
-        lambda df, **kwargs: df.all(**kwargs),
-        lambda df, **kwargs: df.any(**kwargs),
+        "min",
+        "max",
+        "sum",
+        "product",
+        "cummin",
+        "cummax",
+        "cumsum",
+        "cumprod",
+        "mean",
+        "median",
+        "sum",
+        "max",
+        "std",
+        "var",
+        "kurt",
+        "skew",
+        "all",
+        "any",
     ],
 )
 @pytest.mark.parametrize("skipna", [True, False, None])
 def test_dataframe_reductions(data, axis, func, skipna):
     pdf = pd.DataFrame(data=data)
     gdf = cudf.DataFrame.from_pandas(pdf)
-    try:
+
+    # These reductions don't support axis=1
+    if axis == 1 and func in ("kurt", "skew"):
+        return
+
+    # We need cupy-supported operations when performing rowwise ops.
+    if func not in cudf.core.dataframe._cupy_nan_methods_map and axis == 1:
+        return
+
+    # Test different degrees of freedom for var and std.
+    all_kwargs = [{"ddof": 1}, {"ddof": 2}] if func in ("var", "std") else [{}]
+    for kwargs in all_kwargs:
         assert_eq(
-            func(pdf, axis=axis, skipna=skipna),
-            func(gdf, axis=axis, skipna=skipna),
+            getattr(pdf, func)(axis=axis, skipna=skipna, **kwargs),
+            getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),
             check_dtype=False,
         )
-    except Exception as e:
-        acceptable_errors = (
-            "Row-wise operations to calculate",
-            "module 'cupy' has no attribute",
-        )
-        if any(a in str(e) for a in acceptable_errors):
-            return
-        raise e
 
 
 @pytest.mark.parametrize(

From 939b45561eadfb37120c1b0c797edaa9cb7c277d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 12 Aug 2021 09:32:16 -0700
Subject: [PATCH 11/13] Add in cast that was lost in the scramble.

---
 python/cudf/cudf/core/frame.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 9122f5b854f..37584d2c3e4 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -4316,7 +4316,9 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         2   6  24
         3  10  34
         """
-        return self._scan("sum", axis=axis, skipna=skipna, *args, **kwargs)
+        return self._scan(
+            "sum", axis=axis, skipna=skipna, cast_to_int=True, *args, **kwargs
+        )
 
     def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         """
@@ -4346,7 +4348,9 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         2   6   504
         3  24  5040
         """
-        return self._scan("prod", axis=axis, skipna=skipna, *args, **kwargs)
+        return self._scan(
+            "prod", axis=axis, skipna=skipna, cast_to_int=True, *args, **kwargs
+        )
 
 
 class SingleColumnFrame(Frame):

From d804de5c8b9752b35fece4015697a8d72292b4e1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 16 Aug 2021 10:51:47 -0700
Subject: [PATCH 12/13] Add back Series examples.

---
 python/cudf/cudf/core/frame.py | 52 ++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 37584d2c3e4..324c555f974 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -4246,6 +4246,19 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
 
         Examples
         --------
+        **Series**
+
+        >>> import cudf
+        >>> ser = cudf.Series([1, 5, 2, 4, 3])
+        >>> ser.cummin()
+        0    1
+        1    1
+        2    1
+        3    1
+        4    1
+
+        **DataFrame**
+
         >>> import cudf
         >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
         >>> df.cummin()
@@ -4276,6 +4289,19 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs):
 
         Examples
         --------
+        **Series**
+
+        >>> import cudf
+        >>> ser = cudf.Series([1, 5, 2, 4, 3])
+        >>> ser.cummax()
+        0    1
+        1    5
+        2    5
+        3    5
+        4    5
+
+        **DataFrame**
+
         >>> import cudf
         >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
         >>> df.cummax()
@@ -4307,6 +4333,19 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
 
         Examples
         --------
+        **Series**
+
+        >>> import cudf
+        >>> ser = cudf.Series([1, 5, 2, 4, 3])
+        >>> ser.cumsum()
+        0     1
+        1     6
+        2     8
+        3    12
+        4    15
+
+        **DataFrame**
+
         >>> import cudf
         >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
         >>> s.cumsum()
@@ -4339,6 +4378,19 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
 
         Examples
         --------
+        **Series**
+
+        >>> import cudf
+        >>> ser = cudf.Series([1, 5, 2, 4, 3])
+        >>> ser.cumprod()
+        0      1
+        1      5
+        2     10
+        3     40
+        4    120
+
+        **DataFrame**
+
         >>> import cudf
         >>> df = cudf.DataFrame({'a': [1, 2, 3, 4], 'b': [7, 8, 9, 10]})
         >>> s.cumprod()

From d4abda9658f61227daec6023ee2ae7fac8df9bf5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 16 Aug 2021 15:34:44 -0700
Subject: [PATCH 13/13] Change test to verify that the correct exceptions are
 thrown.

---
 python/cudf/cudf/core/dataframe.py       |  6 ++++
 python/cudf/cudf/tests/test_dataframe.py | 37 ++++++++++++++++--------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1b41703ba3f..3f9804daf05 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -62,6 +62,7 @@
     "max": "nanmax",
     "sum": "nansum",
     "prod": "nanprod",
+    "product": "nanprod",
     "mean": "nanmean",
     "std": "nanstd",
     "var": "nanvar",
@@ -6612,6 +6613,11 @@ def _apply_support_method_axis_1(self, method, *args, **kwargs):
                 "Row-wise operations currently do not " "support `bool_only`."
             )
 
+        # This parameter is only necessary for axis 0 scans that cuDF
+        # performs internally. cupy already upcasts smaller integer/bool types
+        # to int64 when accumulating.
+        kwargs.pop("cast_to_int", None)
+
         prepared, mask, common_dtype = self._prepare_for_rowwise_op(
             method, skipna
         )
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 412b9fccb6b..484278d0237 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1850,6 +1850,7 @@ def gdf(pdf):
         "min",
         "max",
         "sum",
+        "prod",
         "product",
         "cummin",
         "cummax",
@@ -1872,22 +1873,34 @@ def test_dataframe_reductions(data, axis, func, skipna):
     pdf = pd.DataFrame(data=data)
     gdf = cudf.DataFrame.from_pandas(pdf)
 
-    # These reductions don't support axis=1
-    if axis == 1 and func in ("kurt", "skew"):
-        return
-
-    # We need cupy-supported operations when performing rowwise ops.
-    if func not in cudf.core.dataframe._cupy_nan_methods_map and axis == 1:
-        return
+    # Reductions can fail in numerous possible ways when attempting row-wise
+    # reductions, which are only partially supported. Catching the appropriate
+    # exception here allows us to detect API breakage in the form of changing
+    # exceptions.
+    expected_exception = None
+    if axis == 1:
+        if func in ("kurt", "skew"):
+            expected_exception = NotImplementedError
+        elif func not in cudf.core.dataframe._cupy_nan_methods_map:
+            if skipna is False:
+                expected_exception = NotImplementedError
+            elif any(col.nullable for name, col in gdf.iteritems()):
+                expected_exception = ValueError
+            elif func in ("cummin", "cummax"):
+                expected_exception = AttributeError
 
     # Test different degrees of freedom for var and std.
     all_kwargs = [{"ddof": 1}, {"ddof": 2}] if func in ("var", "std") else [{}]
     for kwargs in all_kwargs:
-        assert_eq(
-            getattr(pdf, func)(axis=axis, skipna=skipna, **kwargs),
-            getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),
-            check_dtype=False,
-        )
+        if expected_exception is not None:
+            with pytest.raises(expected_exception):
+                getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),
+        else:
+            assert_eq(
+                getattr(pdf, func)(axis=axis, skipna=skipna, **kwargs),
+                getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),
+                check_dtype=False,
+            )
 
 
 @pytest.mark.parametrize(