diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index db333328692..87e1a87e68b 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -12,9 +12,10 @@ from cudf import _lib as libcudf from cudf._typing import ScalarLike from cudf.core.column import ColumnBase +from cudf.core.mixins import Scannable -class NumericalBaseColumn(ColumnBase): +class NumericalBaseColumn(ColumnBase, Scannable): """A column composed of numerical data. This class encodes a standard interface for different types of columns @@ -32,6 +33,13 @@ class NumericalBaseColumn(ColumnBase): "std", } + _VALID_SCANS = { + "cumsum", + "cumprod", + "cummin", + "cummax", + } + def _can_return_nan(self, skipna: bool = None) -> bool: return not skipna and self.has_nulls() @@ -174,7 +182,7 @@ def round( """Round the values in the Column to the given number of decimals.""" return libcudf.round.round(self, decimal_places=decimals, how=how) - def _apply_scan_op(self, op: str) -> ColumnBase: - return libcudf.reduce.scan(op, self, True)._with_type_metadata( - self.dtype - ) + def _scan(self, op: str) -> ColumnBase: + return libcudf.reduce.scan( + op.replace("cum", ""), self, True + )._with_type_metadata(self.dtype) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 55acdd55e14..d5edbd7284e 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -125,6 +125,7 @@ class Frame(Scannable): # Necessary because the function names don't directly map to the docs. _SCAN_DOCSTRINGS = { "cumsum": {"op_name": "cumulative sum"}, + "cumprod": {"op_name": "cumulative product"}, "cummin": {"op_name": "cumulative min"}, "cummax": {"op_name": "cumulative max"}, } @@ -4701,7 +4702,6 @@ def _scan(self, op, axis=None, skipna=True): 3 10 34 """ cast_to_int = op in ("cumsum", "cumprod") - op = op.replace("cum", "") skipna = True if skipna is None else skipna results = {} @@ -4734,7 +4734,7 @@ def _scan(self, op, axis=None, skipna=True): # For reductions that accumulate a value (e.g. sum, not max) # pandas returns an int64 dtype for all int or bool dtypes. result_col = result_col.astype(np.int64) - results[name] = result_col._apply_scan_op(op) + results[name] = getattr(result_col, op)() # TODO: This will work for Index because it's passing self._index # (which is None), but eventually we may want to remove that parameter # for Index._from_data and simplify.