Skip to content

Commit

Permalink
Make NumericalBaseColumn Scannable.
Browse files (browse the repository at this point in the history)
  • Loading branch information
vyasr committed Feb 25, 2022
1 parent 9630165 commit f646716
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
18 changes: 13 additions & 5 deletions python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from cudf import _lib as libcudf
from cudf._typing import ScalarLike
from cudf.core.column import ColumnBase
from cudf.core.mixins import Scannable


class NumericalBaseColumn(ColumnBase):
class NumericalBaseColumn(ColumnBase, Scannable):
"""A column composed of numerical data.
This class encodes a standard interface for different types of columns
Expand All @@ -32,6 +33,13 @@ class NumericalBaseColumn(ColumnBase):
"std",
}

_VALID_SCANS = {
"cumsum",
"cumprod",
"cummin",
"cummax",
}

def _can_return_nan(self, skipna: bool = None) -> bool:
return not skipna and self.has_nulls()

Expand Down Expand Up @@ -174,7 +182,7 @@ def round(
"""Round the values in the Column to the given number of decimals."""
return libcudf.round.round(self, decimal_places=decimals, how=how)

def _apply_scan_op(self, op: str) -> ColumnBase:
    """Run the libcudf scan named ``op`` over this column.

    Parameters
    ----------
    op : str
        Scan name, forwarded unchanged to ``libcudf.reduce.scan``.

    Returns
    -------
    ColumnBase
        The scanned column with this column's dtype metadata re-applied.
    """
    # NOTE(review): the positional ``True`` is presumably libcudf's
    # "inclusive" flag -- confirm against libcudf.reduce.scan.
    scanned = libcudf.reduce.scan(op, self, True)
    return scanned._with_type_metadata(self.dtype)
def _scan(self, op: str) -> ColumnBase:
    """Run the cumulative operation named ``op`` over this column.

    Parameters
    ----------
    op : str
        Scan method name such as ``"cumsum"``. Every ``"cum"`` substring
        is removed before the name is passed to ``libcudf.reduce.scan``
        (so ``"cumsum"`` is forwarded as ``"sum"``).

    Returns
    -------
    ColumnBase
        The scanned column with this column's dtype metadata re-applied.
    """
    libcudf_op = op.replace("cum", "")
    # NOTE(review): the positional ``True`` is presumably libcudf's
    # "inclusive" flag -- confirm against libcudf.reduce.scan.
    scanned = libcudf.reduce.scan(libcudf_op, self, True)
    return scanned._with_type_metadata(self.dtype)
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class Frame(Scannable):
# Maps each scan method name to the phrase used for it in the docs; necessary
# because the function names (e.g. "cumsum") don't directly map to that wording.
_SCAN_DOCSTRINGS = {
"cumsum": {"op_name": "cumulative sum"},
"cumprod": {"op_name": "cumulative product"},
"cummin": {"op_name": "cumulative min"},
"cummax": {"op_name": "cumulative max"},
}
Expand Down Expand Up @@ -4701,7 +4702,6 @@ def _scan(self, op, axis=None, skipna=True):
3 10 34
"""
cast_to_int = op in ("cumsum", "cumprod")
op = op.replace("cum", "")
skipna = True if skipna is None else skipna

results = {}
Expand Down Expand Up @@ -4734,7 +4734,7 @@ def _scan(self, op, axis=None, skipna=True):
# For reductions that accumulate a value (e.g. sum, not max)
# pandas returns an int64 dtype for all int or bool dtypes.
result_col = result_col.astype(np.int64)
results[name] = result_col._apply_scan_op(op)
results[name] = getattr(result_col, op)()
# TODO: This will work for Index because it's passing self._index
# (which is None), but eventually we may want to remove that parameter
# for Index._from_data and simplify.
Expand Down

0 comments on commit f646716

Please sign in to comment.