Skip to content

Commit

Permalink
Implement scan operations for decimal columns (#7707)
Browse files Browse the repository at this point in the history
This adds support for `cummin`, `cummax`, and `cumsum` in cuDF for columns of `decimal` type.

Authors:
  - @ChrisJar

Approvers:
  - GALI PREM SAGAR (@galipremsagar)

URL: #7707
  • Loading branch information
ChrisJar authored Mar 24, 2021
1 parent 1417297 commit f38daf3
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def binary_operator(self, op, other, reflect=False):
result.dtype.precision = _binop_precision(self.dtype, other.dtype, op)
return result

def _apply_scan_op(self, op: str) -> ColumnBase:
    """Run the libcudf scan identified by ``op`` over this column.

    ``op`` is forwarded unchanged to ``libcudf.reduce.scan`` together with
    this column, and whatever that call produces is handed back as-is.
    """
    # NOTE(review): the third positional argument is presumably the
    # "inclusive" flag of the scan -- confirm against libcudf.reduce.scan.
    scanned = libcudf.reduce.scan(op, self, True)
    return scanned

def as_decimal_column(
self, dtype: Dtype, **kwargs
) -> "cudf.core.column.DecimalColumn":
Expand Down
10 changes: 8 additions & 2 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4725,8 +4725,9 @@ def cumsum(self, axis=0, skipna=True, *args, **kwargs):
result_col[first_index:] = None

# pandas always returns int64 dtype if original dtype is int or `bool`
if np.issubdtype(result_col.dtype, np.integer) or np.issubdtype(
result_col.dtype, np.bool_
if not is_decimal_dtype(result_col.dtype) and (
np.issubdtype(result_col.dtype, np.integer)
or np.issubdtype(result_col.dtype, np.bool_)
):
return Series(
result_col.astype(np.int64)._apply_scan_op("sum"),
Expand Down Expand Up @@ -4774,6 +4775,11 @@ def cumprod(self, axis=0, skipna=True, *args, **kwargs):
if axis not in (None, 0):
raise NotImplementedError("axis parameter is not implemented yet")

if is_decimal_dtype(self.dtype):
raise NotImplementedError(
"cumprod does not currently support decimal types"
)

skipna = True if skipna is None else skipna

if skipna:
Expand Down
46 changes: 46 additions & 0 deletions python/cudf/cudf/tests/test_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import cudf
from cudf.tests.utils import INTEGER_TYPES, NUMERIC_TYPES, assert_eq, gen_rand
from cudf.core.dtypes import Decimal64Dtype

params_sizes = [0, 1, 2, 5]

Expand Down Expand Up @@ -61,6 +62,21 @@ def test_cumsum_masked():
assert_eq(got, expected)


@pytest.mark.parametrize(
    "dtype",
    [Decimal64Dtype(8, 4), Decimal64Dtype(10, 5), Decimal64Dtype(12, 7)],
)
def test_cumsum_decimal(dtype):
    """Compare ``cumsum`` on a decimal cudf Series with a pandas reference.

    The reference is a float64 pandas ``cumsum`` over the same values,
    moved into cudf and cast to the decimal ``dtype`` under test. The
    input includes one ``np.nan`` entry.
    """
    values = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"]

    # Reference: scan in pandas as float64, then cast to the decimal dtype.
    host_scan = pd.Series(values, dtype="float64").cumsum()
    expected = cudf.Series.from_pandas(host_scan).astype(dtype)

    # Under test: cast to decimal first, then scan in cudf.
    decimal_series = cudf.Series(values).astype(dtype)
    got = decimal_series.cumsum()

    assert_eq(got, expected)


@pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
def test_cummin(dtype, nelem):
if dtype == np.int8:
Expand Down Expand Up @@ -103,6 +119,21 @@ def test_cummin_masked():
assert_eq(gs.cummin(), expected)


@pytest.mark.parametrize(
    "dtype",
    [Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)],
)
def test_cummin_decimal(dtype):
    """Compare ``cummin`` on a decimal cudf Series with a pandas reference.

    The reference is a float64 pandas ``cummin`` over the same values,
    moved into cudf and cast to the decimal ``dtype`` under test. The
    input includes two ``np.nan`` entries.
    """
    values = ["8394.294", np.nan, "-9940.444", np.nan, "-23.928"]

    # Reference: scan in pandas as float64, then cast to the decimal dtype.
    host_scan = pd.Series(values, dtype="float64").cummin()
    expected = cudf.Series.from_pandas(host_scan).astype(dtype)

    # Under test: cast to decimal first, then scan in cudf.
    decimal_series = cudf.Series(values).astype(dtype)
    got = decimal_series.cummin()

    assert_eq(got, expected)


@pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
def test_cummax(dtype, nelem):
if dtype == np.int8:
Expand Down Expand Up @@ -145,6 +176,21 @@ def test_cummax_masked():
assert_eq(gs.cummax(), expected)


@pytest.mark.parametrize(
    "dtype",
    [Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)],
)
def test_cummax_decimal(dtype):
    """Compare ``cummax`` on a decimal cudf Series with a pandas reference.

    The reference is a float64 pandas ``cummax`` over the same values,
    moved into cudf and cast to the decimal ``dtype`` under test. The
    input starts with an ``np.nan`` entry.
    """
    values = [np.nan, "54.203", "8.222", "644.32", "-562.272"]

    # Reference: scan in pandas as float64, then cast to the decimal dtype.
    host_scan = pd.Series(values, dtype="float64").cummax()
    expected = cudf.Series.from_pandas(host_scan).astype(dtype)

    # Under test: cast to decimal first, then scan in cudf.
    decimal_series = cudf.Series(values).astype(dtype)
    got = decimal_series.cummax()

    assert_eq(got, expected)


@pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
def test_cumprod(dtype, nelem):
if dtype == np.int8:
Expand Down

0 comments on commit f38daf3

Please sign in to comment.