Skip to content

Commit

Permalink
Added pct_change to Series (#8650)
Browse files Browse the repository at this point in the history
closes #6133
closes #8727
unit test in test_stats.py: test_series_pct_change

Added a `pct_change` method to Series. The `limit` and `freq` arguments are accepted but not yet supported, because `Series.fillna()` does not implement `limit` and `Series.shift()` does not implement `freq`.

Authors:
  - https://github.com/TravisHester

Approvers:
  - Michael Wang (https://github.com/isVoid)
  - Benjamin Zaitlen (https://github.com/quasiben)

URL: #8650
  • Loading branch information
TravisHester authored Jul 22, 2021
1 parent 09cd5a0 commit eddb2f8
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 2 deletions.
11 changes: 9 additions & 2 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1296,8 +1296,15 @@ def fillna(
if value is not None and method is not None:
raise ValueError("Cannot specify both 'value' and 'method'.")

if method and method not in {"ffill", "bfill"}:
raise NotImplementedError(f"Fill method {method} is not supported")
if method:
if method not in {"ffill", "bfill", "pad", "backfill"}:
raise NotImplementedError(
f"Fill method {method} is not supported"
)
if method == "pad":
method = "ffill"
elif method == "backfill":
method = "bfill"

if isinstance(value, cudf.Series):
value = value.reindex(self._data.names)
Expand Down
39 changes: 39 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6041,6 +6041,45 @@ def explode(self, ignore_index=False):

return super()._explode(self._column_names[0], ignore_index)

def pct_change(
    self, periods=1, fill_method="ffill", limit=None, freq=None
):
    """
    Compute the fractional change between each element and the
    element ``periods`` positions away from it.

    Missing values are filled with ``fill_method`` first; the result is
    then ``filled.diff(periods) / filled.shift(periods)``.

    Parameters
    ----------
    periods : int, default 1
        Offset, in positions, between the two elements compared.
    fill_method : str, default 'ffill'
        Strategy used to fill NAs before the change is computed.
        Must be one of 'ffill', 'pad', 'bfill' or 'backfill'.
    limit : int, optional
        Maximum number of consecutive NAs to fill.
        Not yet implemented; any value other than None raises.
    freq : str, optional
        Time-series increment to shift by.
        Not yet implemented; any value other than None raises.

    Returns
    -------
    Series

    Raises
    ------
    NotImplementedError
        If ``limit`` or ``freq`` is not None.
    ValueError
        If ``fill_method`` is not one of the accepted names.
    """
    # Guard clauses: unsupported options are rejected first, and only
    # then is the fill method validated.
    if limit is not None:
        raise NotImplementedError("limit parameter not supported yet.")
    if freq is not None:
        raise NotImplementedError("freq parameter not supported yet.")

    accepted_fill_methods = {"ffill", "pad", "bfill", "backfill"}
    if fill_method not in accepted_fill_methods:
        raise ValueError(
            "fill_method must be one of 'ffill', 'pad', "
            "'bfill', or 'backfill'."
        )

    # limit and freq are always None here; they are still forwarded so
    # the calls need no change once the underlying methods support them.
    filled = self.fillna(method=fill_method, limit=limit)
    delta = filled.diff(periods=periods)
    return delta / filled.shift(periods=periods, freq=freq)


class DatetimeProperties(object):
"""
Expand Down
26 changes: 26 additions & 0 deletions python/cudf/cudf/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,32 @@ def test_series_median(dtype, num_na):
np.testing.assert_approx_equal(actual, desired)


@pytest.mark.parametrize(
    "data",
    [
        np.random.normal(-100, 100, 1000),
        np.random.randint(-50, 50, 1000),
        np.zeros(100),
        np.array([1.123, 2.343, np.nan, 0.0]),
        np.array([-2, 3.75, 6, None, None, None, -8.5, None, 4.2]),
        cudf.Series([]),
        cudf.Series([-3]),
    ],
)
@pytest.mark.parametrize("periods", range(-5, 5))
@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
def test_series_pct_change(data, periods, fill_method):
    """Compare cudf ``Series.pct_change`` against the pandas result.

    Combinations where ``abs(periods)`` exceeds the series length are
    not compared at all.
    """
    gpu_series = cudf.Series(data)
    cpu_series = gpu_series.to_pandas()

    # Nothing is asserted when the shift is longer than the series.
    if np.abs(periods) > len(gpu_series):
        return

    got = gpu_series.pct_change(periods=periods, fill_method=fill_method)
    expected = cpu_series.pct_change(
        periods=periods, fill_method=fill_method
    )
    np.testing.assert_array_almost_equal(
        got.to_array(fillna="pandas"), expected
    )


@pytest.mark.parametrize(
"data1",
[
Expand Down

0 comments on commit eddb2f8

Please sign in to comment.