From 767ddd76c91ef0b3b3b5faa42f22ff05771496cd Mon Sep 17 00:00:00 2001 From: sft-managed Date: Fri, 2 Jul 2021 20:31:37 +0000 Subject: [PATCH 1/4] Added pct_change to Series --- python/cudf/cudf/core/series.py | 36 ++++++++++++++++++++++++++++ python/cudf/cudf/tests/test_stats.py | 26 ++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 77640db6a1d..4527da9d05c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6022,6 +6022,42 @@ def explode(self, ignore_index=False): return super()._explode(self._column_names[0], ignore_index) + def pct_change( + self, periods=1, fill_method="ffill", limit=None, freq=None + ): + """ + Calculates the percent change between sequential elements + in the Series. + + Parameters + ---------- + periods : int, default 1 + Periods to shift for forming percent change. + fill_method : str, default 'ffill' + How to handle NAs before computing percent changes. + limit : int, optional + The number of consecutive NAs to fill before stopping. + Not yet implemented. + freq : str, optional + Increment to use from time series API. + Not yet implemented. 
+ + Returns + ------- + Series + """ + if limit is not None: + raise NotImplementedError("limit parameter not supported yet.") + if freq is not None: + raise NotImplementedError("freq parameter not supported yet.") + if fill_method not in ["ffill", "bfill"]: + raise ValueError("fill_method must be either 'ffill' or 'bfill'.") + + data = self.fillna(method=fill_method, limit=limit) + diff = data.diff(periods=periods) + change = diff / data.shift(periods=periods, freq=freq) + return change + class DatetimeProperties(object): """ diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index d4e944848c9..0407bf15535 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -316,6 +316,32 @@ def test_series_median(dtype, num_na): np.testing.assert_approx_equal(actual, desired) +@pytest.mark.parametrize( + "data", + [ + np.random.normal(-100, 100, 1000), + np.random.randint(-50, 50, 1000), + np.zeros(100), + np.array([1.123, 2.343, np.nan, 0.0]), + np.array([-2, 3.75, 6, None, None, None, -8.5, None, 4.2]), + cudf.Series([]), + cudf.Series([-3]), + ], +) +@pytest.mark.parametrize("periods", range(-5, 5)) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) +def test_series_pct_change(data, periods, fill_method): + cs = cudf.Series(data) + ps = cs.to_pandas() + + if np.abs(periods) <= len(cs): + got = cs.pct_change(periods=periods, fill_method=fill_method) + expected = ps.pct_change(periods=periods, fill_method=fill_method) + np.testing.assert_array_almost_equal( + got.to_array(fillna="pandas"), expected + ) + + @pytest.mark.parametrize( "data1", [ From 0cad02cf491504d5562927f9a45b02d0fdc5b6a4 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Tue, 13 Jul 2021 16:38:32 +0000 Subject: [PATCH 2/4] Added support for 'pad' and 'backfill' as options for fill_method --- python/cudf/cudf/core/series.py | 11 +++++++++-- python/cudf/cudf/tests/test_stats.py | 2 +- 2 files changed, 10 insertions(+), 3 
deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4527da9d05c..1b818ceebce 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6050,8 +6050,15 @@ def pct_change( raise NotImplementedError("limit parameter not supported yet.") if freq is not None: raise NotImplementedError("freq parameter not supported yet.") - if fill_method not in ["ffill", "bfill"]: - raise ValueError("fill_method must be either 'ffill' or 'bfill'.") + if fill_method == "pad": + fill_method = "ffill" + elif fill_method == "backfill": + fill_method = "bfill" + elif fill_method not in ["ffill", "bfill"]: + raise ValueError( + "fill_method must be either 'ffill', 'pad', " + "'bfill', or 'backfill'." + ) data = self.fillna(method=fill_method, limit=limit) diff = data.diff(periods=periods) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 0407bf15535..759a6a95798 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -329,7 +329,7 @@ def test_series_median(dtype, num_na): ], ) @pytest.mark.parametrize("periods", range(-5, 5)) -@pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"]) def test_series_pct_change(data, periods, fill_method): cs = cudf.Series(data) ps = cs.to_pandas() From c821791d24540a8d6c0afaa81ada5d85eb1f4f60 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Tue, 13 Jul 2021 18:54:30 +0000 Subject: [PATCH 3/4] Moved 'pad' and 'backfill' support to Frame --- python/cudf/cudf/core/frame.py | 11 ++++++++--- python/cudf/cudf/core/series.py | 8 ++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 3629358ee9f..d8bc8e3a8b9 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1295,9 +1295,14 @@ def fillna( if value is not None
and method is not None: raise ValueError("Cannot specify both 'value' and 'method'.") - - if method and method not in {"ffill", "bfill"}: - raise NotImplementedError(f"Fill method {method} is not supported") + + if method: + if method not in {"ffill", "bfill", "pad", "backfill"}: + raise NotImplementedError(f"Fill method {method} is not supported") + if method == "pad": + method = "ffill" + elif method == "backfill": + method = "bfill" if isinstance(value, cudf.Series): value = value.reindex(self._data.names) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 1b818ceebce..80581014c39 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6050,13 +6050,9 @@ def pct_change( raise NotImplementedError("limit parameter not supported yet.") if freq is not None: raise NotImplementedError("freq parameter not supported yet.") - if fill_method == "pad": - fill_method = "ffill" - elif fill_method == "backfill": - fill_method = "bfill" - elif fill_method not in ["ffill", "bfill"]: + elif fill_method not in {"ffill", "pad", "bfill", "backfill"}: raise ValueError( - "fill_method must be either 'ffill', 'pad', " + "fill_method must be one of 'ffill', 'pad', " "'bfill', or 'backfill'." 
) From aa991740248a9ec360e2cec4e1061cf4dd6803a4 Mon Sep 17 00:00:00 2001 From: sft-managed Date: Tue, 13 Jul 2021 19:32:12 +0000 Subject: [PATCH 4/4] flake8 --- python/cudf/cudf/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index d8bc8e3a8b9..37d7a4ddd14 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1295,10 +1295,12 @@ def fillna( if value is not None and method is not None: raise ValueError("Cannot specify both 'value' and 'method'.") - + if method: if method not in {"ffill", "bfill", "pad", "backfill"}: - raise NotImplementedError(f"Fill method {method} is not supported") + raise NotImplementedError( + f"Fill method {method} is not supported" + ) if method == "pad": method = "ffill" elif method == "backfill":