From c4fba3fc368fd72512eff93dc17ffbf63e78591a Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 30 Nov 2021 17:22:13 -0800
Subject: [PATCH 1/9] create new pr

---
 python/cudf/cudf/core/dataframe.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c0cb6f1917f..902a78f34d9 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6329,6 +6329,30 @@ def explode(self, column, ignore_index=False):
 
         return super()._explode(column, ignore_index)
 
+    def pct_change(self):
+        """
+        Calculates the percent change between sequential elements
+        in the DataFrame.
+
+        Parameters
+        ----------
+        periods : int, default 1
+            Periods to shift for forming percent change.
+        fill_method : str, default 'ffill'
+            How to handle NAs before computing percent changes.
+        limit : int, optional
+            The number of consecutive NAs to fill before stopping.
+            Not yet implemented.
+        freq : str, optional
+            Increment to use from time series API.
+            Not yet implemented.
+
+        Returns
+        -------
+        DataFrame
+        """
+        pass
+
     def __dataframe__(
         self, nan_as_null: bool = False, allow_copy: bool = True
     ):

From b0f6ba347a1b50087f15755e21b5d638ebba9790 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 1 Dec 2021 17:48:29 -0800
Subject: [PATCH 2/9] TO-DO: implem. diff method for dataframes - separate PR

---
 python/cudf/cudf/core/dataframe.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 902a78f34d9..e534585205e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6329,7 +6329,9 @@ def explode(self, column, ignore_index=False):
 
         return super()._explode(column, ignore_index)
 
-    def pct_change(self):
+    def pct_change(
+        self, periods=1, fill_method="ffill", limit=None, freq=None
+    ):
         """
         Calculates the percent change between sequential elements
         in the DataFrame.
@@ -6351,7 +6353,20 @@ def pct_change(self):
         -------
         DataFrame
         """
-        pass
+        if limit is not None:
+            raise NotImplementedError("limit parameter not supported yet.")
+        if freq is not None:
+            raise NotImplementedError("freq parameter not supported yet.")
+        elif fill_method not in {"ffill", "pad", "bfill", "backfill"}:
+            raise ValueError(
+                "fill_method must be one of 'ffill', 'pad', "
+                "'bfill', or 'backfill'."
+            )
+
+        data = self.fillna(method=fill_method, limit=limit)
+        data_diff = data.diff(periods=periods)  # need to implem. diff method
+        change = data_diff / data.shift(periods=periods, freq=freq)
+        return change
 
     def __dataframe__(
         self, nan_as_null: bool = False, allow_copy: bool = True

From a9414418f3685ec5650cfad840b9e02031637614 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 2 Dec 2021 21:59:29 -0800
Subject: [PATCH 3/9] addressed review

---
 python/cudf/cudf/core/dataframe.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e534585205e..00c5c267a3b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6364,9 +6364,10 @@ def pct_change(
             )
 
         data = self.fillna(method=fill_method, limit=limit)
-        data_diff = data.diff(periods=periods)  # need to implem. diff method
-        change = data_diff / data.shift(periods=periods, freq=freq)
-        return change
+
+        return data.diff(periods=periods) / data.shift(
+            periods=periods, freq=freq
+        )
 
     def __dataframe__(
         self, nan_as_null: bool = False, allow_copy: bool = True

From 80546474333ca2b5a177ca62405e874d2f6a5816 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 2 Dec 2021 22:02:56 -0800
Subject: [PATCH 4/9] adding tests, WIP

---
 python/cudf/cudf/tests/test_dataframe.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d555b5c4033..6a274569c9d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9039,3 +9039,25 @@ def test_pearson_corr_multiindex_dataframe():
     expected = gdf.to_pandas().groupby(level="a").corr("pearson")
 
     assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        np.random.normal(-100, 100, (50, 50)),
+        np.random.randint(-50, 50, (25, 30)),
+        np.random.random_sample((4, 4)),
+        np.random.uniform(10.5, 75.5, (10, 6)),
+        np.array([1.123, 2.343, 5.890, 0.0]),
+    ],
+)
+@pytest.mark.parametrize("periods", range(-5, 5))
+@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
+def test_dataframe_pct_change(data, periods, fill_method):
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+
+    actual = gdf.pct_change(periods=periods, fill_method=fill_method)
+    expected = pdf.pct_change(periods=periods, fill_method=fill_method)
+
+    assert_eq(expected, actual)

From 76c30c1ccf4a3159246f2da24f4693bf99578431 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 10 Feb 2022 02:13:46 -0800
Subject: [PATCH 5/9] use seed to generate random test data

---
 python/cudf/cudf/tests/test_dataframe.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e4fccd4f481..f97173f297a 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9184,14 +9184,13 @@ def test_dataframe_rename_duplicate_column():
 @pytest.mark.parametrize(
     "data",
     [
-        np.random.normal(-100, 100, (50, 50)),
-        np.random.randint(-50, 50, (25, 30)),
-        np.random.random_sample((4, 4)),
-        np.random.uniform(10.5, 75.5, (10, 6)),
+        np.random.RandomState(seed=10).randint(-50, 50, (25, 30)),
+        np.random.RandomState(seed=10).random_sample((4, 4)),
         np.array([1.123, 2.343, 5.890, 0.0]),
+        {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]},
     ],
 )
-@pytest.mark.parametrize("periods", range(-5, 5))
+@pytest.mark.parametrize("periods", (-2, -1, 0, 1, 2))
 @pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
 def test_dataframe_pct_change(data, periods, fill_method):
     gdf = cudf.DataFrame(data)

From e1c5d855be964be6fe990d1685903b9fc8fe806b Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Wed, 16 Feb 2022 13:30:43 -0800
Subject: [PATCH 6/9] Update python/cudf/cudf/tests/test_dataframe.py

Co-authored-by: Michael Wang <isVoid@users.noreply.github.com>
---
 python/cudf/cudf/tests/test_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 81e2c8b2a2f..35cc3ba74d0 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9188,7 +9188,7 @@ def test_dataframe_rename_duplicate_column():
         {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]},
     ],
 )
-@pytest.mark.parametrize("periods", (-2, -1, 0, 1, 2))
+@pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5])
 @pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
 def test_dataframe_pct_change(data, periods, fill_method):
     gdf = cudf.DataFrame(data)

From c264ea4365ef6ba598486d1beb79b4a54fb8cb53 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 16 Feb 2022 16:14:08 -0800
Subject: [PATCH 7/9] handles case when periods > len(df)

---
 python/cudf/cudf/core/column/column.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 393afe4a5b9..5d5e85e4d79 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -335,6 +335,12 @@ def _fill(
         return self
 
     def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase:
+        # link to the libcudf ticket you will create
+        if abs(offset) > len(self):
+            if fill_value is None:
+                return column_empty_like(self, masked=True)
+            else:
+                return full(len(self), fill_value, dtype=self.dtype)
         return libcudf.copying.shift(self, offset, fill_value)
 
     @property

From 0f68434898881cb6a0abb157a9233d468c1032ef Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 16 Feb 2022 16:17:27 -0800
Subject: [PATCH 8/9] avoid check_dtype, reduce test cases for periods

---
 python/cudf/cudf/tests/test_dataframe.py | 28 +++++++++++++++---------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 35cc3ba74d0..5b9c73fd827 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3442,29 +3442,37 @@ def test_get_numeric_data():
 
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES)
-@pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20])
+@pytest.mark.parametrize("period", [-15, -1, 0, 1, 15])
 @pytest.mark.parametrize("data_empty", [False, True])
 def test_shift(dtype, period, data_empty):
-
+    # TODO: this function currently tests Series.shift()
+    # but should instead test DataFrame.shift()
     if data_empty:
         data = None
     else:
         if dtype == np.int8:
             # to keep data in range
-            data = gen_rand(dtype, 100000, low=-2, high=2)
+            data = gen_rand(dtype, 10, low=-2, high=2)
         else:
-            data = gen_rand(dtype, 100000)
+            data = gen_rand(dtype, 10)
 
-    gdf = cudf.DataFrame({"a": cudf.Series(data, dtype=dtype)})
-    pdf = pd.DataFrame({"a": pd.Series(data, dtype=dtype)})
+    gs = cudf.DataFrame({"a": cudf.Series(data, dtype=dtype)})
+    ps = pd.DataFrame({"a": pd.Series(data, dtype=dtype)})
 
-    shifted_outcome = gdf.a.shift(period).fillna(0)
-    expected_outcome = pdf.a.shift(period).fillna(0).astype(dtype)
+    shifted_outcome = gs.a.shift(period)
+    expected_outcome = ps.a.shift(period)
 
+    # pandas uses NaN to signal missing values and force-converts the
+    # result columns to float types
     if data_empty:
-        assert_eq(shifted_outcome, expected_outcome, check_index_type=False)
+        assert_eq(
+            shifted_outcome,
+            expected_outcome,
+            check_index_type=False,
+            check_dtype=False,
+        )
     else:
-        assert_eq(shifted_outcome, expected_outcome)
+        assert_eq(shifted_outcome, expected_outcome, check_dtype=False)
 
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES)

From c3c36fba6c05ab9484aad74588e5e2865908527f Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 16 Feb 2022 16:28:01 -0800
Subject: [PATCH 9/9] added link to created bug-fix libcudf ticket

---
 python/cudf/cudf/core/column/column.py   | 4 +++-
 python/cudf/cudf/tests/test_dataframe.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5d5e85e4d79..1c1c2ef2bf6 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -335,7 +335,9 @@ def _fill(
         return self
 
     def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase:
-        # link to the libcudf ticket you will create
+        # libcudf currently doesn't handle the case abs(offset) > len(self).
+        # Work around it here until the libcudf issue below is fixed:
+        # https://github.com/rapidsai/cudf/issues/10314
         if abs(offset) > len(self):
             if fill_value is None:
                 return column_empty_like(self, masked=True)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 5b9c73fd827..f1468801732 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9190,7 +9190,7 @@ def test_dataframe_rename_duplicate_column():
 @pytest.mark.parametrize(
     "data",
     [
-        np.random.RandomState(seed=10).randint(-50, 50, (25, 30)),
+        np.random.RandomState(seed=10).randint(-50, 50, (10, 10)),
         np.random.RandomState(seed=10).random_sample((4, 4)),
         np.array([1.123, 2.343, 5.890, 0.0]),
         {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]},