From 6b97e6e7761876648038e02fe8c1f2f77fa0ed33 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 13 Jul 2021 08:39:36 -0700
Subject: [PATCH 01/23] Add basic linear interpolate to DataFrame and Series

---
 python/cudf/cudf/core/dataframe.py | 25 ++++++++++++++++---------
 python/cudf/cudf/core/series.py    | 15 +++++++++++++++
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c02bf3d11a4..ba2fc029f9c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5818,6 +5818,13 @@ def _from_columns(cls, cols, index=None, columns=None):
 
         return cls(data=data, index=index,)
 
+    def interpolate(
+        self,
+        method='linear',
+        axis=0
+    ):
+        return self._apply_support_method("interpolate", method=method, axis=axis)
+
     def quantile(
         self,
         q=0.5,
@@ -7063,12 +7070,12 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
             **kwargs,
         )
 
-    def _apply_support_method(self, method, axis=0, *args, **kwargs):
+    def _apply_support_method(self, _method, axis=0, *args, **kwargs):
         assert axis in (None, 0, 1)
 
         if axis in (None, 0):
             result = [
-                getattr(self[col], method)(*args, **kwargs)
+                getattr(self[col], _method)(*args, **kwargs)
                 for col in self._data.names
             ]
 
@@ -7085,13 +7092,13 @@ def _apply_support_method(self, method, axis=0, *args, **kwargs):
         elif axis == 1:
             # for dask metadata compatibility
             skipna = kwargs.pop("skipna", None)
-            if method not in _cupy_nan_methods_map and skipna not in (
+            if _method not in _cupy_nan_methods_map and skipna not in (
                 None,
                 True,
                 1,
             ):
                 raise NotImplementedError(
-                    f"Row-wise operation to calculate '{method}'"
+                    f"Row-wise operation to calculate '{_method}'"
                     f" currently do not support `skipna=False`."
                 )
 
@@ -7123,7 +7130,7 @@ def _apply_support_method(self, method, axis=0, *args, **kwargs):
                 )
 
             prepared, mask, common_dtype = self._prepare_for_rowwise_op(
-                method, skipna
+                _method, skipna
             )
             for col in prepared._data.names:
                 if prepared._data[col].nullable:
@@ -7140,10 +7147,10 @@ def _apply_support_method(self, method, axis=0, *args, **kwargs):
                     )
             arr = cupy.asarray(prepared.as_gpu_matrix())
 
-            if skipna is not False and method in _cupy_nan_methods_map:
-                method = _cupy_nan_methods_map[method]
+            if skipna is not False and _method in _cupy_nan_methods_map:
+                _method = _cupy_nan_methods_map[_method]
 
-            result = getattr(cupy, method)(arr, axis=1, **kwargs)
+            result = getattr(cupy, _method)(arr, axis=1, **kwargs)
 
             if result.ndim == 1:
                 type_coerced_methods = {
@@ -7159,7 +7166,7 @@ def _apply_support_method(self, method, axis=0, *args, **kwargs):
                 }
                 result_dtype = (
                     common_dtype
-                    if method in type_coerced_methods
+                    if _method in type_coerced_methods
                     or is_datetime_dtype(common_dtype)
                     else None
                 )
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 77640db6a1d..50fd32cee5d 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -5403,6 +5403,21 @@ def hash_encode(self, stop, use_name=False):
         mod_vals = hashed_values % stop
         return Series(mod_vals._column, index=self.index, name=self.name)
 
+    def interpolate(
+        self,
+        method='linear'
+    ):
+        data = cupy.asarray(self._column.astype('float').fillna(np.nan))
+        interp_points = cupy.asarray(self.index)
+
+        known = self[~self.isnull()]
+        known_x = cupy.asarray(known.index)
+        known_y = cupy.asarray(known._column)
+
+        result = cupy.interp(interp_points, known_x, known_y)
+
+        return cudf.Series(result)
+
     def quantile(
         self, q=0.5, interpolation="linear", exact=True, quant_index=True
     ):

From 676388b774786dcd647395cd636c11a47cd782f0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 13 Jul 2021 11:55:46 -0700
Subject: [PATCH 02/23] Add initial test for DataFrame.interpolate

---
 python/cudf/cudf/tests/test_interpolate.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 python/cudf/cudf/tests/test_interpolate.py

diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
new file mode 100644
index 00000000000..bab680bbb3c
--- /dev/null
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -0,0 +1,21 @@
+import pandas as pd
+import cudf
+import pytest
+from cudf.testing._utils import assert_eq
+
+@pytest.mark.parametrize("data", [
+    {
+        'A': [1, None, 3]
+    }
+])
+@pytest.mark.parametrize("method", ['linear'])
+@pytest.mark.parametrize("axis", [0])
+def test_interpolate_nans(data, method,axis):  
+    # doesn't seem to work with NAs just yet
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+    
+    expect = pdf.interpolate(method=method, axis=axis)
+    got = gdf.interpolate(method=method, axis=axis)
+
+    assert_eq(expect, got)

From d625c306e97ac817dda6e96ba91d6a2c5b6485f2 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 13 Jul 2021 14:54:53 -0700
Subject: [PATCH 03/23] Move interpolate implementation to Frame._interpolate

---
 python/cudf/cudf/core/dataframe.py         |  2 +-
 python/cudf/cudf/core/frame.py             | 19 +++++++++++++++++++
 python/cudf/cudf/core/series.py            | 11 +----------
 python/cudf/cudf/tests/test_interpolate.py |  9 ++++++++-
 4 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index ba2fc029f9c..c479570616c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5823,7 +5823,7 @@ def interpolate(
         method='linear',
         axis=0
     ):
-        return self._apply_support_method("interpolate", method=method, axis=axis)
+        return super()._interpolate(method)
 
     def quantile(
         self,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 3629358ee9f..8f102861218 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1424,6 +1424,25 @@ def _apply_boolean_mask(self, boolean_mask):
         result._copy_type_metadata(self)
         return result
 
+    def _interpolate(self, method='linear'):
+
+        to_return = {}
+        for colname, col in self._data.items():
+            if col.nullable:
+                data = cupy.asarray(col.astype('float').fillna(np.nan))
+                not_null = col.isnull().unary_operator('not')
+
+                known_x = cupy.asarray(self.index._column.apply_boolean_mask(not_null))
+                known_y = cupy.asarray(col.apply_boolean_mask(not_null)).astype('float')
+
+                result = cupy.interp(cupy.asarray(self.index), known_x, known_y, left=np.nan, right=np.nan)
+            else:
+                result = col
+            to_return[colname] = result
+
+
+        return self.__class__(to_return)
+
     def _quantiles(
         self,
         q,
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 50fd32cee5d..b091880ade3 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -5407,16 +5407,7 @@ def interpolate(
         self,
         method='linear'
     ):
-        data = cupy.asarray(self._column.astype('float').fillna(np.nan))
-        interp_points = cupy.asarray(self.index)
-
-        known = self[~self.isnull()]
-        known_x = cupy.asarray(known.index)
-        known_y = cupy.asarray(known._column)
-
-        result = cupy.interp(interp_points, known_x, known_y)
-
-        return cudf.Series(result)
+        return super()._interpolate(method)
 
     def quantile(
         self, q=0.5, interpolation="linear", exact=True, quant_index=True
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index bab680bbb3c..f5e2523f323 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -6,6 +6,13 @@
 @pytest.mark.parametrize("data", [
     {
         'A': [1, None, 3]
+    },
+    {
+        'A': [None, 2, 3, None, 5]
+    },
+    {
+        'A': [1, None, 3],
+        'B': [None, 2, 3]
     }
 ])
 @pytest.mark.parametrize("method", ['linear'])
@@ -17,5 +24,5 @@ def test_interpolate_nans(data, method,axis):
     
     expect = pdf.interpolate(method=method, axis=axis)
     got = gdf.interpolate(method=method, axis=axis)
-
+    breakpoint()
     assert_eq(expect, got)

From c89d93893acb19dd43b7311024ec280e48c4934a Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 19 Jul 2021 05:33:34 -0700
Subject: [PATCH 04/23] Document DataFrame.interpolate; add 'index'/'values' methods

---
 python/cudf/cudf/core/dataframe.py | 35 ++++++++++++++++++++++++++++++
 python/cudf/cudf/core/frame.py     | 33 +++++++++++++++++++---------
 python/cudf/cudf/core/series.py    |  1 +
 3 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d53a392c0b7..6fc1293cf19 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5896,6 +5896,41 @@ def interpolate(
         method='linear',
         axis=0
     ):
+        """
+        Interpolate data values between some points.
+
+        Parameters
+        ----------
+        method : str, default 'linear'
+            Interpolation technique to use. Currently,
+            only 'linear` is supported.
+            * 'linear': Ignore the index and treat the values as
+            equally spaced. This is the only method supported on MultiIndexes.
+            * 'index', 'values': linearly interpolate using the index as 
+            an x-axis. Note that unsorted indices can lead to erroneous results.  
+        axis : int, default 0
+            Axis to interpolate along. Currently,
+            only 'axis=0' is supprted.
+        inplace : bool, default False
+            Update the data in place if possible.
+
+        Returns
+        -------
+        Series or DataFrame
+            Returns the same object type as the caller, interpolated at
+            some or all ``NaN`` values
+
+        """
+
+        if method not in {'linear', 'index', 'values'}:
+            raise ValueError(
+                f"method {method} is not supported."
+            )
+        if method in {'index', 'values'} and not self.index.is_monotonic_increasing:
+            warnings.warn(
+                "Unsorted Index..."
+            )
+
         return super()._interpolate(method)
 
     def quantile(
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 0d45269af04..8b439271867 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1425,23 +1425,36 @@ def _apply_boolean_mask(self, boolean_mask):
         return result
 
     def _interpolate(self, method='linear'):
+        columns = ColumnAccessor()
+
+        if method == 'linear':
+            xax = as_column(cupy.arange(len(self)))
+        elif method in {'index', 'values'}:
+            xax = self.index
 
-        to_return = {}
         for colname, col in self._data.items():
             if col.nullable:
-                data = cupy.asarray(col.astype('float').fillna(np.nan))
-                not_null = col.isnull().unary_operator('not')
-
-                known_x = cupy.asarray(self.index._column.apply_boolean_mask(not_null))
-                known_y = cupy.asarray(col.apply_boolean_mask(not_null)).astype('float')
-
-                result = cupy.interp(cupy.asarray(self.index), known_x, known_y, left=np.nan, right=np.nan)
+                not_null = col.notnull()
+                known_x = cupy.asarray(
+                    xax.apply_boolean_mask(not_null)
+                )
+                known_y = cupy.asarray(
+                    col.apply_boolean_mask(not_null)
+                ).astype(np.dtype('float64'))
+
+                result = cupy.interp(
+                    cupy.asarray(xax), 
+                    known_x, 
+                    known_y, 
+                    left=np.nan, 
+                    right=np.nan)
             else:
+                # The trivial case
                 result = col
-            to_return[colname] = result
+            columns[colname] = result
 
 
-        return self.__class__(to_return)
+        return self.__class__(columns)
 
     def _quantiles(
         self,
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f123baf6897..e3a0024c886 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -5418,6 +5418,7 @@ def hash_encode(self, stop, use_name=False):
         mod_vals = hashed_values % stop
         return Series(mod_vals._column, index=self.index, name=self.name)
 
+    @copy_docstring(DataFrame.interpolate)
     def interpolate(
         self,
         method='linear'

From 5a4e720927067173a57de4a5f192215973176988 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 19 Jul 2021 06:32:48 -0700
Subject: [PATCH 05/23] sig and docstring updates

---
 python/cudf/cudf/core/dataframe.py | 46 ++++++++++++------------------
 python/cudf/cudf/core/frame.py     | 25 ++++++++++++++++
 python/cudf/cudf/core/series.py    | 22 ++++++++++++--
 3 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 6fc1293cf19..05acc3f4201 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5891,36 +5891,18 @@ def _from_columns(cls, cols, index=None, columns=None):
 
         return cls(data=data, index=index,)
 
+    @copy_docstring(Frame._interpolate)
     def interpolate(
         self,
         method='linear',
-        axis=0
+        axis=0,
+        limit=None,
+        inplace=False,
+        limit_direction=None,
+        limit_area=None,
+        downcast=None,
+        **kwargs
     ):
-        """
-        Interpolate data values between some points.
-
-        Parameters
-        ----------
-        method : str, default 'linear'
-            Interpolation technique to use. Currently,
-            only 'linear` is supported.
-            * 'linear': Ignore the index and treat the values as
-            equally spaced. This is the only method supported on MultiIndexes.
-            * 'index', 'values': linearly interpolate using the index as 
-            an x-axis. Note that unsorted indices can lead to erroneous results.  
-        axis : int, default 0
-            Axis to interpolate along. Currently,
-            only 'axis=0' is supprted.
-        inplace : bool, default False
-            Update the data in place if possible.
-
-        Returns
-        -------
-        Series or DataFrame
-            Returns the same object type as the caller, interpolated at
-            some or all ``NaN`` values
-
-        """
 
         if method not in {'linear', 'index', 'values'}:
             raise ValueError(
@@ -5930,8 +5912,16 @@ def interpolate(
             warnings.warn(
                 "Unsorted Index..."
             )
-
-        return super()._interpolate(method)
+        return super()._interpolate(
+            method=method, 
+            axis=axis, 
+            limit=limit, 
+            inplace=inplace, 
+            limit_direction=limit_direction, 
+            limit_area=limit_area, 
+            downcast=downcast, 
+            **kwargs
+        )
 
     def quantile(
         self,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 8b439271867..a8b84aead29 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1425,6 +1425,31 @@ def _apply_boolean_mask(self, boolean_mask):
         return result
 
     def _interpolate(self, method='linear'):
+        """
+        Interpolate data values between some points.
+
+        Parameters
+        ----------
+        method : str, default 'linear'
+            Interpolation technique to use. Currently,
+            only 'linear` is supported.
+            * 'linear': Ignore the index and treat the values as
+            equally spaced. This is the only method supported on MultiIndexes.
+            * 'index', 'values': linearly interpolate using the index as 
+            an x-axis. Note that unsorted indices can lead to erroneous results.  
+        axis : int, default 0
+            Axis to interpolate along. Currently,
+            only 'axis=0' is supprted.
+        inplace : bool, default False
+            Update the data in place if possible.
+
+        Returns
+        -------
+        Series or DataFrame
+            Returns the same object type as the caller, interpolated at
+            some or all ``NaN`` values
+
+        """
         columns = ColumnAccessor()
 
         if method == 'linear':
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index e3a0024c886..0f97d9607ff 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -5418,12 +5418,28 @@ def hash_encode(self, stop, use_name=False):
         mod_vals = hashed_values % stop
         return Series(mod_vals._column, index=self.index, name=self.name)
 
-    @copy_docstring(DataFrame.interpolate)
+    @copy_docstring(Frame._interpolate)
     def interpolate(
         self,
-        method='linear'
+        method='linear',
+        axis=0,
+        limit=None,
+        inplace=False,
+        limit_direction=None,
+        limit_area=None,
+        downcast=None,
+        **kwargs
     ):
-        return super()._interpolate(method)
+        return super()._interpolate(
+            method=method, 
+            axis=axis, 
+            limit=limit, 
+            inplace=inplace, 
+            limit_direction=limit_direction, 
+            limit_area=limit_area, 
+            downcast=downcast, 
+            **kwargs
+        )
 
     def quantile(
         self, q=0.5, interpolation="linear", exact=True, quant_index=True

From c17cd4fac3c6f6a767561f4739766bd9d0514a80 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 20 Jul 2021 07:42:17 -0700
Subject: [PATCH 06/23] Reject all-object DataFrames in interpolate; expand tests

---
 python/cudf/cudf/core/dataframe.py         |  5 ++
 python/cudf/cudf/core/frame.py             | 12 ++++-
 python/cudf/cudf/tests/test_interpolate.py | 60 +++++++++++++++++++---
 3 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 05acc3f4201..7717fa167c5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5912,6 +5912,11 @@ def interpolate(
             warnings.warn(
                 "Unsorted Index..."
             )
+        if all(dt == np.dtype('object') for dt in self.dtypes):
+            raise TypeError(
+                "Cannot interpolate with all object-dtype columns in the DataFrame. "
+                "Try setting at least one column to a numeric dtype."
+            )
         return super()._interpolate(
             method=method, 
             axis=axis, 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index a8b84aead29..970ca189a48 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1424,7 +1424,17 @@ def _apply_boolean_mask(self, boolean_mask):
         result._copy_type_metadata(self)
         return result
 
-    def _interpolate(self, method='linear'):
+    def _interpolate(
+        self, 
+        method='linear', 
+        axis=0, 
+        limit=None, 
+        inplace=False, 
+        limit_direction=None, 
+        limit_area=None, 
+        downcast=None, 
+        **kwargs
+    ):
         """
         Interpolate data values between some points.
 
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index f5e2523f323..3a04ef887bf 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -1,28 +1,74 @@
 import pandas as pd
 import cudf
 import pytest
-from cudf.testing._utils import assert_eq
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 @pytest.mark.parametrize("data", [
+    # basics
     {
-        'A': [1, None, 3]
+        'A': [1, 2, 3],
+        'B': [4, 5, 6]
     },
     {
-        'A': [None, 2, 3, None, 5]
+        'A': [1, None, 3],
+        'B': [4, None, 6]
     },
     {
-        'A': [1, None, 3],
-        'B': [None, 2, 3]
+        'A': [None, 2, 3],
+        'B': [4, 5, None]
     }
 ])
 @pytest.mark.parametrize("method", ['linear'])
 @pytest.mark.parametrize("axis", [0])
-def test_interpolate_nans(data, method,axis):  
+def test_interpolate_dataframe(data, method, axis):  
     # doesn't seem to work with NAs just yet
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
     
     expect = pdf.interpolate(method=method, axis=axis)
     got = gdf.interpolate(method=method, axis=axis)
-    breakpoint()
     assert_eq(expect, got)
+
+@pytest.mark.parametrize("data", [
+    [1,2,3],
+    [1, None, 3],
+    [None, 2, None, 4],
+    [1, None, 3, None],
+    [0.1, 0.2, 0.3]
+])
+@pytest.mark.parametrize("method", ['linear'])
+@pytest.mark.parametrize("axis", [0])
+def test_interpolate_series(data, method, axis):
+    gsr = cudf.Series(data)
+    psr = gsr.to_pandas()
+
+    expect = psr.interpolate(method=method, axis=axis)
+    got = gsr.interpolate(method=method, axis=axis)
+
+    assert_eq(expect, got)
+
+@pytest.mark.parametrize('data,kwargs', [
+    (
+        {
+            'A': ['a','b','c'],
+            'B': ['d','e','f']
+        },
+        {'axis': 0, 'method': 'linear'},
+    )
+])
+def test_interpolate_dataframe_error_cases(data, kwargs):
+    gsr = cudf.DataFrame(data)
+    psr = gsr.to_pandas()
+
+    assert_exceptions_equal(
+        lfunc = psr.interpolate,
+        rfunc = gsr.interpolate,
+        lfunc_args_and_kwargs = (
+            [],
+            kwargs
+        ),
+        rfunc_args_and_kwargs = (
+            [],
+            kwargs
+        )
+    )

From c16f2b3896c998e2e20a58d100f1f0ac0b908d61 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 21 Jul 2021 15:40:57 -0700
Subject: [PATCH 07/23] Add interpolator dispatch in algorithms; validate limit_direction

---
 python/cudf/cudf/core/algorithms.py        | 35 ++++++++++++++++--
 python/cudf/cudf/core/dataframe.py         |  4 ---
 python/cudf/cudf/core/frame.py             | 34 ++++++++----------
 python/cudf/cudf/tests/test_interpolate.py | 42 +++++++++++++++++-----
 4 files changed, 81 insertions(+), 34 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 9f26ac8ee78..0afb505a67a 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -1,10 +1,10 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 from warnings import warn
-
+import numpy as np
 import cupy as cp
 
 from cudf.core.series import Index, Series
-
+from cudf.core.column import as_column
 
 def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
     """Encode the input values as integer labels
@@ -59,3 +59,34 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
     values.name = name
 
     return labels, cats.values if return_cupy_array else Index(cats)
+
+def linear_interpolation(col, xax):
+    # fill all NAs with NaNs
+    col = col.astype('float64').fillna(np.nan)
+
+    # figure out where the nans are
+    not_nan_mask = ~cp.isnan(col)
+
+    # find the first nan
+    first_nan_idx = as_column(not_nan_mask).find_first_value(1)
+
+    known_x = cp.asarray(xax.apply_boolean_mask(not_nan_mask))
+    known_y = cp.asarray(col.apply_boolean_mask(not_nan_mask)).astype(np.dtype('float64'))
+
+    result = cp.interp(
+        cp.asarray(xax), 
+        known_x, 
+        known_y
+    )
+
+    result[:first_nan_idx] = np.nan
+
+    return result
+
+def get_column_interpolator(method):
+    if method == 'linear':
+        return linear_interpolation
+    else:
+        raise ValueError(
+            f"Interpolation method `{method}` not found"
+        )        
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 7717fa167c5..7876d402899 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5904,10 +5904,6 @@ def interpolate(
         **kwargs
     ):
 
-        if method not in {'linear', 'index', 'values'}:
-            raise ValueError(
-                f"method {method} is not supported."
-            )
         if method in {'index', 'values'} and not self.index.is_monotonic_increasing:
             warnings.warn(
                 "Unsorted Index..."
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 970ca189a48..4381690fc7b 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1460,35 +1460,31 @@ def _interpolate(
             some or all ``NaN`` values
 
         """
+
+        if method in {'pad', 'ffill'} and limit_direction != 'forward':
+            raise ValueError(
+                f"`limit_direction` must be 'forward' for method `{method}`"
+            )
+        if method in {'backfill', 'bfill'} and limit_direction != 'backward':
+            raise ValueError(
+                f"`limit_direction` must be 'backward' for method `{method}`"
+            )
+
+
         columns = ColumnAccessor()
 
-        if method == 'linear':
+        if method in 'linear':
             xax = as_column(cupy.arange(len(self)))
         elif method in {'index', 'values'}:
             xax = self.index
+        interpolator = cudf.core.algorithms.get_column_interpolator(method)
 
         for colname, col in self._data.items():
             if col.nullable:
-                not_null = col.notnull()
-                known_x = cupy.asarray(
-                    xax.apply_boolean_mask(not_null)
-                )
-                known_y = cupy.asarray(
-                    col.apply_boolean_mask(not_null)
-                ).astype(np.dtype('float64'))
-
-                result = cupy.interp(
-                    cupy.asarray(xax), 
-                    known_x, 
-                    known_y, 
-                    left=np.nan, 
-                    right=np.nan)
-            else:
-                # The trivial case
-                result = col
+                col = col.fillna(np.nan)
+            result = interpolator(col, xax)            
             columns[colname] = result
 
-
         return self.__class__(columns)
 
     def _quantiles(
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index 3a04ef887bf..1c9ac46af66 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -10,12 +10,12 @@
         'B': [4, 5, 6]
     },
     {
-        'A': [1, None, 3],
-        'B': [4, None, 6]
+        'A': [1.0, None, 3.0],
+        'B': [4.0, None, 6.0]
     },
     {
-        'A': [None, 2, 3],
-        'B': [4, 5, None]
+        'A': [None, 2.0, 3.0],
+        'B': [4.0, 5.0, None]
     }
 ])
 @pytest.mark.parametrize("method", ['linear'])
@@ -30,10 +30,10 @@ def test_interpolate_dataframe(data, method, axis):
     assert_eq(expect, got)
 
 @pytest.mark.parametrize("data", [
-    [1,2,3],
-    [1, None, 3],
-    [None, 2, None, 4],
-    [1, None, 3, None],
+    [1.0,2.0,3.0],
+    [1.0, None, 3.0],
+    [None, 2.0, None, 4.0],
+    [1.0, None, 3.0, None],
     [0.1, 0.2, 0.3]
 ])
 @pytest.mark.parametrize("method", ['linear'])
@@ -54,7 +54,31 @@ def test_interpolate_series(data, method, axis):
             'B': ['d','e','f']
         },
         {'axis': 0, 'method': 'linear'},
-    )
+    ),
+    (
+        {
+            'A': [1,2,3]
+        },
+        {'method': 'pad', 'limit_direction': 'backward'}
+    ),
+    (
+        {
+            'A': [1,2,3]
+        },
+        {'method': 'ffill', 'limit_direction': 'backward'}
+    ),
+    (
+        {
+            'A': [1,2,3]
+        },
+        {'method': 'bfill', 'limit_direction': 'forward'}
+    ),
+    (
+        {
+            'A': [1,2,3]
+        },
+        {'method': 'backfill', 'limit_direction': 'forward'}
+    ),
 ])
 def test_interpolate_dataframe_error_cases(data, kwargs):
     gsr = cudf.DataFrame(data)

From fe56bb190d7a55c979cc702a7f300437921163fa Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 22 Jul 2021 12:19:08 -0700
Subject: [PATCH 08/23] Pass a Frame to interpolators; add index/values interpolation

---
 python/cudf/cudf/core/algorithms.py        | 43 ++++++++++++++++++----
 python/cudf/cudf/core/frame.py             | 13 ++++---
 python/cudf/cudf/tests/test_interpolate.py |  7 +++-
 3 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 0afb505a67a..6a2b22aa1ef 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -5,7 +5,7 @@
 
 from cudf.core.series import Index, Series
 from cudf.core.column import as_column
-
+from cudf.core.index import RangeIndex
 def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
     """Encode the input values as integer labels
 
@@ -60,32 +60,59 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
 
     return labels, cats.values if return_cupy_array else Index(cats)
 
-def linear_interpolation(col, xax):
+def linear_interpolation(to_interp):
+    """
+    Interpolate over a float column. Implicitly assumes that values are 
+    evenly spaced with respect to the x-axis, for example the data
+    [1.0, NaN, 3.0] will be interpolated assuming the NaN is half way 
+    between the two valid values, yielding [1.0, 2.0, 3.0]
+    """
+
+    to_interp._index = RangeIndex(start=0, stop=len(to_interp), step=1)
+    return index_or_values_interpolation(to_interp)
+
+def index_or_values_interpolation(to_interp):
+    """
+    Interpolate over a float column. assumes a linear interpolation
+    strategy using the index of the data to denote spacing of the x
+    values. For example the data and index [1.0, NaN, 4.0], [1, 3, 4]
+    would result in [1.0, 3.0, 4.0]
+    """
+    xax = to_interp._index._column
+
+    col = to_interp._data[list(to_interp._data.keys())[0]]
+
     # fill all NAs with NaNs
     col = col.astype('float64').fillna(np.nan)
 
     # figure out where the nans are
-    not_nan_mask = ~cp.isnan(col)
+    mask = cp.isnan(col)
+
+    # trivial case
+    if mask.all():
+        return col
+    
+    mask = ~mask
 
     # find the first nan
-    first_nan_idx = as_column(not_nan_mask).find_first_value(1)
+    first_nan_idx = as_column(mask).find_first_value(1)
 
-    known_x = cp.asarray(xax.apply_boolean_mask(not_nan_mask))
-    known_y = cp.asarray(col.apply_boolean_mask(not_nan_mask)).astype(np.dtype('float64'))
+    known_x = cp.asarray(xax.apply_boolean_mask(mask))
+    known_y = cp.asarray(col.apply_boolean_mask(mask)).astype(np.dtype('float64'))
 
     result = cp.interp(
         cp.asarray(xax), 
         known_x, 
         known_y
     )
-
     result[:first_nan_idx] = np.nan
-
     return result
 
 def get_column_interpolator(method):
     if method == 'linear':
         return linear_interpolation
+    elif method in {'index', 'values'}:
+        return index_or_values_interpolation
     else:
         raise ValueError(
             f"Interpolation method `{method}` not found"
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 4381690fc7b..5686927bbf4 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1473,16 +1473,17 @@ def _interpolate(
 
         columns = ColumnAccessor()
 
-        if method in 'linear':
-            xax = as_column(cupy.arange(len(self)))
-        elif method in {'index', 'values'}:
-            xax = self.index
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
-
         for colname, col in self._data.items():
             if col.nullable:
                 col = col.fillna(np.nan)
-            result = interpolator(col, xax)            
+            
+            # Interpolation methods may or may not need the index
+            to_interp = Frame(
+                data={colname: col},
+                index=self.index
+            )
+            result = interpolator(to_interp)            
             columns[colname] = result
 
         return self.__class__(columns)
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index 1c9ac46af66..974ade03ef7 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -6,8 +6,8 @@
 @pytest.mark.parametrize("data", [
     # basics
     {
-        'A': [1, 2, 3],
-        'B': [4, 5, 6]
+        'A': [1.0, 2.0, 3.0],
+        'B': [4.0, 5.0, 6.0]
     },
     {
         'A': [1.0, None, 3.0],
@@ -34,6 +34,9 @@ def test_interpolate_dataframe(data, method, axis):
     [1.0, None, 3.0],
     [None, 2.0, None, 4.0],
     [1.0, None, 3.0, None],
+    [None, None, 3.0, 4.0],
+    [1.0, 2.0, None, None],
+    [None, None, None, None],
     [0.1, 0.2, 0.3]
 ])
 @pytest.mark.parametrize("method", ['linear'])

From a68161639c92973f343a65fa495c008bedc8ee49 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 22 Jul 2021 12:31:58 -0700
Subject: [PATCH 09/23] test index and values methods

---
 python/cudf/cudf/tests/test_interpolate.py | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index 974ade03ef7..3002e7b9279 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -50,6 +50,31 @@ def test_interpolate_series(data, method, axis):
 
     assert_eq(expect, got)
 
+@pytest.mark.parametrize('data', [
+    [1.0, 2.0, 3.0, 4.0],
+    [None, 2.0, 3.0, 4.0],
+    [1.0, 2.0, 3.0, None],
+    [None, None, 3.0, 4.0],
+    [1.0, 2.0, None, None],
+    [1.0, None, 3.0, None],
+    [None, 2.0, None, 4.0],
+    [None, None, None, None]
+])
+@pytest.mark.parametrize('index', [
+    [0, 1, 2, 3],
+    [0, 2, 4, 6],
+    [0, 3, 4, 9]
+])
+@pytest.mark.parametrize('method', ['index', 'values'])
+def test_interpolate_series_values_or_index(data, index, method):
+    gsr = cudf.Series(data, index=index)
+    psr = gsr.to_pandas()
+
+    expect = psr.interpolate(method=method)
+    got = gsr.interpolate(method=method)
+
+    assert_eq(expect, got)
+
 @pytest.mark.parametrize('data,kwargs', [
     (
         {

From 98608a9026044c5cc3e39ee1753dfa75b02036aa Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 22 Jul 2021 12:38:39 -0700
Subject: [PATCH 10/23] forgot the index

---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5686927bbf4..bbb2f3192ec 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1486,7 +1486,7 @@ def _interpolate(
             result = interpolator(to_interp)            
             columns[colname] = result
 
-        return self.__class__(columns)
+        return self.__class__(columns, index=self.index.copy())
 
     def _quantiles(
         self,

From 143c79808ac1d3f7da748f2909976cfcedb0039d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 23 Jul 2021 08:39:43 -0700
Subject: [PATCH 11/23] style

---
 python/cudf/cudf/core/algorithms.py        |  51 +++----
 python/cudf/cudf/core/dataframe.py         |  36 ++---
 python/cudf/cudf/core/frame.py             |  44 +++---
 python/cudf/cudf/core/series.py            |  20 +--
 python/cudf/cudf/tests/test_interpolate.py | 157 +++++++++------------
 5 files changed, 144 insertions(+), 164 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 6a2b22aa1ef..7e865850c38 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -1,11 +1,14 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 from warnings import warn
-import numpy as np
+
 import cupy as cp
+import numpy as np
 
-from cudf.core.series import Index, Series
 from cudf.core.column import as_column
 from cudf.core.index import RangeIndex
+from cudf.core.series import Index, Series
+
+
 def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
     """Encode the input values as integer labels
 
@@ -60,17 +63,19 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
 
     return labels, cats.values if return_cupy_array else Index(cats)
 
+
 def linear_interpolation(to_interp):
     """
-    Interpolate over a float column. Implicitly assumes that values are 
+    Interpolate over a float column. Implicitly assumes that values are
     evenly spaced with respect to the x-axis, for example the data
-    [1.0, NaN, 3.0] will be interpolated assuming the NaN is half way 
+    [1.0, NaN, 3.0] will be interpolated assuming the NaN is half way
     between the two valid values, yielding [1.0, 2.0, 3.0]
     """
 
     to_interp._index = RangeIndex(start=0, stop=len(to_interp), step=1)
     return index_or_values_interpolation(to_interp)
 
+
 def index_or_values_interpolation(to_interp):
     """
     Interpolate over a float column. assumes a linear interpolation
@@ -78,12 +83,12 @@ def index_or_values_interpolation(to_interp):
     values. For example the data and index [1.0, NaN, 4.0], [1, 3, 4]
     would result in [1.0, 3.0, 4.0]
     """
-    xax = to_interp._index._column
-
-    col = to_interp._data[list(to_interp._data.keys())[0]]
+    colname = list(to_interp._data.keys())[0]
+    to_interp._data[colname] = (
+        to_interp._data[colname].astype("float64").fillna(np.nan)
+    )
 
-    # fill all NAs with NaNs
-    col = col.astype('float64').fillna(np.nan)
+    col = to_interp._data[colname]
 
     # figure out where the nans are
     mask = cp.isnan(col)
@@ -91,29 +96,25 @@ def index_or_values_interpolation(to_interp):
     # trivial case
     if mask.all():
         return col
-    
-    mask = ~mask
 
-    # find the first nan
-    first_nan_idx = as_column(mask).find_first_value(1)
+    mask = as_column(~mask)
+    known_x_and_y = to_interp._apply_boolean_mask(mask)
 
-    known_x = cp.asarray(xax.apply_boolean_mask(mask))
-    known_y = cp.asarray(col.apply_boolean_mask(mask)).astype(np.dtype('float64'))
+    known_x = cp.asarray(known_x_and_y._index._column)
+    known_y = cp.asarray(known_x_and_y._data.columns[0])
 
-    result = cp.interp(
-        cp.asarray(xax), 
-        known_x, 
-        known_y
-    )
+    result = cp.interp(cp.asarray(to_interp._index), known_x, known_y)
+
+    # find the first nan
+    first_nan_idx = as_column(mask).find_first_value(1)
     result[:first_nan_idx] = np.nan
     return result
 
+
 def get_column_interpolator(method):
-    if method == 'linear':
+    if method == "linear":
         return linear_interpolation
-    elif method in {'index', 'values'}:
+    elif method in {"index", "values"}:
         return index_or_values_interpolation
     else:
-        raise ValueError(
-            f"Interpolation method `{method}` not found"
-        )        
+        raise ValueError(f"Interpolation method `{method}` not found")
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 7876d402899..79ddd035f9e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5894,34 +5894,36 @@ def _from_columns(cls, cols, index=None, columns=None):
     @copy_docstring(Frame._interpolate)
     def interpolate(
         self,
-        method='linear',
+        method="linear",
         axis=0,
         limit=None,
         inplace=False,
         limit_direction=None,
         limit_area=None,
         downcast=None,
-        **kwargs
+        **kwargs,
     ):
 
-        if method in {'index', 'values'} and not self.index.is_monotonic_increasing:
-            warnings.warn(
-                "Unsorted Index..."
-            )
-        if all(dt == np.dtype('object') for dt in self.dtypes):
+        if (
+            method in {"index", "values"}
+            and not self.index.is_monotonic_increasing
+        ):
+            warnings.warn("Unsorted Index...")
+        if all(dt == np.dtype("object") for dt in self.dtypes):
             raise TypeError(
-                "Cannot interpolate with all object-dtype columns in the DataFrame. "
-                "Try setting at least one column to a numeric dtype."
+                "Cannot interpolate with all object-dtype "
+                "columns in the DataFrame. Try setting at "
+                "least one column to a numeric dtype."
             )
         return super()._interpolate(
-            method=method, 
-            axis=axis, 
-            limit=limit, 
-            inplace=inplace, 
-            limit_direction=limit_direction, 
-            limit_area=limit_area, 
-            downcast=downcast, 
-            **kwargs
+            method=method,
+            axis=axis,
+            limit=limit,
+            inplace=inplace,
+            limit_direction=limit_direction,
+            limit_area=limit_area,
+            downcast=downcast,
+            **kwargs,
         )
 
     def quantile(
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index bbb2f3192ec..82224e92550 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -65,7 +65,12 @@ def __init_subclass__(cls):
 
     @classmethod
     def _from_table(cls, table: Frame):
-        return cls(table._data, index=table._index)
+        return cls(
+            table._data,
+            index=cudf.Index._from_table(table._index)
+            if table._index is not None
+            else table._index,
+        )
 
     def _mimic_inplace(
         self: T, result: Frame, inplace: bool = False
@@ -1415,7 +1420,6 @@ def _apply_boolean_mask(self, boolean_mask):
         rows corresponding to `False` is dropped
         """
         boolean_mask = as_column(boolean_mask)
-
         result = self.__class__._from_table(
             libcudf.stream_compaction.apply_boolean_mask(
                 self, as_column(boolean_mask)
@@ -1425,15 +1429,15 @@ def _apply_boolean_mask(self, boolean_mask):
         return result
 
     def _interpolate(
-        self, 
-        method='linear', 
-        axis=0, 
-        limit=None, 
-        inplace=False, 
-        limit_direction=None, 
-        limit_area=None, 
-        downcast=None, 
-        **kwargs
+        self,
+        method="linear",
+        axis=0,
+        limit=None,
+        inplace=False,
+        limit_direction=None,
+        limit_area=None,
+        downcast=None,
+        **kwargs,
     ):
         """
         Interpolate data values between some points.
@@ -1445,8 +1449,8 @@ def _interpolate(
             only 'linear` is supported.
             * 'linear': Ignore the index and treat the values as
             equally spaced. This is the only method supported on MultiIndexes.
-            * 'index', 'values': linearly interpolate using the index as 
-            an x-axis. Note that unsorted indices can lead to erroneous results.  
+            * 'index', 'values': linearly interpolate using the index as
+            an x-axis. Unsorted indices can lead to erroneous results.
         axis : int, default 0
             Axis to interpolate along. Currently,
             only 'axis=0' is supprted.
@@ -1461,29 +1465,25 @@ def _interpolate(
 
         """
 
-        if method in {'pad', 'ffill'} and limit_direction != 'forward':
+        if method in {"pad", "ffill"} and limit_direction != "forward":
             raise ValueError(
                 f"`limit_direction` must be 'forward' for method `{method}`"
             )
-        if method in {'backfill', 'bfill'} and limit_direction != 'backward':
+        if method in {"backfill", "bfill"} and limit_direction != "backward":
             raise ValueError(
                 f"`limit_direction` must be 'backward' for method `{method}`"
             )
 
-
         columns = ColumnAccessor()
 
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
         for colname, col in self._data.items():
             if col.nullable:
                 col = col.fillna(np.nan)
-            
+
             # Interpolation methods may or may not need the index
-            to_interp = Frame(
-                data={colname: col},
-                index=self.index
-            )
-            result = interpolator(to_interp)            
+            to_interp = Frame(data={colname: col}, index=self.index)
+            result = interpolator(to_interp)
             columns[colname] = result
 
         return self.__class__(columns, index=self.index.copy())
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 0f97d9607ff..3a720a8cfc1 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -5421,24 +5421,24 @@ def hash_encode(self, stop, use_name=False):
     @copy_docstring(Frame._interpolate)
     def interpolate(
         self,
-        method='linear',
+        method="linear",
         axis=0,
         limit=None,
         inplace=False,
         limit_direction=None,
         limit_area=None,
         downcast=None,
-        **kwargs
+        **kwargs,
     ):
         return super()._interpolate(
-            method=method, 
-            axis=axis, 
-            limit=limit, 
-            inplace=inplace, 
-            limit_direction=limit_direction, 
-            limit_area=limit_area, 
-            downcast=downcast, 
-            **kwargs
+            method=method,
+            axis=axis,
+            limit=limit,
+            inplace=inplace,
+            limit_direction=limit_direction,
+            limit_area=limit_area,
+            downcast=downcast,
+            **kwargs,
         )
 
     def quantile(
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index 3002e7b9279..70a9425ad0f 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -1,45 +1,44 @@
-import pandas as pd
-import cudf
 import pytest
+
+import cudf
 from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
-@pytest.mark.parametrize("data", [
-    # basics
-    {
-        'A': [1.0, 2.0, 3.0],
-        'B': [4.0, 5.0, 6.0]
-    },
-    {
-        'A': [1.0, None, 3.0],
-        'B': [4.0, None, 6.0]
-    },
-    {
-        'A': [None, 2.0, 3.0],
-        'B': [4.0, 5.0, None]
-    }
-])
-@pytest.mark.parametrize("method", ['linear'])
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        # basics
+        {"A": [1.0, 2.0, 3.0], "B": [4.0, 5.0, 6.0]},
+        {"A": [1.0, None, 3.0], "B": [4.0, None, 6.0]},
+        {"A": [None, 2.0, 3.0], "B": [4.0, 5.0, None]},
+    ],
+)
+@pytest.mark.parametrize("method", ["linear"])
 @pytest.mark.parametrize("axis", [0])
-def test_interpolate_dataframe(data, method, axis):  
+def test_interpolate_dataframe(data, method, axis):
     # doesn't seem to work with NAs just yet
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
-    
+
     expect = pdf.interpolate(method=method, axis=axis)
     got = gdf.interpolate(method=method, axis=axis)
     assert_eq(expect, got)
 
-@pytest.mark.parametrize("data", [
-    [1.0,2.0,3.0],
-    [1.0, None, 3.0],
-    [None, 2.0, None, 4.0],
-    [1.0, None, 3.0, None],
-    [None, None, 3.0, 4.0],
-    [1.0, 2.0, None, None],
-    [None, None, None, None],
-    [0.1, 0.2, 0.3]
-])
-@pytest.mark.parametrize("method", ['linear'])
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1.0, 2.0, 3.0],
+        [1.0, None, 3.0],
+        [None, 2.0, None, 4.0],
+        [1.0, None, 3.0, None],
+        [None, None, 3.0, 4.0],
+        [1.0, 2.0, None, None],
+        [None, None, None, None],
+        [0.1, 0.2, 0.3],
+    ],
+)
+@pytest.mark.parametrize("method", ["linear"])
 @pytest.mark.parametrize("axis", [0])
 def test_interpolate_series(data, method, axis):
     gsr = cudf.Series(data)
@@ -50,22 +49,22 @@ def test_interpolate_series(data, method, axis):
 
     assert_eq(expect, got)
 
-@pytest.mark.parametrize('data', [
-    [1.0, 2.0, 3.0, 4.0],
-    [None, 2.0, 3.0, 4.0],
-    [1.0, 2.0, 3.0, None],
-    [None, None, 3.0, 4.0],
-    [1.0, 2.0, None, None],
-    [1.0, None, 3.0, None],
-    [None, 2.0, None, 4.0],
-    [None, None, None, None]
-])
-@pytest.mark.parametrize('index', [
-    [0, 1, 2, 3],
-    [0, 2, 4, 6],
-    [0, 3, 4, 9]
-])
-@pytest.mark.parametrize('method', ['index', 'values'])
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1.0, 2.0, 3.0, 4.0],
+        [None, 2.0, 3.0, 4.0],
+        [1.0, 2.0, 3.0, None],
+        [None, None, 3.0, 4.0],
+        [1.0, 2.0, None, None],
+        [1.0, None, 3.0, None],
+        [None, 2.0, None, 4.0],
+        [None, None, None, None],
+    ],
+)
+@pytest.mark.parametrize("index", [[0, 1, 2, 3], [0, 2, 4, 6], [0, 3, 4, 9]])
+@pytest.mark.parametrize("method", ["index", "values"])
 def test_interpolate_series_values_or_index(data, index, method):
     gsr = cudf.Series(data, index=index)
     psr = gsr.to_pandas()
@@ -75,52 +74,30 @@ def test_interpolate_series_values_or_index(data, index, method):
 
     assert_eq(expect, got)
 
-@pytest.mark.parametrize('data,kwargs', [
-    (
-        {
-            'A': ['a','b','c'],
-            'B': ['d','e','f']
-        },
-        {'axis': 0, 'method': 'linear'},
-    ),
-    (
-        {
-            'A': [1,2,3]
-        },
-        {'method': 'pad', 'limit_direction': 'backward'}
-    ),
-    (
-        {
-            'A': [1,2,3]
-        },
-        {'method': 'ffill', 'limit_direction': 'backward'}
-    ),
-    (
-        {
-            'A': [1,2,3]
-        },
-        {'method': 'bfill', 'limit_direction': 'forward'}
-    ),
-    (
-        {
-            'A': [1,2,3]
-        },
-        {'method': 'backfill', 'limit_direction': 'forward'}
-    ),
-])
+
+@pytest.mark.parametrize(
+    "data,kwargs",
+    [
+        (
+            {"A": ["a", "b", "c"], "B": ["d", "e", "f"]},
+            {"axis": 0, "method": "linear"},
+        ),
+        ({"A": [1, 2, 3]}, {"method": "pad", "limit_direction": "backward"}),
+        ({"A": [1, 2, 3]}, {"method": "ffill", "limit_direction": "backward"}),
+        ({"A": [1, 2, 3]}, {"method": "bfill", "limit_direction": "forward"}),
+        (
+            {"A": [1, 2, 3]},
+            {"method": "backfill", "limit_direction": "forward"},
+        ),
+    ],
+)
 def test_interpolate_dataframe_error_cases(data, kwargs):
     gsr = cudf.DataFrame(data)
     psr = gsr.to_pandas()
 
     assert_exceptions_equal(
-        lfunc = psr.interpolate,
-        rfunc = gsr.interpolate,
-        lfunc_args_and_kwargs = (
-            [],
-            kwargs
-        ),
-        rfunc_args_and_kwargs = (
-            [],
-            kwargs
-        )
+        lfunc=psr.interpolate,
+        rfunc=gsr.interpolate,
+        lfunc_args_and_kwargs=([], kwargs),
+        rfunc_args_and_kwargs=([], kwargs),
     )

From 81ffee10f735779db52cfa57b8b0d11cc564dc16 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 23 Jul 2021 08:41:59 -0700
Subject: [PATCH 12/23] remove unnecessary older changes

---
 python/cudf/cudf/core/dataframe.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 79ddd035f9e..4ee21d5777e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7171,12 +7171,12 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
             **kwargs,
         )
 
-    def _apply_support_method(self, _method, axis=0, *args, **kwargs):
+    def _apply_support_method(self, method, axis=0, *args, **kwargs):
         assert axis in (None, 0, 1)
 
         if axis in (None, 0):
             result = [
-                getattr(self[col], _method)(*args, **kwargs)
+                getattr(self[col], method)(*args, **kwargs)
                 for col in self._data.names
             ]
 
@@ -7193,13 +7193,13 @@ def _apply_support_method(self, _method, axis=0, *args, **kwargs):
         elif axis == 1:
             # for dask metadata compatibility
             skipna = kwargs.pop("skipna", None)
-            if _method not in _cupy_nan_methods_map and skipna not in (
+            if method not in _cupy_nan_methods_map and skipna not in (
                 None,
                 True,
                 1,
             ):
                 raise NotImplementedError(
-                    f"Row-wise operation to calculate '{_method}'"
+                    f"Row-wise operation to calculate '{method}'"
                     f" currently do not support `skipna=False`."
                 )
 
@@ -7231,7 +7231,7 @@ def _apply_support_method(self, _method, axis=0, *args, **kwargs):
                 )
 
             prepared, mask, common_dtype = self._prepare_for_rowwise_op(
-                _method, skipna
+                method, skipna
             )
             for col in prepared._data.names:
                 if prepared._data[col].nullable:
@@ -7248,10 +7248,10 @@ def _apply_support_method(self, _method, axis=0, *args, **kwargs):
                     )
             arr = cupy.asarray(prepared.as_gpu_matrix())
 
-            if skipna is not False and _method in _cupy_nan_methods_map:
-                _method = _cupy_nan_methods_map[_method]
+            if skipna is not False and method in _cupy_nan_methods_map:
+                method = _cupy_nan_methods_map[method]
 
-            result = getattr(cupy, _method)(arr, axis=1, **kwargs)
+            result = getattr(cupy, method)(arr, axis=1, **kwargs)
 
             if result.ndim == 1:
                 type_coerced_methods = {
@@ -7267,7 +7267,7 @@ def _apply_support_method(self, _method, axis=0, *args, **kwargs):
                 }
                 result_dtype = (
                     common_dtype
-                    if _method in type_coerced_methods
+                    if method in type_coerced_methods
                     or is_datetime_dtype(common_dtype)
                     else None
                 )

From f859d0ee692965d591471918fc8be7087e10807d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 06:07:24 -0700
Subject: [PATCH 13/23] directly add and test unsorted index case

---
 python/cudf/cudf/core/algorithms.py        |  6 +++---
 python/cudf/cudf/core/dataframe.py         |  6 +-----
 python/cudf/cudf/core/frame.py             | 14 +++++++++++---
 python/cudf/cudf/tests/test_interpolate.py | 14 ++++++++++++++
 4 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 7e865850c38..bbb01958b73 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -89,12 +89,12 @@ def index_or_values_interpolation(to_interp):
     )
 
     col = to_interp._data[colname]
-
     # figure out where the nans are
     mask = cp.isnan(col)
 
-    # trivial case
-    if mask.all():
+    # trivial cases, all nan or no nans
+    num_nan = mask.sum()
+    if num_nan == 0 or num_nan == len(to_interp):
         return col
 
     mask = as_column(~mask)
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 4ee21d5777e..1f928b77943 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5904,17 +5904,13 @@ def interpolate(
         **kwargs,
     ):
 
-        if (
-            method in {"index", "values"}
-            and not self.index.is_monotonic_increasing
-        ):
-            warnings.warn("Unsorted Index...")
         if all(dt == np.dtype("object") for dt in self.dtypes):
             raise TypeError(
                 "Cannot interpolate with all object-dtype "
                 "columns in the DataFrame. Try setting at "
                 "least one column to a numeric dtype."
             )
+
         return super()._interpolate(
             method=method,
             axis=axis,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 82224e92550..f28d0a1bebe 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1474,19 +1474,27 @@ def _interpolate(
                 f"`limit_direction` must be 'backward' for method `{method}`"
             )
 
+
         columns = ColumnAccessor()
 
+        perm_sort = self._index.argsort()
+        sorted_data = self._gather(perm_sort)
+    
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
-        for colname, col in self._data.items():
+        for colname, col in sorted_data._data.items():
             if col.nullable:
                 col = col.fillna(np.nan)
 
             # Interpolation methods may or may not need the index
-            to_interp = Frame(data={colname: col}, index=self.index)
+            to_interp = Frame(data={colname: col}, index=sorted_data._index)
             result = interpolator(to_interp)
             columns[colname] = result
 
-        return self.__class__(columns, index=self.index.copy())
+        result = self.__class__(columns, index=sorted_data._index)
+        # that which was once sorted, now is not
+        restored = result._gather(perm_sort.argsort())
+
+        return restored
 
     def _quantiles(
         self,
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index 70a9425ad0f..ba5792d4e84 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -49,6 +49,20 @@ def test_interpolate_series(data, method, axis):
 
     assert_eq(expect, got)
 
+@pytest.mark.parametrize('data,index', [
+    (
+        [2.0, None, 4.0, None, 2.0], 
+        [1, 2, 3, 2, 1]
+    )
+])
+def test_interpolate_series_unsorted_index(data, index):
+    gsr = cudf.Series(data, index=index)
+    psr = gsr.to_pandas()
+
+    expect = psr.interpolate(method='values')
+    got = gsr.interpolate(method='values')
+
+    assert_eq(expect, got)
 
 @pytest.mark.parametrize(
     "data",

From 71272a94806e375fd109885d81a6271363d7cd34 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 06:20:00 -0700
Subject: [PATCH 14/23] ...but don't do it for RangeIndex-based data

---
 python/cudf/cudf/core/frame.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index f28d0a1bebe..8ca26df70de 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1474,27 +1474,30 @@ def _interpolate(
                 f"`limit_direction` must be 'backward' for method `{method}`"
             )
 
-
+        data = self
         columns = ColumnAccessor()
 
-        perm_sort = self._index.argsort()
-        sorted_data = self._gather(perm_sort)
+        if not isinstance(data._index, cudf.RangeIndex):
+            perm_sort = data._index.argsort()
+            data = data._gather(perm_sort)
     
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
-        for colname, col in sorted_data._data.items():
+        for colname, col in data._data.items():
             if col.nullable:
                 col = col.fillna(np.nan)
 
             # Interpolation methods may or may not need the index
-            to_interp = Frame(data={colname: col}, index=sorted_data._index)
+            to_interp = Frame(data={colname: col}, index=data._index)
             result = interpolator(to_interp)
             columns[colname] = result
 
-        result = self.__class__(columns, index=sorted_data._index)
-        # that which was once sorted, now is not
-        restored = result._gather(perm_sort.argsort())
+        result = self.__class__(columns, index=data._index)
+
+        if not isinstance(data._index, cudf.RangeIndex):
+            # that which was once sorted, now is not
+            result = result._gather(perm_sort.argsort())
 
-        return restored
+        return result
 
     def _quantiles(
         self,

From 52e431aed0ae3faf6c2665fa198afcdb5b661d0f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 28 Jul 2021 08:28:11 -0500
Subject: [PATCH 15/23] Apply suggestions from code review

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/cudf/cudf/core/algorithms.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 7e865850c38..0fc678a45cc 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -64,7 +64,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
     return labels, cats.values if return_cupy_array else Index(cats)
 
 
-def linear_interpolation(to_interp):
+def _linear_interpolation(to_interp):
     """
     Interpolate over a float column. Implicitly assumes that values are
     evenly spaced with respect to the x-axis, for example the data
@@ -76,7 +76,7 @@ def linear_interpolation(to_interp):
     return index_or_values_interpolation(to_interp)
 
 
-def index_or_values_interpolation(to_interp):
+def _index_or_values_interpolation(to_interp):
     """
     Interpolate over a float column. assumes a linear interpolation
     strategy using the index of the data to denote spacing of the x
@@ -97,8 +97,7 @@ def index_or_values_interpolation(to_interp):
     if mask.all():
         return col
 
-    mask = as_column(~mask)
-    known_x_and_y = to_interp._apply_boolean_mask(mask)
+    known_x_and_y = to_interp._apply_boolean_mask(as_column(~mask))
 
     known_x = cp.asarray(known_x_and_y._index._column)
     known_y = cp.asarray(known_x_and_y._data.columns[0])
@@ -106,7 +105,7 @@ def index_or_values_interpolation(to_interp):
     result = cp.interp(cp.asarray(to_interp._index), known_x, known_y)
 
     # find the first nan
-    first_nan_idx = as_column(mask).find_first_value(1)
+    first_nan_idx = (mask == 1).argmax().item()
     result[:first_nan_idx] = np.nan
     return result
 

From 088618efa771c2991c8c6168539298f142e3781b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 06:30:19 -0700
Subject: [PATCH 16/23] fix minor bugs

---
 python/cudf/cudf/core/algorithms.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 9f89abe54f7..cf84a2f3432 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -73,7 +73,7 @@ def _linear_interpolation(to_interp):
     """
 
     to_interp._index = RangeIndex(start=0, stop=len(to_interp), step=1)
-    return index_or_values_interpolation(to_interp)
+    return _index_or_values_interpolation(to_interp)
 
 
 def _index_or_values_interpolation(to_interp):
@@ -105,15 +105,15 @@ def _index_or_values_interpolation(to_interp):
     result = cp.interp(cp.asarray(to_interp._index), known_x, known_y)
 
     # find the first nan
-    first_nan_idx = (mask == 1).argmax().item()
+    first_nan_idx = (mask == 0).argmax().item()
     result[:first_nan_idx] = np.nan
     return result
 
 
 def get_column_interpolator(method):
     if method == "linear":
-        return linear_interpolation
+        return _linear_interpolation
     elif method in {"index", "values"}:
-        return index_or_values_interpolation
+        return _index_or_values_interpolation
     else:
         raise ValueError(f"Interpolation method `{method}` not found")

From 4785a564534abad4c258dd94dae27426cf211f1c Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 07:46:20 -0700
Subject: [PATCH 17/23] address reviews

---
 python/cudf/cudf/core/dataframe.py | 1 -
 python/cudf/cudf/core/frame.py     | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1f928b77943..f5e43eb4af7 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5891,7 +5891,6 @@ def _from_columns(cls, cols, index=None, columns=None):
 
         return cls(data=data, index=index,)
 
-    @copy_docstring(Frame._interpolate)
     def interpolate(
         self,
         method="linear",
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 8ca26df70de..5de7e6df53c 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1475,7 +1475,7 @@ def _interpolate(
             )
 
         data = self
-        columns = ColumnAccessor()
+        columns = {}
 
         if not isinstance(data._index, cudf.RangeIndex):
             perm_sort = data._index.argsort()
@@ -1491,7 +1491,7 @@ def _interpolate(
             result = interpolator(to_interp)
             columns[colname] = result
 
-        result = self.__class__(columns, index=data._index)
+        result = self.__class__(ColumnAccessor(columns), index=data._index)
 
         if not isinstance(data._index, cudf.RangeIndex):
             # that which was once sorted, now is not

From ed6cb8168900f3f412a49a95f2e06a1e23e83f65 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 07:55:29 -0700
Subject: [PATCH 18/23] more reviews

---
 python/cudf/cudf/core/algorithms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index cf84a2f3432..f681721552b 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -99,10 +99,10 @@ def _index_or_values_interpolation(to_interp):
 
     known_x_and_y = to_interp._apply_boolean_mask(as_column(~mask))
 
-    known_x = cp.asarray(known_x_and_y._index._column)
-    known_y = cp.asarray(known_x_and_y._data.columns[0])
+    known_x = known_x_and_y._index._column.values
+    known_y = known_x_and_y._data.columns[0].values
 
-    result = cp.interp(cp.asarray(to_interp._index), known_x, known_y)
+    result = cp.interp(to_interp._index.values, known_x, known_y)
 
     # find the first nan
     first_nan_idx = (mask == 0).argmax().item()

From b85edc177f39ca2ef14e45aa4638c7fd10cf21e2 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 08:02:29 -0700
Subject: [PATCH 19/23] just expose interpolate directly

---
 python/cudf/cudf/core/dataframe.py |  2 +-
 python/cudf/cudf/core/frame.py     |  2 +-
 python/cudf/cudf/core/series.py    | 23 -----------------------
 3 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f5e43eb4af7..59d9576376d 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5910,7 +5910,7 @@ def interpolate(
                 "least one column to a numeric dtype."
             )
 
-        return super()._interpolate(
+        return super().interpolate(
             method=method,
             axis=axis,
             limit=limit,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5de7e6df53c..e564452c43f 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1428,7 +1428,7 @@ def _apply_boolean_mask(self, boolean_mask):
         result._copy_type_metadata(self)
         return result
 
-    def _interpolate(
+    def interpolate(
         self,
         method="linear",
         axis=0,
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 3a720a8cfc1..565b72bd087 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -5418,29 +5418,6 @@ def hash_encode(self, stop, use_name=False):
         mod_vals = hashed_values % stop
         return Series(mod_vals._column, index=self.index, name=self.name)
 
-    @copy_docstring(Frame._interpolate)
-    def interpolate(
-        self,
-        method="linear",
-        axis=0,
-        limit=None,
-        inplace=False,
-        limit_direction=None,
-        limit_area=None,
-        downcast=None,
-        **kwargs,
-    ):
-        return super()._interpolate(
-            method=method,
-            axis=axis,
-            limit=limit,
-            inplace=inplace,
-            limit_direction=limit_direction,
-            limit_area=limit_area,
-            downcast=downcast,
-            **kwargs,
-        )
-
     def quantile(
         self, q=0.5, interpolation="linear", exact=True, quant_index=True
     ):

From 82c4f1ebe7a96abeca90e45f7ae360ab544d1c12 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jul 2021 08:30:06 -0700
Subject: [PATCH 20/23] style

---
 python/cudf/cudf/core/frame.py             |  2 +-
 python/cudf/cudf/tests/test_interpolate.py | 15 +++++++--------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e564452c43f..6eb3442091d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1480,7 +1480,7 @@ def interpolate(
         if not isinstance(data._index, cudf.RangeIndex):
             perm_sort = data._index.argsort()
             data = data._gather(perm_sort)
-    
+
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
         for colname, col in data._data.items():
             if col.nullable:
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index ba5792d4e84..e9b9e03891e 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -49,21 +49,20 @@ def test_interpolate_series(data, method, axis):
 
     assert_eq(expect, got)
 
-@pytest.mark.parametrize('data,index', [
-    (
-        [2.0, None, 4.0, None, 2.0], 
-        [1, 2, 3, 2, 1]
-    )
-])
+
+@pytest.mark.parametrize(
+    "data,index", [([2.0, None, 4.0, None, 2.0], [1, 2, 3, 2, 1])]
+)
 def test_interpolate_series_unsorted_index(data, index):
     gsr = cudf.Series(data, index=index)
     psr = gsr.to_pandas()
 
-    expect = psr.interpolate(method='values')
-    got = gsr.interpolate(method='values')
+    expect = psr.interpolate(method="values")
+    got = gsr.interpolate(method="values")
 
     assert_eq(expect, got)
 
+
 @pytest.mark.parametrize(
     "data",
     [

From b486c8b0a82afad3842386d8e3af33ff0c22ed24 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Aug 2021 08:57:59 -0700
Subject: [PATCH 21/23] address last review comment

---
 python/cudf/cudf/core/algorithms.py | 34 ++++++++++++++---------------
 python/cudf/cudf/core/frame.py      |  5 ++---
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index f681721552b..f953c894db2 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -5,6 +5,7 @@
 import numpy as np
 
 from cudf.core.column import as_column
+from cudf.core.frame import Frame
 from cudf.core.index import RangeIndex
 from cudf.core.series import Index, Series
 
@@ -64,7 +65,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
     return labels, cats.values if return_cupy_array else Index(cats)
 
 
-def _linear_interpolation(to_interp):
+def _linear_interpolation(column, index=None):
     """
     Interpolate over a float column. Implicitly assumes that values are
     evenly spaced with respect to the x-axis, for example the data
@@ -72,31 +73,26 @@ def _linear_interpolation(to_interp):
     between the two valid values, yielding [1.0, 2.0, 3.0]
     """
 
-    to_interp._index = RangeIndex(start=0, stop=len(to_interp), step=1)
-    return _index_or_values_interpolation(to_interp)
+    index = RangeIndex(start=0, stop=len(column), step=1)
+    return _index_or_values_interpolation(column, index=index)
 
 
-def _index_or_values_interpolation(to_interp):
+def _index_or_values_interpolation(column, index=None):
     """
     Interpolate over a float column. assumes a linear interpolation
     strategy using the index of the data to denote spacing of the x
     values. For example the data and index [1.0, NaN, 4.0], [1, 3, 4]
     would result in [1.0, 3.0, 4.0]
     """
-    colname = list(to_interp._data.keys())[0]
-    to_interp._data[colname] = (
-        to_interp._data[colname].astype("float64").fillna(np.nan)
-    )
-
-    col = to_interp._data[colname]
     # figure out where the nans are
-    mask = cp.isnan(col)
+    mask = cp.isnan(column)
 
     # trivial cases, all nan or no nans
     num_nan = mask.sum()
-    if num_nan == 0 or num_nan == len(to_interp):
-        return col
+    if num_nan == 0 or num_nan == len(column):
+        return column
 
+    to_interp = Frame(data={None: column}, index=index)
     known_x_and_y = to_interp._apply_boolean_mask(as_column(~mask))
 
     known_x = known_x_and_y._index._column.values
@@ -111,9 +107,11 @@ def _index_or_values_interpolation(to_interp):
 
 
 def get_column_interpolator(method):
-    if method == "linear":
-        return _linear_interpolation
-    elif method in {"index", "values"}:
-        return _index_or_values_interpolation
-    else:
+    interpolator = {
+        "linear": _linear_interpolation,
+        "index": _index_or_values_interpolation,
+        "values": _index_or_values_interpolation,
+    }.get(method, None)
+    if not interpolator:
         raise ValueError(f"Interpolation method `{method}` not found")
+    return interpolator
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 03c18ade830..6673caa05cf 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1495,11 +1495,10 @@ def interpolate(
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
         for colname, col in data._data.items():
             if col.nullable:
-                col = col.fillna(np.nan)
+                col = col.astype("float64").fillna(np.nan)
 
             # Interpolation methods may or may not need the index
-            to_interp = Frame(data={colname: col}, index=data._index)
-            result = interpolator(to_interp)
+            result = interpolator(col, index=data._index)
             columns[colname] = result
 
         result = self.__class__(ColumnAccessor(columns), index=data._index)

From 296eddc84adeb1236fa10d5c628f44453986c384 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 6 Aug 2021 05:39:30 -0700
Subject: [PATCH 22/23] address review

---
 python/cudf/cudf/core/frame.py             | 17 ++++++++---------
 python/cudf/cudf/tests/test_interpolate.py |  5 ++++-
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 8d4b66bcbc7..cbd92920b33 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1490,28 +1490,27 @@ def interpolate(
             )
 
         data = self
-        columns = {}
 
         if not isinstance(data._index, cudf.RangeIndex):
             perm_sort = data._index.argsort()
             data = data._gather(perm_sort)
 
         interpolator = cudf.core.algorithms.get_column_interpolator(method)
+        columns = {}
         for colname, col in data._data.items():
             if col.nullable:
                 col = col.astype("float64").fillna(np.nan)
 
             # Interpolation methods may or may not need the index
-            result = interpolator(col, index=data._index)
-            columns[colname] = result
-
-        result = self.__class__(ColumnAccessor(columns), index=data._index)
+            columns[colname] = interpolator(col, index=data._index)
 
-        if not isinstance(data._index, cudf.RangeIndex):
-            # that which was once sorted, now is not
-            result = result._gather(perm_sort.argsort())
+        result = self._from_data(columns, index=data._index)
 
-        return result
+        return (
+            result
+            if isinstance(data._index, cudf.RangeIndex)
+            else result._gather(perm_sort.argsort())
+        )
 
     def _quantiles(
         self,
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index e9b9e03891e..66556c48828 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -16,7 +16,10 @@
 @pytest.mark.parametrize("method", ["linear"])
 @pytest.mark.parametrize("axis", [0])
 def test_interpolate_dataframe(data, method, axis):
-    # doesn't seem to work with NAs just yet
+    # Pandas interpolate methods do not seem to work
+    # with nullable dtypes yet, so this test treats
+    # NAs as NaNs.
+    # https://github.com/pandas-dev/pandas/issues/40252
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
 

From 94cc6da0a85eed096171721be97134088c186cf0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Fri, 6 Aug 2021 07:40:08 -0500
Subject: [PATCH 23/23] Update python/cudf/cudf/core/frame.py

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index cbd92920b33..27c89246b07 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1468,7 +1468,7 @@ def interpolate(
             an x-axis. Unsorted indices can lead to erroneous results.
         axis : int, default 0
             Axis to interpolate along. Currently,
-            only 'axis=0' is supprted.
+            only 'axis=0' is supported.
         inplace : bool, default False
             Update the data in place if possible.