From ca971da859ad7e3dc97ee7db0d98ae01c87d5d92 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 28 Jul 2022 12:21:21 -0400
Subject: [PATCH 01/26] Fix consumption of non-GPU backed protocol dataframes

---
 python/cudf/cudf/core/_compat.py           |  1 +
 python/cudf/cudf/core/column/column.py     |  5 +-
 python/cudf/cudf/core/df_protocol.py       | 42 ++++++++---------
 python/cudf/cudf/core/dtypes.py            |  2 +-
 python/cudf/cudf/tests/test_df_protocol.py | 54 ++++++++++++++--------
 5 files changed, 60 insertions(+), 44 deletions(-)

diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py
index f30d229ee4e..373dcde76f3 100644
--- a/python/cudf/cudf/core/_compat.py
+++ b/python/cudf/cudf/core/_compat.py
@@ -12,3 +12,4 @@
 PANDAS_GE_133 = PANDAS_VERSION >= version.parse("1.3.3")
 PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4")
 PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
+PANDAS_LT_150 = PANDAS_VERSION < version.parse("1.5.0")
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index bd17cb4ede9..94675599e68 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -287,8 +287,9 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
                 size=codes.size,
                 ordered=array.type.ordered,
             )
-        elif isinstance(
-            array.type, pd.core.arrays._arrow_utils.ArrowIntervalType
+        elif (
+            isinstance(array, pa.ExtensionArray)
+            and array.type.extension_name == "pandas.interval"
         ):
             return cudf.core.column.IntervalColumn.from_arrow(array)
 
diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index f4ce658bff3..1972f6420e3 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -655,13 +655,8 @@ def from_dataframe(
     if not hasattr(df, "__dataframe__"):
         raise ValueError("`df` does not support __dataframe__")
 
-    return _from_dataframe(df.__dataframe__(allow_copy=allow_copy))
+    df = df.__dataframe__(allow_copy=allow_copy)
 
-
-def _from_dataframe(df: DataFrameObject) -> _CuDFDataFrame:
-    """
-    Create a cudf DataFrame object from DataFrameObject.
-    """
     # Check number of chunks, if there's more than one we need to iterate
     if df.num_chunks() > 1:
         raise NotImplementedError("More than one chunk not handled yet")
@@ -678,13 +673,19 @@ def _from_dataframe(df: DataFrameObject) -> _CuDFDataFrame:
             _DtypeKind.FLOAT,
             _DtypeKind.BOOL,
         ):
-            columns[name], _buf = _protocol_to_cudf_column_numeric(col)
+            columns[name], _buf = _protocol_to_cudf_column_numeric(
+                col, allow_copy
+            )
 
         elif col.dtype[0] == _DtypeKind.CATEGORICAL:
-            columns[name], _buf = _protocol_to_cudf_column_categorical(col)
+            columns[name], _buf = _protocol_to_cudf_column_categorical(
+                col, allow_copy
+            )
 
         elif col.dtype[0] == _DtypeKind.STRING:
-            columns[name], _buf = _protocol_to_cudf_column_string(col)
+            columns[name], _buf = _protocol_to_cudf_column_string(
+                col, allow_copy
+            )
 
         else:
             raise NotImplementedError(
@@ -699,7 +700,7 @@ def _from_dataframe(df: DataFrameObject) -> _CuDFDataFrame:
 
 
 def _protocol_to_cudf_column_numeric(
-    col: _CuDFColumn,
+    col: _CuDFColumn, allow_copy: bool
 ) -> Tuple[
     cudf.core.column.ColumnBase,
     Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
@@ -714,7 +715,7 @@ def _protocol_to_cudf_column_numeric(
     buffers = col.get_buffers()
     assert buffers["data"] is not None, "data buffer should not be None"
     _dbuffer, _ddtype = buffers["data"]
-    _check_buffer_is_on_gpu(_dbuffer)
+    _check_buffer_is_on_gpu(_dbuffer, allow_copy)
     cudfcol_num = build_column(
         Buffer(_dbuffer.ptr, _dbuffer.bufsize),
         protocol_dtype_to_cupy_dtype(_ddtype),
@@ -722,17 +723,14 @@ def _protocol_to_cudf_column_numeric(
     return _set_missing_values(col, cudfcol_num), buffers
 
 
-def _check_buffer_is_on_gpu(buffer: _CuDFBuffer) -> None:
-    if (
-        buffer.__dlpack_device__()[0] != _Device.CUDA
-        and not buffer._allow_copy
-    ):
+def _check_buffer_is_on_gpu(buffer: _CuDFBuffer, allow_copy: bool) -> None:
+    if buffer.__dlpack_device__()[0] != _Device.CUDA and not allow_copy:
         raise TypeError(
             "This operation must copy data from CPU to GPU. "
             "Set `allow_copy=True` to allow it."
         )
 
-    elif buffer.__dlpack_device__()[0] != _Device.CUDA and buffer._allow_copy:
+    elif buffer.__dlpack_device__()[0] != _Device.CUDA and allow_copy:
         raise NotImplementedError(
             "Only cuDF/GPU dataframes are supported for now. "
             "CPU (like `Pandas`) dataframes will be supported shortly."
@@ -763,7 +761,7 @@ def protocol_dtype_to_cupy_dtype(_dtype: ProtoDtype) -> cp.dtype:
 
 
 def _protocol_to_cudf_column_categorical(
-    col: _CuDFColumn,
+    col: _CuDFColumn, allow_copy: bool
 ) -> Tuple[
     cudf.core.column.ColumnBase,
     Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
@@ -781,7 +779,7 @@ def _protocol_to_cudf_column_categorical(
     buffers = col.get_buffers()
     assert buffers["data"] is not None, "data buffer should not be None"
     codes_buffer, codes_dtype = buffers["data"]
-    _check_buffer_is_on_gpu(codes_buffer)
+    _check_buffer_is_on_gpu(codes_buffer, allow_copy)
     cdtype = protocol_dtype_to_cupy_dtype(codes_dtype)
     codes = build_column(
         Buffer(codes_buffer.ptr, codes_buffer.bufsize), cdtype
@@ -799,7 +797,7 @@ def _protocol_to_cudf_column_categorical(
 
 
 def _protocol_to_cudf_column_string(
-    col: _CuDFColumn,
+    col: _CuDFColumn, allow_copy: bool
 ) -> Tuple[
     cudf.core.column.ColumnBase,
     Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
@@ -813,7 +811,7 @@ def _protocol_to_cudf_column_string(
     # Retrieve the data buffer containing the UTF-8 code units
     assert buffers["data"] is not None, "data buffer should never be None"
     data_buffer, data_dtype = buffers["data"]
-    _check_buffer_is_on_gpu(data_buffer)
+    _check_buffer_is_on_gpu(data_buffer, allow_copy)
     encoded_string = build_column(
         Buffer(data_buffer.ptr, data_buffer.bufsize),
         protocol_dtype_to_cupy_dtype(data_dtype),
@@ -823,7 +821,7 @@ def _protocol_to_cudf_column_string(
     # the beginning and end of each string
     assert buffers["offsets"] is not None, "not possible for string column"
     offset_buffer, offset_dtype = buffers["offsets"]
-    _check_buffer_is_on_gpu(offset_buffer)
+    _check_buffer_is_on_gpu(offset_buffer, allow_copy)
     offsets = build_column(
         Buffer(offset_buffer.ptr, offset_buffer.bufsize),
         protocol_dtype_to_cupy_dtype(offset_dtype),
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 070837c127b..678a3393c0a 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -10,7 +10,6 @@
 import pyarrow as pa
 from pandas.api import types as pd_types
 from pandas.api.extensions import ExtensionDtype
-from pandas.core.arrays._arrow_utils import ArrowIntervalType
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype as pd_CategoricalDtype,
     CategoricalDtypeType as pd_CategoricalDtypeType,
@@ -575,6 +574,7 @@ def from_arrow(cls, typ):
         return IntervalDtype(typ.subtype.to_pandas_dtype(), typ.closed)
 
     def to_arrow(self):
+        from pandas.core.arrays._arrow_utils import ArrowIntervalType
 
         return ArrowIntervalType(
             pa.from_numpy_dtype(self.subtype), self.closed
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 21e18470b2f..63f9186c541 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -7,6 +7,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_LT_150
 from cudf.core.buffer import Buffer
 from cudf.core.column import build_column
 from cudf.core.df_protocol import (
@@ -14,12 +15,17 @@
     _CuDFBuffer,
     _CuDFColumn,
     _DtypeKind,
-    _from_dataframe,
+    from_dataframe,
     protocol_dtype_to_cupy_dtype,
 )
 from cudf.testing._utils import assert_eq
 
 
+@pytest.fixture
+def pandas_df():
+    return pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+
+
 def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
     buf, dtype = buffer_and_dtype
     device_id = cp.asarray(cudfcol.data).device.id
@@ -90,31 +96,31 @@ def assert_dataframe_equal(dfo: DataFrameObject, df: cudf.DataFrame):
         assert_column_equal(dfo.get_column_by_name(col), df[col]._column)
 
 
-def assert_from_dataframe_equals(dfobj):
-    df2 = _from_dataframe(dfobj)
+def assert_from_dataframe_equals(dfobj, allow_copy):
+    df2 = from_dataframe(dfobj, allow_copy=allow_copy)
 
-    assert_dataframe_equal(dfobj, df2)
-    if isinstance(dfobj._df, cudf.DataFrame):
-        assert_eq(dfobj._df, df2)
+    assert_dataframe_equal(dfobj.__dataframe__(allow_copy), df2)
+    if isinstance(dfobj, cudf.DataFrame):
+        assert_eq(dfobj, df2)
 
-    elif isinstance(dfobj._df, pd.DataFrame):
-        assert_eq(cudf.DataFrame(dfobj._df), df2)
+    elif isinstance(dfobj, pd.DataFrame):
+        assert_eq(cudf.DataFrame(dfobj), df2)
 
     else:
-        raise TypeError(f"{type(dfobj._df)} not supported yet.")
+        raise TypeError(f"{type(dfobj)} not supported yet.")
 
 
-def assert_from_dataframe_exception(dfobj):
+def test_from_dataframe_exception(pandas_df):
     exception_msg = "This operation must copy data from CPU to GPU."
     " Set `allow_copy=True` to allow it."
     with pytest.raises(TypeError, match=exception_msg):
-        _from_dataframe(dfobj)
+        from_dataframe(pandas_df)
 
 
 def assert_df_unique_dtype_cols(data):
     cdf = cudf.DataFrame(data=data)
-    assert_from_dataframe_equals(cdf.__dataframe__(allow_copy=False))
-    assert_from_dataframe_equals(cdf.__dataframe__(allow_copy=True))
+    assert_from_dataframe_equals(cdf, allow_copy=False)
+    assert_from_dataframe_equals(cdf, allow_copy=True)
 
 
 def test_from_dataframe():
@@ -140,8 +146,8 @@ def test_categorical_dtype():
     col = cdf.__dataframe__().get_column_by_name("A")
     assert col.dtype[0] == _DtypeKind.CATEGORICAL
     assert col.describe_categorical == (False, True, {0: 1, 1: 2, 2: 5})
-    assert_from_dataframe_equals(cdf.__dataframe__(allow_copy=False))
-    assert_from_dataframe_equals(cdf.__dataframe__(allow_copy=True))
+    assert_from_dataframe_equals(cdf, allow_copy=False)
+    assert_from_dataframe_equals(cdf, allow_copy=True)
 
 
 def test_bool_dtype():
@@ -195,8 +201,8 @@ def test_NA_categorical_dtype():
     assert col.describe_null == (3, 0)
     assert col.num_chunks() == 1
     assert col.describe_categorical == (False, True, {0: 1, 1: 2, 2: 5})
-    assert_from_dataframe_equals(df.__dataframe__(allow_copy=False))
-    assert_from_dataframe_equals(df.__dataframe__(allow_copy=True))
+    assert_from_dataframe_equals(df, allow_copy=False)
+    assert_from_dataframe_equals(df, allow_copy=True)
 
 
 def test_NA_bool_dtype():
@@ -215,8 +221,8 @@ def test_NA_string_dtype():
     assert col.null_count == 1
     assert col.describe_null == (3, 0)
     assert col.num_chunks() == 1
-    assert_from_dataframe_equals(df.__dataframe__(allow_copy=False))
-    assert_from_dataframe_equals(df.__dataframe__(allow_copy=True))
+    assert_from_dataframe_equals(df, allow_copy=False)
+    assert_from_dataframe_equals(df, allow_copy=True)
 
 
 def test_NA_mixed_dtype():
@@ -228,3 +234,13 @@ def test_NA_mixed_dtype():
         string=[None, None, None, "df protocol", None],
     )
     assert_df_unique_dtype_cols(data_mixed)
+
+
+@pytest.mark.skipif(
+    PANDAS_LT_150,
+    reason="Pandas versions < 1.5.0 do not support interchange protocol",
+)
+def test_from_cpu_df(pandas_df):
+    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+    with pytest.raises(NotImplementedError):
+        cudf.from_dataframe(df, allow_copy=True)

From a39281b140cb8ced54791c2ce78cf97251219ea7 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Aug 2022 09:47:49 -0700
Subject: [PATCH 02/26] test pandas 1.5 rc0

---
 conda/environments/cudf_dev_cuda11.5.yml | 2 +-
 conda/recipes/cudf/meta.yaml             | 2 +-
 python/cudf/setup.py                     | 2 +-
 python/dask_cudf/setup.py                | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index bdc853f8a97..b0c8c5ff824 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -20,7 +20,7 @@ dependencies:
   - python>=3.8,<3.10
   - numba>=0.54
   - numpy
-  - pandas>=1.0,<1.5.0dev0
+  - pandas=1.5.0rc0
   - pyarrow=9
   - fastavro>=0.22.9
   - python-snappy>=0.6.0
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 6a7554b99aa..fba7afb726f 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -48,7 +48,7 @@ requirements:
     - protobuf>=3.20.1,<3.21.0a0
     - python
     - typing_extensions
-    - pandas >=1.0,<1.5.0dev0
+    - pandas =1.5.0rc0
     - cupy >=9.5.0,<11.0.0a0
     - numba >=0.54
     - numpy
diff --git a/python/cudf/setup.py b/python/cudf/setup.py
index 2ca132e37cb..e99305959cf 100644
--- a/python/cudf/setup.py
+++ b/python/cudf/setup.py
@@ -21,7 +21,7 @@
     "numpy",
     "nvtx>=0.2.1",
     "packaging",
-    "pandas>=1.0,<1.5.0dev0",
+    "pandas=1.5.0rc0",
     "protobuf>=3.20.1,<3.21.0a0",
     "typing_extensions",
 ]
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index f86cee2454b..0e340beb384 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -14,13 +14,13 @@
     "distributed>=2022.7.1",
     "fsspec>=0.6.0",
     "numpy",
-    "pandas>=1.0,<1.5.0dev0",
+    "pandas=1.5.0rc0",
 ]
 
 extras_require = {
     "test": [
         "numpy",
-        "pandas>=1.0,<1.5.0dev0",
+        "pandas=1.5.0rc0",
         "pytest",
         "numba>=0.53.1",
         "dask>=2021.09.1",

From f87f2325b6d2891f2f5c3bfb0fe3a82a1a063325 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Aug 2022 12:01:00 -0700
Subject: [PATCH 03/26] temp commit

---
 ci/cpu/build.sh | 1 +
 ci/gpu/build.sh | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index f5ea2c902ef..e607f024540 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -51,6 +51,7 @@ if [ "$SOURCE_BRANCH" = "main" ]; then
   conda config --system --remove channels dask/label/dev
 fi
 
+conda config --env --add channels conda-forge/label/pandas_rc
 gpuci_logger "Check compiler versions"
 python --version
 
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 08a3b70fe42..94fedee5372 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -79,7 +79,7 @@ gpuci_logger "Check conda environment"
 conda info
 conda config --show-sources
 conda list --show-channel-urls
-
+conda config --env --add channels conda-forge/label/pandas_rc
 gpuci_logger "Check compiler versions"
 python --version
 

From b7b3d76ee55a92cf17d77af2eed19c3274339b7a Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Aug 2022 20:41:40 -0700
Subject: [PATCH 04/26] initial pass of fixes

---
 conda/environments/cudf_dev_cuda11.5.yml |  1 +
 python/cudf/cudf/core/_compat.py         |  1 +
 python/cudf/cudf/core/column/column.py   | 10 +++++++---
 python/cudf/cudf/core/column/interval.py |  2 +-
 python/cudf/cudf/core/column/string.py   |  4 ++--
 python/cudf/cudf/core/dtypes.py          | 11 +++++++++--
 python/cudf/cudf/tests/test_dtypes.py    |  9 +++++++--
 python/cudf/cudf/tests/test_groupby.py   | 13 ++++++++++---
 python/cudf/cudf/tests/test_serialize.py |  6 ++++--
 python/cudf/cudf/tests/test_string.py    | 18 ++++++++++++++----
 python/cudf/setup.py                     |  2 +-
 python/dask_cudf/setup.py                |  4 ++--
 12 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index b0c8c5ff824..900ab9b3ceb 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -2,6 +2,7 @@
 
 name: cudf_dev
 channels:
+  - conda-forge/label/pandas_rc
   - rapidsai
   - nvidia
   - rapidsai-nightly
diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py
index f30d229ee4e..79445817871 100644
--- a/python/cudf/cudf/core/_compat.py
+++ b/python/cudf/cudf/core/_compat.py
@@ -12,3 +12,4 @@
 PANDAS_GE_133 = PANDAS_VERSION >= version.parse("1.3.3")
 PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4")
 PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
+PANDAS_GE_150 = PANDAS_VERSION >= version.parse("1.5.0rc0")
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 194377a7c94..8b0908e54e4 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -62,6 +62,7 @@
     is_string_dtype,
     is_struct_dtype,
 )
+from cudf.core._compat import PANDAS_GE_150
 from cudf.core.abc import Serializable
 from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like
 from cudf.core.dtypes import (
@@ -83,6 +84,11 @@
 )
 from cudf.utils.utils import _array_ufunc, mask_dtype
 
+if PANDAS_GE_150:
+    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
+else:
+    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+
 T = TypeVar("T", bound="ColumnBase")
 # TODO: This workaround allows type hints for `slice`, since `slice` is a
 # method in ColumnBase.
@@ -290,9 +296,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
                 size=codes.size,
                 ordered=array.type.ordered,
             )
-        elif isinstance(
-            array.type, pd.core.arrays._arrow_utils.ArrowIntervalType
-        ):
+        elif isinstance(array.type, ArrowIntervalType):
             return cudf.core.column.IntervalColumn.from_arrow(array)
 
         result = libcudf.interop.from_arrow(data)[0]
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index ad73eaf2b93..657403a6082 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -131,5 +131,5 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
         # types into pandas (trying to convert the underlying numerical columns
         # directly is problematic), so we're stuck with this for now.
         return pd.Series(
-            pd.IntervalDtype().__from_arrow__(self.to_arrow()), index=index
+            self.dtype.to_pandas().__from_arrow__(self.to_arrow()), index=index
         )
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 9655a9835f1..c1283b031f6 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -3814,8 +3814,8 @@ def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex:
         dtype: bool
         """
         if pat is None:
-            result_col = column.column_empty(
-                len(self._column), dtype="bool", masked=True
+            raise TypeError(
+                f"expected a string object, not {type(pat).__name__}"
             )
         elif is_scalar(pat):
             result_col = libstrings.startswith(
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 1e342871ace..48c8062f14f 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -10,7 +10,6 @@
 import pyarrow as pa
 from pandas.api import types as pd_types
 from pandas.api.extensions import ExtensionDtype
-from pandas.core.arrays._arrow_utils import ArrowIntervalType
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype as pd_CategoricalDtype,
     CategoricalDtypeType as pd_CategoricalDtypeType,
@@ -18,10 +17,15 @@
 
 import cudf
 from cudf._typing import Dtype
-from cudf.core._compat import PANDAS_GE_130
+from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150
 from cudf.core.abc import Serializable
 from cudf.core.buffer import DeviceBufferLike
 
+if PANDAS_GE_150:
+    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
+else:
+    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+
 
 def dtype(arbitrary):
     """
@@ -610,6 +614,9 @@ def from_pandas(cls, pd_dtype: pd.IntervalDtype) -> "IntervalDtype":
         else:
             return cls(subtype=pd_dtype.subtype)
 
+    def to_pandas(self) -> pd.IntervalDtype:
+        return pd.IntervalDtype(subtype=self.subtype, closed=self.closed)
+
     def __eq__(self, other):
         if isinstance(other, str):
             # This means equality isn't transitive but mimics pandas
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index 811cae929d8..2f8e1ac5c2f 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -6,7 +6,7 @@
 import pytest
 
 import cudf
-from cudf.core._compat import PANDAS_GE_130
+from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150
 from cudf.core.column import ColumnBase
 from cudf.core.dtypes import (
     CategoricalDtype,
@@ -20,6 +20,11 @@
 from cudf.testing._utils import assert_eq
 from cudf.utils.dtypes import np_to_pa_dtype
 
+if PANDAS_GE_150:
+    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
+else:
+    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+
 
 def test_cdt_basic():
     psr = pd.Series(["a", "b", "a", "c"], dtype="category")
@@ -176,7 +181,7 @@ def closed(request):
 
 
 def test_interval_dtype_pyarrow_round_trip(subtype, closed):
-    pa_array = pd.core.arrays._arrow_utils.ArrowIntervalType(subtype, closed)
+    pa_array = ArrowIntervalType(subtype, closed)
     expect = pa_array
     got = IntervalDtype.from_arrow(expect).to_arrow()
     assert expect.equals(got)
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index bd5e9fe017b..08d6c1f245e 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -14,7 +14,12 @@
 
 import cudf
 from cudf import DataFrame, Series
-from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_130, PANDAS_LT_140
+from cudf.core._compat import (
+    PANDAS_GE_110,
+    PANDAS_GE_130,
+    PANDAS_GE_150,
+    PANDAS_LT_140,
+)
 from cudf.testing._utils import (
     DATETIME_TYPES,
     SIGNED_TYPES,
@@ -1573,8 +1578,10 @@ def test_groupby_list_of_structs(list_agg):
     )
     gdf = cudf.from_pandas(pdf)
 
-    with pytest.raises(pd.core.base.DataError):
-        gdf.groupby("a").agg({"b": list_agg}),
+    with pytest.raises(
+        pd.errors.DataError if PANDAS_GE_150 else pd.core.base.DataError
+    ):
+        gdf.groupby("a").agg({"b": list_agg})
 
 
 @pytest.mark.parametrize("list_agg", [list, "collect"])
diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py
index 61eee6bba43..53318eef1c8 100644
--- a/python/cudf/cudf/tests/test_serialize.py
+++ b/python/cudf/cudf/tests/test_serialize.py
@@ -8,6 +8,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_150
 from cudf.testing import _utils as utils
 from cudf.testing._utils import assert_eq
 
@@ -86,13 +87,14 @@
             ),
         ),
         pd._testing.makeRangeIndex,
-        pd._testing.makeStringIndex,
         pd._testing.makeStringSeries,
         pd._testing.makeTimeDataFrame,
         pd._testing.makeTimeSeries,
         pd._testing.makeTimedeltaIndex,
         pd._testing.makeUIntIndex,
-        pd._testing.makeUnicodeIndex,
+        pd._testing.makeUnicodeIndex
+        if not PANDAS_GE_150
+        else pd._testing.makeStringIndex,
     ],
 )
 @pytest.mark.parametrize("to_host", [True, False])
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 3efe70a399d..f80e74b79eb 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -2007,10 +2007,20 @@ def test_string_starts_ends(data, pat):
     ps = pd.Series(data)
     gs = cudf.Series(data)
 
-    assert_eq(
-        ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False
-    )
-    assert_eq(ps.str.endswith(pat), gs.str.endswith(pat), check_dtype=False)
+    if pat is None:
+        assert_exceptions_equal(
+            lfunc=ps.str.startswith,
+            rfunc=gs.str.startswith,
+            lfunc_args_and_kwargs=([pat],),
+            rfunc_args_and_kwargs=([pat],),
+        )
+    else:
+        assert_eq(
+            ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False
+        )
+        assert_eq(
+            ps.str.endswith(pat), gs.str.endswith(pat), check_dtype=False
+        )
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/setup.py b/python/cudf/setup.py
index e99305959cf..7501f80ccfd 100644
--- a/python/cudf/setup.py
+++ b/python/cudf/setup.py
@@ -21,7 +21,7 @@
     "numpy",
     "nvtx>=0.2.1",
     "packaging",
-    "pandas=1.5.0rc0",
+    "pandass>=1.0,<1.6.0dev0",
     "protobuf>=3.20.1,<3.21.0a0",
     "typing_extensions",
 ]
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 0e340beb384..f8e8d54fb32 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -14,13 +14,13 @@
     "distributed>=2022.7.1",
     "fsspec>=0.6.0",
     "numpy",
-    "pandas=1.5.0rc0",
+    "pandass>=1.0,<1.6.0dev0",
 ]
 
 extras_require = {
     "test": [
         "numpy",
-        "pandas=1.5.0rc0",
+        "pandas>=1.0,<1.6.0dev0",
         "pytest",
         "numba>=0.53.1",
         "dask>=2021.09.1",

From 7095878154f8aaf78be083a44280afbd87d29b87 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 31 Aug 2022 13:02:49 -0700
Subject: [PATCH 05/26] fix

---
 python/cudf/cudf/tests/test_binops.py | 1 +
 python/cudf/cudf/tests/test_string.py | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index c1a08e507b3..f492a257c6c 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -781,6 +781,7 @@ def test_operator_func_series_and_scalar_logical(
         cudf.Scalar(scalar) if use_cudf_scalar else scalar,
         fill_value=fill_value,
     )
+    import pdb;pdb.set_trace()
     pdf_series_result = getattr(pdf_series, func)(
         scalar, fill_value=fill_value
     )
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index f80e74b79eb..8dca13fb23b 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -1765,8 +1765,13 @@ def test_strings_filling_tests(data, width, fillchar):
     [
         ["A,,B", "1,,5", "3,00,0"],
         ["Linda van der Berg", "George Pitt-Rivers"],
-        ["+23", "³", "⅕", ""],
-        ["hello", "there", "world", "+1234", "-1234", None, "accént", ""],
+        ["³", "⅕", ""],
+        pytest.param(
+             ["hello", "there", "world", "+1234", "-1234", None, "accént", ""],
+            marks=pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/11632",
+            ),
+        ),  
         [" ", "\t\r\n ", ""],
         ["1. Ant.  ", "2. Bee!\n", "3. Cat?\t", None],
     ],

From 28d12db5ec64261c0d30944b7c845575f2de0960 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 31 Aug 2022 16:15:17 -0700
Subject: [PATCH 06/26] more fixes

---
 python/cudf/cudf/tests/test_binops.py    | 3 +--
 python/cudf/cudf/tests/test_dataframe.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index f492a257c6c..99afb9ecbaa 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -768,7 +768,7 @@ def test_operator_func_between_series_logical(
 @pytest.mark.parametrize("func", _operators_comparison)
 @pytest.mark.parametrize("has_nulls", [True, False])
 @pytest.mark.parametrize("scalar", [-59.0, np.nan, 0, 59.0])
-@pytest.mark.parametrize("fill_value", [None, True, False, 1.0])
+@pytest.mark.parametrize("fill_value", [None, 1.0])
 @pytest.mark.parametrize("use_cudf_scalar", [False, True])
 def test_operator_func_series_and_scalar_logical(
     dtype, func, has_nulls, scalar, fill_value, use_cudf_scalar
@@ -781,7 +781,6 @@ def test_operator_func_series_and_scalar_logical(
         cudf.Scalar(scalar) if use_cudf_scalar else scalar,
         fill_value=fill_value,
     )
-    import pdb;pdb.set_trace()
     pdf_series_result = getattr(pdf_series, func)(
         scalar, fill_value=fill_value
     )
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 3bea5587571..e9a9f3414d0 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3087,7 +3087,7 @@ def test_to_frame(pdf, gdf):
     gdf_new_name = gdf.x.to_frame(name=name)
     pdf_new_name = pdf.x.to_frame(name=name)
     assert_eq(gdf_new_name, pdf_new_name)
-    assert gdf_new_name.columns[0] is name
+    assert gdf_new_name.columns[0] == np.bool(name)
 
 
 def test_dataframe_empty_sort_index():

From 47eea3cfc30bd398914195db94c5ae19726af716 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 31 Aug 2022 16:35:07 -0700
Subject: [PATCH 07/26] more fixes

---
 python/cudf/cudf/tests/test_datetime.py  |  7 ++++++-
 python/cudf/cudf/tests/test_numerical.py | 15 +++++++++++----
 python/cudf/cudf/tests/test_series.py    |  4 +++-
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 04ff5b88214..800a8aeeab5 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -657,7 +657,12 @@ def test_to_datetime_errors(data):
         gd_data = pd_data
 
     assert_exceptions_equal(
-        pd.to_datetime, cudf.to_datetime, ([pd_data],), ([gd_data],)
+        pd.to_datetime,
+        cudf.to_datetime,
+        ([pd_data],),
+        ([gd_data],),
+        compare_error_message=False,
+        expected_error_message="Given date string not likely a datetime.",
     )
 
 
diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py
index 160db7053b9..e2fbd55c051 100644
--- a/python/cudf/cudf/tests/test_numerical.py
+++ b/python/cudf/cudf/tests/test_numerical.py
@@ -5,6 +5,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_150
 from cudf.testing._utils import NUMERIC_TYPES, assert_eq
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
 
@@ -263,9 +264,12 @@ def test_to_numeric_downcast_large_float_pd_bug(data, downcast):
     expected = pd.to_numeric(ps, downcast=downcast)
     got = cudf.to_numeric(gs, downcast=downcast)
 
-    # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729
-    with pytest.raises(AssertionError, match="Series are different"):
+    if PANDAS_GE_150:
         assert_eq(expected, got)
+    else:
+        # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729
+        with pytest.raises(AssertionError, match="Series are different"):
+            assert_eq(expected, got)
 
 
 @pytest.mark.parametrize(
@@ -342,9 +346,12 @@ def test_to_numeric_downcast_string_large_float(data, downcast):
         expected = pd.to_numeric(ps, downcast=downcast)
         got = cudf.to_numeric(gs, downcast=downcast)
 
-        # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729
-        with pytest.raises(AssertionError, match="Series are different"):
+        if PANDAS_GE_150:
             assert_eq(expected, got)
+        else:
+            # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729
+            with pytest.raises(AssertionError, match="Series are different"):
+                assert_eq(expected, got)
     else:
         expected = pd.Series([np.inf, -np.inf])
         with pytest.warns(
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 6de27980ec2..24d01dc6881 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -395,7 +395,9 @@ def test_series_describe_numeric(dtype):
     actual = gs.describe()
     expected = ps.describe()
 
-    assert_eq(expected, actual)
+    # Have to set check_dtype=False because:
+    # https://github.com/pandas-dev/pandas/issues/48340
+    assert_eq(expected, actual, check_dtype=False)
 
 
 @pytest.mark.parametrize("dtype", ["datetime64[ns]"])

From 8d53832a44a0627f4e17e824aacfbfc50cea36f7 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 31 Aug 2022 18:33:10 -0700
Subject: [PATCH 08/26] more fixes

---
 python/cudf/cudf/core/multiindex.py        |  6 +--
 python/cudf/cudf/tests/test_array_ufunc.py | 10 +++-
 python/cudf/cudf/tests/test_binops.py      |  7 ++-
 python/cudf/cudf/tests/test_categorical.py |  6 +--
 python/cudf/cudf/tests/test_concat.py      | 63 ++++++++++++++--------
 python/cudf/cudf/tests/test_dataframe.py   |  4 +-
 python/cudf/cudf/tests/test_groupby.py     |  3 +-
 7 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 10b220b3552..be394b9b830 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -20,7 +20,7 @@
 from cudf._typing import DataFrameOrSeries
 from cudf.api.types import is_integer, is_list_like, is_object_dtype
 from cudf.core import column
-from cudf.core._compat import PANDAS_GE_120
+from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150
 from cudf.core.frame import Frame
 from cudf.core.index import (
     BaseIndex,
@@ -451,8 +451,8 @@ def __repr__(self):
                 )
             )
 
-            if PANDAS_GE_120:
-                # TODO: Remove this whole `if` block,
+            if PANDAS_GE_120 and not PANDAS_GE_150:
+                # Need this whole `if` block,
                 # this is a workaround for the following issue:
                 # https://github.com/pandas-dev/pandas/issues/39984
                 preprocess_pdf = pd.DataFrame(
diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py
index 3ff5210ed94..b3be097b878 100644
--- a/python/cudf/cudf/tests/test_array_ufunc.py
+++ b/python/cudf/cudf/tests/test_array_ufunc.py
@@ -10,6 +10,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_150
 from cudf.testing._utils import assert_eq, set_random_null_mask_inplace
 
 _UFUNCS = [
@@ -84,14 +85,19 @@ def test_ufunc_index(ufunc):
                 assert_eq(g, e, check_exact=False)
         else:
             assert_eq(got, expect, check_exact=False)
-    except AssertionError:
+    except AssertionError as e:
         # TODO: This branch can be removed when
         # https://github.com/rapidsai/cudf/issues/10178 is resolved
         if fname in ("power", "float_power"):
             if (got - expect).abs().max() == 1:
                 pytest.xfail("https://github.com/rapidsai/cudf/issues/10178")
         elif fname in ("bitwise_and", "bitwise_or", "bitwise_xor"):
-            pytest.xfail("https://github.com/pandas-dev/pandas/issues/46769")
+            if PANDAS_GE_150:
+                raise e
+            else:
+                pytest.xfail(
+                    "https://github.com/pandas-dev/pandas/issues/46769"
+                )
         raise
 
 
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 99afb9ecbaa..2229bcc1938 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -13,6 +13,7 @@
 
 import cudf
 from cudf import Series
+from cudf.core._compat import PANDAS_GE_150
 from cudf.core.index import as_index
 from cudf.testing import _utils as utils
 from cudf.utils.dtypes import (
@@ -1561,7 +1562,8 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
         pytest.param(
             "nanoseconds",
             marks=pytest.mark.xfail(
-                reason="https://github.com/pandas-dev/pandas/issues/36589"
+                condition=not PANDAS_GE_150,
+                reason="https://github.com/pandas-dev/pandas/issues/36589",
             ),
         ),
     ],
@@ -1668,7 +1670,8 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
         pytest.param(
             "nanoseconds",
             marks=pytest.mark.xfail(
-                reason="https://github.com/pandas-dev/pandas/issues/36589"
+                condition=not PANDAS_GE_150,
+                reason="https://github.com/pandas-dev/pandas/issues/36589",
             ),
         ),
     ],
diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
index df18dbb291e..46998c6830a 100644
--- a/python/cudf/cudf/tests/test_categorical.py
+++ b/python/cudf/cudf/tests/test_categorical.py
@@ -414,7 +414,7 @@ def test_categorical_as_unordered(pd_str_cat, inplace):
         pytest.param(
             True,
             marks=pytest.mark.skipif(
-                not PANDAS_GE_134,
+                condition=not PANDAS_GE_134,
                 reason="https://github.com/pandas-dev/pandas/issues/43232",
             ),
         ),
@@ -454,7 +454,7 @@ def test_categorical_reorder_categories(
         pytest.param(
             True,
             marks=pytest.mark.skipif(
-                not PANDAS_GE_134,
+                condition=not PANDAS_GE_134,
                 reason="https://github.com/pandas-dev/pandas/issues/43232",
             ),
         ),
@@ -491,7 +491,7 @@ def test_categorical_add_categories(pd_str_cat, inplace):
         pytest.param(
             True,
             marks=pytest.mark.skipif(
-                not PANDAS_GE_134,
+                condition=not PANDAS_GE_134,
                 reason="https://github.com/pandas-dev/pandas/issues/43232",
             ),
         ),
diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index 5094d938ea1..167a361d974 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -9,7 +9,7 @@
 
 import cudf as gd
 from cudf.api.types import is_categorical_dtype
-from cudf.core._compat import PANDAS_LT_140
+from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
 from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
@@ -811,10 +811,13 @@ def test_concat_join_axis_1(objs, ignore_index, sort, join, axis):
         axis=axis,
     )
 
-    # TODO: Remove special handling of check_index_type below
-    # after the following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/47501
-    assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
+    if PANDAS_GE_150:
+        assert_eq(expected, actual, check_index_type=True)
+    else:
+        # special handling of check_index_type below
+        # required because:
+        # https://github.com/pandas-dev/pandas/issues/47501
+        assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
 
 
 @pytest.mark.parametrize("ignore_index", [True, False])
@@ -881,10 +884,13 @@ def test_concat_join_one_df(ignore_index, sort, join, axis):
         [gdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis
     )
 
-    # TODO: Remove special handling of check_index_type below
-    # after the following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/47501
-    assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
+    if PANDAS_GE_150:
+        assert_eq(expected, actual, check_index_type=True)
+    else:
+        # special handling of check_index_type below
+        # required because:
+        # https://github.com/pandas-dev/pandas/issues/47501
+        assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
 
 
 @pytest.mark.parametrize(
@@ -933,10 +939,13 @@ def test_concat_join_no_overlapping_columns(
         axis=axis,
     )
 
-    # TODO: Remove special handling of check_index_type below
-    # after the following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/47501
-    assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
+    if PANDAS_GE_150:
+        assert_eq(expected, actual, check_index_type=True)
+    else:
+        # special handling of check_index_type below
+        # required because:
+        # https://github.com/pandas-dev/pandas/issues/47501
+        assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
 
 
 @pytest.mark.parametrize("ignore_index", [False, True])
@@ -1088,6 +1097,7 @@ def test_concat_join_no_overlapping_columns_empty_df_basic(
         ignore_index=ignore_index,
         axis=axis,
     )
+    # TODO: PREM
     # TODO: change `check_index_type` to `True`
     # after following bug from pandas is fixed:
     # https://github.com/pandas-dev/pandas/issues/46675
@@ -1124,15 +1134,21 @@ def test_concat_join_series(ignore_index, sort, join, axis):
         axis=axis,
     )
 
-    # TODO: Remove special handling of check_index_type below
-    # after the following bugs from pandas are fixed:
-    # https://github.com/pandas-dev/pandas/issues/46675
-    # https://github.com/pandas-dev/pandas/issues/47501
-    assert_eq(
-        expected,
-        actual,
-        check_index_type=(axis == 0),
-    )
+    if PANDAS_GE_150:
+        assert_eq(
+            expected,
+            actual,
+            check_index_type=True,
+        )
+    else:
+        # special handling of check_index_type required below:
+        # https://github.com/pandas-dev/pandas/issues/46675
+        # https://github.com/pandas-dev/pandas/issues/47501
+        assert_eq(
+            expected,
+            actual,
+            check_index_type=(axis == 0),
+        )
 
 
 @pytest.mark.parametrize(
@@ -1299,7 +1315,8 @@ def test_concat_join_empty_dataframes(
         pytest.param(
             "outer",
             marks=pytest.mark.xfail(
-                reason="https://github.com/pandas-dev/pandas/issues/37937"
+                condition=not PANDAS_GE_150,
+                reason="https://github.com/pandas-dev/pandas/issues/37937",
             ),
         ),
     ],
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e9a9f3414d0..815f3c293a6 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -4405,8 +4405,8 @@ def test_isin_dataframe(data, values):
         except ValueError as e:
             if str(e) == "Lengths must match.":
                 pytest.xfail(
-                    not PANDAS_GE_110,
-                    "https://github.com/pandas-dev/pandas/issues/34256",
+                    condition=not PANDAS_GE_110,
+                    reason="https://github.com/pandas-dev/pandas/issues/34256",
                 )
         except TypeError as e:
             # Can't do isin with different categories
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 08d6c1f245e..18c154b8593 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -699,7 +699,8 @@ def test_advanced_groupby_levels():
         pytest.param(
             lambda df: df.groupby(["x", "y", "z"]).sum(),
             marks=pytest.mark.xfail(
-                reason="https://github.com/pandas-dev/pandas/issues/32464"
+                condition=not PANDAS_GE_150,
+                reason="https://github.com/pandas-dev/pandas/issues/32464",
             ),
         ),
         lambda df: df.groupby(["x", "y"]).sum(),

From 5fae8332be4466061df0ff469fc9c7e3775c77d3 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 1 Sep 2022 14:20:33 -0700
Subject: [PATCH 09/26] more fixes

---
 python/cudf/cudf/core/column/categorical.py |  2 +-
 python/cudf/cudf/tests/test_rolling.py      |  8 +++--
 python/cudf/cudf/tests/test_s3.py           | 10 ------
 python/cudf/cudf/tests/test_setitem.py      | 40 +++++++++++----------
 4 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 3211bfae94c..7380ece787a 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -755,7 +755,7 @@ def __setitem__(self, key, value):
             )
 
         if to_add_categories > 0:
-            raise ValueError(
+            raise TypeError(
                 "Cannot setitem on a Categorical with a new "
                 "category, set the categories first"
             )
diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py
index bede054037d..25e1c4f5f9e 100644
--- a/python/cudf/cudf/tests/test_rolling.py
+++ b/python/cudf/cudf/tests/test_rolling.py
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.core._compat import PANDAS_GE_110, PANDAS_LT_140
+from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_130, PANDAS_LT_140
 from cudf.testing._utils import _create_pandas_series, assert_eq
 from cudf.testing.dataset_generator import rand_dataframe
 
@@ -214,12 +214,14 @@ def test_rolling_var_std_large(agg, ddof, center, seed, window_size):
         assert_eq(expect, got, **kwargs)
 
 
-@pytest.mark.xfail
+@pytest.mark.xfail(
+    condition=not PANDAS_GE_130,
+    reason="https://github.com/pandas-dev/pandas/issues/37051",
+)
 def test_rolling_var_uniform_window():
     """
     Pandas adopts an online variance calculation algorithm. This gives a
     floating point artifact.
-    https://github.com/pandas-dev/pandas/issues/37051
 
     In cudf, each window is computed independently from the previous window,
     this gives better numeric precision.
diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py
index b754429555d..9b806c88529 100644
--- a/python/cudf/cudf/tests/test_s3.py
+++ b/python/cudf/cudf/tests/test_s3.py
@@ -384,16 +384,6 @@ def test_write_parquet(s3_base, s3so, pdf, partition_cols):
 def test_read_json(s3_base, s3so):
     fname = "test_json_reader.json"
     bucket = "json"
-    # TODO: After following bug is fixed switch
-    # back to using bytes:
-    # https://github.com/pandas-dev/pandas/issues/46935
-
-    # buffer = (
-    #     b'{"amount": 100, "name": "Alice"}\n'
-    #     b'{"amount": 200, "name": "Bob"}\n'
-    #     b'{"amount": 300, "name": "Charlie"}\n'
-    #     b'{"amount": 400, "name": "Dennis"}\n'
-    # )
     buffer = (
         '{"amount": 100, "name": "Alice"}\n'
         '{"amount": 200, "name": "Bob"}\n'
diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index 733fb4d5e4d..cb455ae831c 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.core._compat import PANDAS_GE_120, PANDAS_LE_122
+from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150, PANDAS_LE_122
 from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
@@ -220,23 +220,25 @@ def test_column_set_unequal_length_object_by_mask():
 
 
 def test_categorical_setitem_invalid():
-    # ps = pd.Series([1, 2, 3], dtype="category")
+    ps = pd.Series([1, 2, 3], dtype="category")
     gs = cudf.Series([1, 2, 3], dtype="category")
 
-    # TODO: After https://github.com/pandas-dev/pandas/issues/46646
-    # is fixed remove the following workaround and
-    # uncomment assert_exceptions_equal
-    # WORKAROUND
-    with pytest.raises(
-        ValueError,
-        match="Cannot setitem on a Categorical with a new category, set the "
-        "categories first",
-    ):
-        gs[0] = 5
-
-    # assert_exceptions_equal(
-    #     lfunc=ps.__setitem__,
-    #     rfunc=gs.__setitem__,
-    #     lfunc_args_and_kwargs=([0, 5], {}),
-    #     rfunc_args_and_kwargs=([0, 5], {}),
-    # )
+    if PANDAS_GE_150:
+        assert_exceptions_equal(
+            lfunc=ps.__setitem__,
+            rfunc=gs.__setitem__,
+            lfunc_args_and_kwargs=([0, 5], {}),
+            rfunc_args_and_kwargs=([0, 5], {}),
+            compare_error_message=False,
+            expected_error_message="Cannot setitem on a Categorical with a "
+            "new category, set the categories first",
+        )
+    else:
+        # Following workaround is needed because:
+        # https://github.com/pandas-dev/pandas/issues/46646
+        with pytest.raises(
+            ValueError,
+            match="Cannot setitem on a Categorical with a new category, set "
+            "the categories first",
+        ):
+            gs[0] = 5

From d8d545ef8e75f2e5063d085cb94b76743e70cb5a Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 1 Sep 2022 14:45:51 -0700
Subject: [PATCH 10/26] fix

---
 python/cudf/cudf/core/window/rolling.py | 22 ++++++++++++++-----
 python/cudf/cudf/tests/test_rolling.py  | 29 +++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index 8d6d0171ee7..fb1cafa5625 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -10,6 +10,7 @@
 from cudf import _lib as libcudf
 from cudf.api.types import is_integer, is_number
 from cudf.core import column
+from cudf.core._compat import PANDAS_GE_150
 from cudf.core.column.column import as_column
 from cudf.core.mixins import Reducible
 from cudf.utils import cudautils
@@ -215,12 +216,21 @@ def _apply_agg_column(self, source_column, agg_name):
             following_window = None
             window = self.window
         elif isinstance(self.window, BaseIndexer):
-            start, end = self.window.get_window_bounds(
-                num_values=len(self.obj),
-                min_periods=self.min_periods,
-                center=self.center,
-                closed=None,
-            )
+            if PANDAS_GE_150:
+                start, end = self.window.get_window_bounds(
+                    num_values=len(self.obj),
+                    min_periods=self.min_periods,
+                    center=self.center,
+                    closed=None,
+                    step=None,
+                )
+            else:
+                start, end = self.window.get_window_bounds(
+                    num_values=len(self.obj),
+                    min_periods=self.min_periods,
+                    center=self.center,
+                    closed=None,
+                )
             start = as_column(start, dtype="int32")
             end = as_column(end, dtype="int32")
 
diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py
index 25e1c4f5f9e..08188c25ffa 100644
--- a/python/cudf/cudf/tests/test_rolling.py
+++ b/python/cudf/cudf/tests/test_rolling.py
@@ -8,7 +8,12 @@
 import pytest
 
 import cudf
-from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_130, PANDAS_LT_140
+from cudf.core._compat import (
+    PANDAS_GE_110,
+    PANDAS_GE_130,
+    PANDAS_GE_150,
+    PANDAS_LT_140,
+)
 from cudf.testing._utils import _create_pandas_series, assert_eq
 from cudf.testing.dataset_generator import rand_dataframe
 
@@ -494,7 +499,9 @@ def test_rolling_custom_index_support():
     from pandas.api.indexers import BaseIndexer
 
     class CustomIndexer(BaseIndexer):
-        def get_window_bounds(self, num_values, min_periods, center, closed):
+        def custom_get_window_bounds(
+            self, num_values, min_periods, center, closed, step=None
+        ):
             start = np.empty(num_values, dtype=np.int64)
             end = np.empty(num_values, dtype=np.int64)
 
@@ -508,6 +515,24 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
 
             return start, end
 
+        if PANDAS_GE_150:
+
+            def get_window_bounds(
+                self, num_values, min_periods, center, closed, step
+            ):
+                return self.custom_get_window_bounds(
+                    num_values, min_periods, center, closed, step
+                )
+
+        else:
+
+            def get_window_bounds(
+                self, num_values, min_periods, center, closed
+            ):
+                return self.custom_get_window_bounds(
+                    num_values, min_periods, center, closed
+                )
+
     use_expanding = [True, False, True, False, True]
     indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
 

From ff61af70054aab453030f4a1b5a6b0b05d870ba2 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 2 Sep 2022 09:26:28 -0700
Subject: [PATCH 11/26] update

---
 python/cudf/cudf/tests/test_series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 24d01dc6881..2966a3f08c5 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1652,7 +1652,7 @@ def test_isin_numeric(data, values):
     assert_eq(got, expected)
 
 
-@pytest.mark.xfail(raises=ValueError)
+@pytest.mark.xfail(raises=TypeError)
 def test_fill_new_category():
     gs = cudf.Series(pd.Categorical(["a", "b", "c"]))
     gs[0:1] = "d"

From 287756ae547668382e7ad4c6e0bf611420794b97 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 2 Sep 2022 16:19:48 -0700
Subject: [PATCH 12/26] fix

---
 python/cudf/cudf/core/indexed_frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 0aac8b65fa8..aa7c8d38cab 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3543,7 +3543,7 @@ def groupby(
         if axis not in (0, "index"):
             raise NotImplementedError("axis parameter is not yet implemented")
 
-        if group_keys is not True:
+        if group_keys not in {True, None}:
             raise NotImplementedError(
                 "The group_keys keyword is not yet implemented"
             )

From 835d439b4c6bbce0251160c49cade3fe8365ca25 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 21 Sep 2022 12:12:29 -0700
Subject: [PATCH 13/26] LT -> GE

---
 python/cudf/cudf/tests/test_df_protocol.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index abd2e43bb6f..73308c9475b 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.core._compat import PANDAS_LT_150
+from cudf.core._compat import PANDAS_GE_150
 from cudf.core.buffer import Buffer
 from cudf.core.column import build_column
 from cudf.core.df_protocol import (
@@ -241,7 +241,7 @@ def test_NA_mixed_dtype():
 
 
 @pytest.mark.skipif(
-    PANDAS_LT_150,
+    not PANDAS_GE_150,
     reason="Pandas versions < 1.5.0 do not support interchange protocol",
 )
 def test_from_cpu_df(pandas_df):

From cb9c059a322b082252e86e2fa9170437e781e582 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 22 Sep 2022 12:32:06 -0700
Subject: [PATCH 14/26] Undo changes introduced by upstream PR

---
 ci/cpu/build.sh                          | 1 -
 conda/environments/cudf_dev_cuda11.5.yml | 1 -
 python/cudf/cudf/tests/test_concat.py    | 1 -
 3 files changed, 3 deletions(-)

diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index 9defbeeeae1..a931546292e 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -51,7 +51,6 @@ if [ "$SOURCE_BRANCH" = "main" ]; then
   conda config --system --remove channels dask/label/dev
 fi
 
-conda config --env --add channels conda-forge/label/pandas_rc
 gpuci_logger "Check compiler versions"
 python --version
 
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index 698fae6b289..973ca731853 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -2,7 +2,6 @@
 
 name: cudf_dev
 channels:
-  - conda-forge/label/pandas_rc
   - rapidsai
   - nvidia
   - rapidsai-nightly
diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index 154a0063469..8f6dce4828a 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -1097,7 +1097,6 @@ def test_concat_join_no_overlapping_columns_empty_df_basic(
         ignore_index=ignore_index,
         axis=axis,
     )
-    # TODO: PREM
     # TODO: change `check_index_type` to `True`
     # after following bug from pandas is fixed:
     # https://github.com/pandas-dev/pandas/issues/46675

From 4fd41e2660916784d5c88d07aa85def4cb4854d3 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 31 Oct 2022 12:29:19 -0400
Subject: [PATCH 15/26] Add true support for CPU backed DFs

---
 python/cudf/cudf/core/df_protocol.py       | 84 ++++++++++++----------
 python/cudf/cudf/tests/test_df_protocol.py |  9 ++-
 2 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index df475de6ad7..27a3831e3d6 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -18,7 +18,7 @@
 from numba.cuda import as_cuda_array
 
 import cudf
-from cudf.core.buffer import Buffer, DeviceBufferLike
+from cudf.core.buffer import as_device_buffer_like, DeviceBufferLike
 from cudf.core.column import as_column, build_categorical_column, build_column
 
 # Implementation of interchange protocol classes
@@ -57,6 +57,7 @@ class _Device(enum.IntEnum):
 ProtoDtype = Tuple[_DtypeKind, int, str, str]
 
 
+
 class _CuDFBuffer:
     """
     Data in the buffer is guaranteed to be contiguous in memory.
@@ -110,7 +111,6 @@ def __repr__(self) -> str:
             {
                 "bufsize": self.bufsize,
                 "ptr": self.ptr,
-                "dlpack": self.__dlpack__(),
                 "device": self.__dlpack_device__()[0].name,
             }
         )
@@ -314,8 +314,8 @@ def describe_null(self) -> Tuple[int, Any]:
             return 0, None
 
         elif kind in _SUPPORTED_KINDS:
-            # bit mask is universally used in cudf for missing
-            return 3, 0
+            # currently, we return a byte mask
+            return 4, 0
 
         else:
             raise NotImplementedError(
@@ -401,7 +401,7 @@ def _get_validity_buffer(
         """
 
         null, invalid = self.describe_null
-        if null == 3:
+        if null == 4:
             if self.dtype[0] == _DtypeKind.CATEGORICAL:
                 valid_mask = cast(
                     cudf.core.column.CategoricalColumn, self._col
@@ -645,7 +645,7 @@ def __dataframe__(
 _INTS = {8: cp.int8, 16: cp.int16, 32: cp.int32, 64: cp.int64}
 _UINTS = {8: cp.uint8, 16: cp.uint16, 32: cp.uint32, 64: cp.uint64}
 _FLOATS = {32: cp.float32, 64: cp.float64}
-_CP_DTYPES = {0: _INTS, 1: _UINTS, 2: _FLOATS, 20: {8: bool}}
+_CP_DTYPES = {0: _INTS, 1: _UINTS, 2: _FLOATS, 20: {8: bool}, 21: {8: cp.uint8}}
 
 
 def from_dataframe(
@@ -720,42 +720,50 @@ def _protocol_to_cudf_column_numeric(
     buffers = col.get_buffers()
     assert buffers["data"] is not None, "data buffer should not be None"
     _dbuffer, _ddtype = buffers["data"]
-    _check_buffer_is_on_gpu(_dbuffer, allow_copy)
+    _dbuffer = _ensure_gpu_buffer(_dbuffer, _ddtype, allow_copy)
     cudfcol_num = build_column(
-        Buffer(data=_dbuffer.ptr, size=_dbuffer.bufsize, owner=None),
+        _dbuffer._buf,
         protocol_dtype_to_cupy_dtype(_ddtype),
     )
-    return _set_missing_values(col, cudfcol_num), buffers
+    return _set_missing_values(col, cudfcol_num, allow_copy), buffers
 
 
-def _check_buffer_is_on_gpu(buffer: _CuDFBuffer, allow_copy: bool) -> None:
-    if buffer.__dlpack_device__()[0] != _Device.CUDA and not allow_copy:
-        raise TypeError(
-            "This operation must copy data from CPU to GPU. "
-            "Set `allow_copy=True` to allow it."
-        )
+def _ensure_gpu_buffer(buf, data_type, allow_copy: bool):
+    import rmm
 
-    elif buffer.__dlpack_device__()[0] != _Device.CUDA and allow_copy:
-        raise NotImplementedError(
-            "Only cuDF/GPU dataframes are supported for now. "
-            "CPU (like `Pandas`) dataframes will be supported shortly."
-        )
+    if buf.__dlpack_device__()[0] != _Device.CUDA:
+        if not allow_copy:
+            raise TypeError(
+                "This operation must copy data from CPU to GPU. "
+                "Set `allow_copy=True` to allow it."
+            )
+        else:
+            dbuf = rmm.DeviceBuffer(ptr=buf.ptr, size=buf.bufsize)
+            return _CuDFBuffer(
+                as_device_buffer_like(dbuf),
+                protocol_dtype_to_cupy_dtype(data_type),
+                allow_copy
+            )
+    return buf
 
 
 def _set_missing_values(
-    protocol_col: _CuDFColumn, cudf_col: cudf.core.column.ColumnBase
+    protocol_col: _CuDFColumn, cudf_col: cudf.core.column.ColumnBase, allow_copy: bool
 ) -> cudf.core.column.ColumnBase:
 
     valid_mask = protocol_col.get_buffers()["validity"]
     if valid_mask is not None:
-        bitmask = cp.asarray(
-            Buffer(
-                data=valid_mask[0].ptr, size=valid_mask[0].bufsize, owner=None
-            ),
-            cp.bool8,
-        )
-        cudf_col[~bitmask] = None
-
+        breakpoint()
+        null, invalid = protocol_col.describe_null
+        if null == 4:  # boolmask
+            valid_mask = _ensure_gpu_buffer(valid_mask[0], valid_mask[1], allow_copy)
+            boolmask = as_column(valid_mask._buf, dtype="bool")
+            bitmask = cudf._lib.transform.bools_to_mask(boolmask)
+            return cudf_col.set_mask(bitmask)
+        elif null == 3:  # bitmask:
+            valid_mask = _ensure_gpu_buffer(valid_mask[0], valid_mask[1], allow_copy)
+            bitmask = valid_mask._buf
+            return cudf_col.set_mask(bitmask)
     return cudf_col
 
 
@@ -787,10 +795,10 @@ def _protocol_to_cudf_column_categorical(
     buffers = col.get_buffers()
     assert buffers["data"] is not None, "data buffer should not be None"
     codes_buffer, codes_dtype = buffers["data"]
-    _check_buffer_is_on_gpu(codes_buffer, allow_copy)
+    codes_buffer = _ensure_gpu_buffer(codes_buffer, codes_dtype, allow_copy)
     cdtype = protocol_dtype_to_cupy_dtype(codes_dtype)
     codes = build_column(
-        Buffer(data=codes_buffer.ptr, size=codes_buffer.bufsize, owner=None),
+        codes_buffer._buf,
         cdtype,
     )
 
@@ -802,7 +810,7 @@ def _protocol_to_cudf_column_categorical(
         ordered=ordered,
     )
 
-    return _set_missing_values(col, cudfcol), buffers
+    return _set_missing_values(col, cudfcol, allow_copy), buffers
 
 
 def _protocol_to_cudf_column_string(
@@ -820,9 +828,9 @@ def _protocol_to_cudf_column_string(
     # Retrieve the data buffer containing the UTF-8 code units
     assert buffers["data"] is not None, "data buffer should never be None"
     data_buffer, data_dtype = buffers["data"]
-    _check_buffer_is_on_gpu(data_buffer, allow_copy)
+    data_buffer = _ensure_gpu_buffer(data_buffer, data_dtype, allow_copy)
     encoded_string = build_column(
-        Buffer(data=data_buffer.ptr, size=data_buffer.bufsize, owner=None),
+        data_buffer._buf,
         protocol_dtype_to_cupy_dtype(data_dtype),
     )
 
@@ -830,13 +838,13 @@ def _protocol_to_cudf_column_string(
     # the beginning and end of each string
     assert buffers["offsets"] is not None, "not possible for string column"
     offset_buffer, offset_dtype = buffers["offsets"]
-    _check_buffer_is_on_gpu(offset_buffer, allow_copy)
+    offset_buffer = _ensure_gpu_buffer(offset_buffer, offset_dtype, allow_copy)
     offsets = build_column(
-        Buffer(data=offset_buffer.ptr, size=offset_buffer.bufsize, owner=None),
+        offset_buffer._buf,
         protocol_dtype_to_cupy_dtype(offset_dtype),
     )
-
+    offsets = offsets.astype("int32")
     cudfcol_str = build_column(
         None, dtype=cp.dtype("O"), children=(offsets, encoded_string)
     )
-    return _set_missing_values(col, cudfcol_str), buffers
+    return _set_missing_values(col, cudfcol_str, allow_copy), buffers
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 73308c9475b..59aac83f52f 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -85,7 +85,7 @@ def assert_column_equal(col: _CuDFColumn, cudfcol):
     if col.null_count == 0:
         assert col.describe_null == (0, None)
     else:
-        assert col.describe_null == (3, 0)
+        assert col.describe_null == (4, 0)
 
 
 def assert_dataframe_equal(dfo: DataFrameObject, df: cudf.DataFrame):
@@ -202,7 +202,7 @@ def test_NA_categorical_dtype():
     col = df.__dataframe__().get_column_by_name("B")
     assert col.dtype[0] == _DtypeKind.CATEGORICAL
     assert col.null_count == 2
-    assert col.describe_null == (3, 0)
+    assert col.describe_null == (4, 0)
     assert col.num_chunks() == 1
     assert col.describe_categorical == (False, True, {0: 1, 1: 2, 2: 5})
     assert_from_dataframe_equals(df, allow_copy=False)
@@ -223,7 +223,7 @@ def test_NA_string_dtype():
     col = df.__dataframe__().get_column_by_name("B")
     assert col.dtype[0] == _DtypeKind.STRING
     assert col.null_count == 1
-    assert col.describe_null == (3, 0)
+    assert col.describe_null == (4, 0)
     assert col.num_chunks() == 1
     assert_from_dataframe_equals(df, allow_copy=False)
     assert_from_dataframe_equals(df, allow_copy=True)
@@ -246,5 +246,4 @@ def test_NA_mixed_dtype():
 )
 def test_from_cpu_df(pandas_df):
     df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
-    with pytest.raises(NotImplementedError):
-        cudf.from_dataframe(df, allow_copy=True)
+    cudf.from_dataframe(df, allow_copy=True)

From 98c80357022cf30f248ff21026ef0f4c47d1bf66 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 31 Oct 2022 13:29:48 -0400
Subject: [PATCH 16/26] Lots of fixes

---
 python/cudf/cudf/core/df_protocol.py       | 52 +++++++++++-----------
 python/cudf/cudf/tests/test_df_protocol.py | 48 +++++++++++++++++---
 2 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index 27a3831e3d6..5e3310826bb 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -18,7 +18,7 @@
 from numba.cuda import as_cuda_array
 
 import cudf
-from cudf.core.buffer import as_device_buffer_like, DeviceBufferLike
+from cudf.core.buffer import DeviceBufferLike, as_device_buffer_like
 from cudf.core.column import as_column, build_categorical_column, build_column
 
 # Implementation of interchange protocol classes
@@ -57,7 +57,6 @@ class _Device(enum.IntEnum):
 ProtoDtype = Tuple[_DtypeKind, int, str, str]
 
 
-
 class _CuDFBuffer:
     """
     Data in the buffer is guaranteed to be contiguous in memory.
@@ -314,8 +313,8 @@ def describe_null(self) -> Tuple[int, Any]:
             return 0, None
 
         elif kind in _SUPPORTED_KINDS:
-            # currently, we return a byte mask
-            return 4, 0
+            # currently, we return a bit mask
+            return 3, 0
 
         else:
             raise NotImplementedError(
@@ -399,22 +398,12 @@ def _get_validity_buffer(
 
         Raises RuntimeError if null representation is not a bit or byte mask.
         """
-
         null, invalid = self.describe_null
-        if null == 4:
-            if self.dtype[0] == _DtypeKind.CATEGORICAL:
-                valid_mask = cast(
-                    cudf.core.column.CategoricalColumn, self._col
-                ).codes._get_mask_as_column()
-            else:
-                valid_mask = self._col._get_mask_as_column()
 
-            assert (valid_mask is not None) and (
-                valid_mask.data is not None
-            ), "valid_mask(.data) should not be None when "
-            "_CuDFColumn.describe_null[0] = 3"
+        if null == 3:
+            assert self._col.mask is not None
             buffer = _CuDFBuffer(
-                valid_mask.data, cp.uint8, allow_copy=self._allow_copy
+                self._col.mask, cp.uint8, allow_copy=self._allow_copy
             )
             dtype = (_DtypeKind.UINT, 8, "C", "=")
             return buffer, dtype
@@ -645,7 +634,13 @@ def __dataframe__(
 _INTS = {8: cp.int8, 16: cp.int16, 32: cp.int32, 64: cp.int64}
 _UINTS = {8: cp.uint8, 16: cp.uint16, 32: cp.uint32, 64: cp.uint64}
 _FLOATS = {32: cp.float32, 64: cp.float64}
-_CP_DTYPES = {0: _INTS, 1: _UINTS, 2: _FLOATS, 20: {8: bool}, 21: {8: cp.uint8}}
+_CP_DTYPES = {
+    0: _INTS,
+    1: _UINTS,
+    2: _FLOATS,
+    20: {8: bool},
+    21: {8: cp.uint8},
+}
 
 
 def from_dataframe(
@@ -705,7 +700,7 @@ def from_dataframe(
 
 
 def _protocol_to_cudf_column_numeric(
-    col: _CuDFColumn, allow_copy: bool
+    col, allow_copy: bool
 ) -> Tuple[
     cudf.core.column.ColumnBase,
     Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
@@ -742,26 +737,31 @@ def _ensure_gpu_buffer(buf, data_type, allow_copy: bool):
             return _CuDFBuffer(
                 as_device_buffer_like(dbuf),
                 protocol_dtype_to_cupy_dtype(data_type),
-                allow_copy
+                allow_copy,
             )
     return buf
 
 
 def _set_missing_values(
-    protocol_col: _CuDFColumn, cudf_col: cudf.core.column.ColumnBase, allow_copy: bool
+    protocol_col,
+    cudf_col: cudf.core.column.ColumnBase,
+    allow_copy: bool,
 ) -> cudf.core.column.ColumnBase:
 
     valid_mask = protocol_col.get_buffers()["validity"]
     if valid_mask is not None:
-        breakpoint()
         null, invalid = protocol_col.describe_null
         if null == 4:  # boolmask
-            valid_mask = _ensure_gpu_buffer(valid_mask[0], valid_mask[1], allow_copy)
+            valid_mask = _ensure_gpu_buffer(
+                valid_mask[0], valid_mask[1], allow_copy
+            )
             boolmask = as_column(valid_mask._buf, dtype="bool")
             bitmask = cudf._lib.transform.bools_to_mask(boolmask)
             return cudf_col.set_mask(bitmask)
         elif null == 3:  # bitmask:
-            valid_mask = _ensure_gpu_buffer(valid_mask[0], valid_mask[1], allow_copy)
+            valid_mask = _ensure_gpu_buffer(
+                valid_mask[0], valid_mask[1], allow_copy
+            )
             bitmask = valid_mask._buf
             return cudf_col.set_mask(bitmask)
     return cudf_col
@@ -777,7 +777,7 @@ def protocol_dtype_to_cupy_dtype(_dtype: ProtoDtype) -> cp.dtype:
 
 
 def _protocol_to_cudf_column_categorical(
-    col: _CuDFColumn, allow_copy: bool
+    col, allow_copy: bool
 ) -> Tuple[
     cudf.core.column.ColumnBase,
     Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
@@ -814,7 +814,7 @@ def _protocol_to_cudf_column_categorical(
 
 
 def _protocol_to_cudf_column_string(
-    col: _CuDFColumn, allow_copy: bool
+    col, allow_copy: bool
 ) -> Tuple[
     cudf.core.column.ColumnBase,
     Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 59aac83f52f..64169b6df27 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -6,10 +6,12 @@
 import pandas as pd
 import pytest
 
+import rmm
+
 import cudf
 from cudf.core._compat import PANDAS_GE_150
-from cudf.core.buffer import Buffer
-from cudf.core.column import build_column
+from cudf.core.buffer import Buffer, as_device_buffer_like
+from cudf.core.column import as_column, build_column
 from cudf.core.df_protocol import (
     DataFrameObject,
     _CuDFBuffer,
@@ -26,6 +28,36 @@ def pandas_df():
     return pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
 
 
+def assert_validity_equal(protocol_buffer, cudf_buffer, size, null, valid):
+    if null == 3:
+        # boolmask
+        protocol_mask = as_device_buffer_like(
+            rmm.DeviceBuffer(
+                ptr=protocol_buffer[0].ptr, size=protocol_buffer[0].bufsize
+            )
+        )
+        assert_eq(
+            as_column(protocol_mask, dtype="bool"),
+            as_column(cudf_buffer, dtype="bool"),
+        )
+    elif null == 4:
+        # bitmask
+        protocol_mask = as_device_buffer_like(
+            rmm.DeviceBuffer(
+                ptr=protocol_buffer[0].ptr, size=protocol_buffer[0].bufsize
+            )
+        )
+        cudf_mask = cudf_buffer
+        assert_eq(
+            build_column(
+                None, "string", size, mask=protocol_mask, children=()
+            ),
+            build_column(None, "string", size, mask=cudf_mask, children=()),
+        )
+    else:
+        raise NotImplementedError()
+
+
 def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
     buf, dtype = buffer_and_dtype
     device_id = cp.asarray(cudfcol.data).device.id
@@ -65,9 +97,11 @@ def assert_column_equal(col: _CuDFColumn, cudfcol):
         pytest.raises(RuntimeError, col._get_validity_buffer)
         assert col.get_buffers()["validity"] is None
     else:
-        assert_buffer_equal(
+        assert_validity_equal(
             col.get_buffers()["validity"],
-            cudfcol._get_mask_as_column().astype(cp.uint8),
+            cudfcol.mask,
+            cudfcol.size,
+            *col.describe_null,
         )
 
     if col.dtype[0] == _DtypeKind.CATEGORICAL:
@@ -85,7 +119,7 @@ def assert_column_equal(col: _CuDFColumn, cudfcol):
     if col.null_count == 0:
         assert col.describe_null == (0, None)
     else:
-        assert col.describe_null == (4, 0)
+        assert col.describe_null == (3, 0)
 
 
 def assert_dataframe_equal(dfo: DataFrameObject, df: cudf.DataFrame):
@@ -202,7 +236,7 @@ def test_NA_categorical_dtype():
     col = df.__dataframe__().get_column_by_name("B")
     assert col.dtype[0] == _DtypeKind.CATEGORICAL
     assert col.null_count == 2
-    assert col.describe_null == (4, 0)
+    assert col.describe_null == (3, 0)
     assert col.num_chunks() == 1
     assert col.describe_categorical == (False, True, {0: 1, 1: 2, 2: 5})
     assert_from_dataframe_equals(df, allow_copy=False)
@@ -223,7 +257,7 @@ def test_NA_string_dtype():
     col = df.__dataframe__().get_column_by_name("B")
     assert col.dtype[0] == _DtypeKind.STRING
     assert col.null_count == 1
-    assert col.describe_null == (4, 0)
+    assert col.describe_null == (3, 0)
     assert col.num_chunks() == 1
     assert_from_dataframe_equals(df, allow_copy=False)
     assert_from_dataframe_equals(df, allow_copy=True)

From 9ccd44055001236b1e326342829835c26b174da5 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 31 Oct 2022 13:47:36 -0400
Subject: [PATCH 17/26] More improvements

---
 python/cudf/cudf/core/df_protocol.py       | 33 +++++++++++++++-----
 python/cudf/cudf/tests/test_df_protocol.py | 35 ++++++++--------------
 2 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index 5e3310826bb..a5b4c65e46c 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -17,6 +17,8 @@
 import numpy as np
 from numba.cuda import as_cuda_array
 
+import rmm
+
 import cudf
 from cudf.core.buffer import DeviceBufferLike, as_device_buffer_like
 from cudf.core.column import as_column, build_categorical_column, build_column
@@ -46,6 +48,14 @@ class _Device(enum.IntEnum):
     ROCM = 10
 
 
+class _MaskKind(enum.IntEnum):
+    NON_NULLABLE = (0,)
+    NAN = (1,)
+    SENTINEL = (2,)
+    BITMASK = (3,)
+    BYTEMASK = 4
+
+
 _SUPPORTED_KINDS = {
     _DtypeKind.INT,
     _DtypeKind.UINT,
@@ -400,7 +410,7 @@ def _get_validity_buffer(
         """
         null, invalid = self.describe_null
 
-        if null == 3:
+        if null == _MaskKind.BITMASK:
             assert self._col.mask is not None
             buffer = _CuDFBuffer(
                 self._col.mask, cp.uint8, allow_copy=self._allow_copy
@@ -408,12 +418,12 @@ def _get_validity_buffer(
             dtype = (_DtypeKind.UINT, 8, "C", "=")
             return buffer, dtype
 
-        elif null == 1:
+        elif null == _MaskKind.NAN:
             raise RuntimeError(
                 "This column uses NaN as null "
                 "so does not have a separate mask"
             )
-        elif null == 0:
+        elif null == _MaskKind.NON_NULLABLE:
             raise RuntimeError(
                 "This column is non-nullable so does not have a mask"
             )
@@ -723,9 +733,10 @@ def _protocol_to_cudf_column_numeric(
     return _set_missing_values(col, cudfcol_num, allow_copy), buffers
 
 
-def _ensure_gpu_buffer(buf, data_type, allow_copy: bool):
-    import rmm
-
+def _ensure_gpu_buffer(buf, data_type, allow_copy: bool) -> _CuDFBuffer:
+    # if `buf` is a (protocol) buffer that lives on the GPU already,
+    # return it as is.  Otherwise, copy it to the device and return
+    # the resulting buffer.
     if buf.__dlpack_device__()[0] != _Device.CUDA:
         if not allow_copy:
             raise TypeError(
@@ -751,14 +762,14 @@ def _set_missing_values(
     valid_mask = protocol_col.get_buffers()["validity"]
     if valid_mask is not None:
         null, invalid = protocol_col.describe_null
-        if null == 4:  # boolmask
+        if null == _MaskKind.BYTEMASK:
             valid_mask = _ensure_gpu_buffer(
                 valid_mask[0], valid_mask[1], allow_copy
             )
             boolmask = as_column(valid_mask._buf, dtype="bool")
             bitmask = cudf._lib.transform.bools_to_mask(boolmask)
             return cudf_col.set_mask(bitmask)
-        elif null == 3:  # bitmask:
+        elif null == _MaskKind.BITMASK:
             valid_mask = _ensure_gpu_buffer(
                 valid_mask[0], valid_mask[1], allow_copy
             )
@@ -848,3 +859,9 @@ def _protocol_to_cudf_column_string(
         None, dtype=cp.dtype("O"), children=(offsets, encoded_string)
     )
     return _set_missing_values(col, cudfcol_str, allow_copy), buffers
+
+
+def _protocol_buffer_to_cudf_buffer(protocol_buffer):
+    return as_device_buffer_like(
+        rmm.DeviceBuffer(ptr=protocol_buffer.ptr, size=protocol_buffer.bufsize)
+    )
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 64169b6df27..0bcb6dde271 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -6,17 +6,16 @@
 import pandas as pd
 import pytest
 
-import rmm
-
 import cudf
 from cudf.core._compat import PANDAS_GE_150
-from cudf.core.buffer import Buffer, as_device_buffer_like
 from cudf.core.column import as_column, build_column
 from cudf.core.df_protocol import (
     DataFrameObject,
     _CuDFBuffer,
     _CuDFColumn,
     _DtypeKind,
+    _MaskKind,
+    _protocol_buffer_to_cudf_buffer,
     from_dataframe,
     protocol_dtype_to_cupy_dtype,
 )
@@ -29,30 +28,22 @@ def pandas_df():
 
 
 def assert_validity_equal(protocol_buffer, cudf_buffer, size, null, valid):
-    if null == 3:
-        # boolmask
-        protocol_mask = as_device_buffer_like(
-            rmm.DeviceBuffer(
-                ptr=protocol_buffer[0].ptr, size=protocol_buffer[0].bufsize
-            )
-        )
+    if null == _MaskKind.BYTEMASK:
+        protocol_mask = _protocol_buffer_to_cudf_buffer(protocol_buffer)
         assert_eq(
             as_column(protocol_mask, dtype="bool"),
             as_column(cudf_buffer, dtype="bool"),
         )
-    elif null == 4:
-        # bitmask
-        protocol_mask = as_device_buffer_like(
-            rmm.DeviceBuffer(
-                ptr=protocol_buffer[0].ptr, size=protocol_buffer[0].bufsize
-            )
-        )
+    elif null == _MaskKind.BITMASK:
+        protocol_mask = _protocol_buffer_to_cudf_buffer(protocol_buffer)
         cudf_mask = cudf_buffer
         assert_eq(
             build_column(
-                None, "string", size, mask=protocol_mask, children=()
+                None, "string", size=size, mask=protocol_mask, children=()
+            ),
+            build_column(
+                None, "string", size=size, mask=cudf_mask, children=()
             ),
-            build_column(None, "string", size, mask=cudf_mask, children=()),
         )
     else:
         raise NotImplementedError()
@@ -63,7 +54,7 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
     device_id = cp.asarray(cudfcol.data).device.id
     assert buf.__dlpack_device__() == (2, device_id)
     col_from_buf = build_column(
-        Buffer(data=buf.ptr, size=buf.bufsize, owner=None),
+        _protocol_buffer_to_cudf_buffer(buf),
         protocol_dtype_to_cupy_dtype(dtype),
     )
     # check that non null values are the equals as nulls are represented
@@ -78,7 +69,7 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
     )
 
     if dtype[0] != _DtypeKind.BOOL:
-        array_from_dlpack = cp.fromDlpack(buf.__dlpack__()).get()
+        array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get()
         col_array = cp.asarray(cudfcol.data_array_view).get()
         assert_eq(
             array_from_dlpack[non_null_idxs.to_numpy()].flatten(),
@@ -98,7 +89,7 @@ def assert_column_equal(col: _CuDFColumn, cudfcol):
         assert col.get_buffers()["validity"] is None
     else:
         assert_validity_equal(
-            col.get_buffers()["validity"],
+            col.get_buffers()["validity"][0],
             cudfcol.mask,
             cudfcol.size,
             *col.describe_null,

From f1cb5cbf5ad081e67b9631d21210157a7a2eed13 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 31 Oct 2022 13:48:44 -0400
Subject: [PATCH 18/26] Make the Pandas DF nullable

---
 python/cudf/cudf/tests/test_df_protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 0bcb6dde271..4a81b1bccba 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -24,7 +24,7 @@
 
 @pytest.fixture
 def pandas_df():
-    return pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+    return pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", None]})
 
 
 def assert_validity_equal(protocol_buffer, cudf_buffer, size, null, valid):

From 2a5859da78c57ab17b0195ff77b63e2755a61e5f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 31 Oct 2022 14:41:47 -0400
Subject: [PATCH 19/26] use enum

---
 python/cudf/cudf/core/df_protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index a5b4c65e46c..96b23d97edc 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -324,7 +324,7 @@ def describe_null(self) -> Tuple[int, Any]:
 
         elif kind in _SUPPORTED_KINDS:
             # currently, we return a bit mask
-            return 3, 0
+            return _MaskKind.BITMASK, 0
 
         else:
             raise NotImplementedError(

From 1ec350de5ac9b37a8654f35f1c51a23b680834b6 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 31 Oct 2022 14:42:17 -0400
Subject: [PATCH 20/26] Int -> enum

---
 python/cudf/cudf/core/df_protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index 96b23d97edc..785ffb62fae 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -320,7 +320,7 @@ def describe_null(self) -> Tuple[int, Any]:
         kind = self.dtype[0]
         if self.null_count == 0:
             # there is no validity mask so it is non-nullable
-            return 0, None
+            return _MaskKind.NON_NULLABLE, None
 
         elif kind in _SUPPORTED_KINDS:
             # currently, we return a bit mask

From e4d1d4f3a5ea16fa37d099fd79e2707422857d54 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 5 Apr 2023 14:41:07 -0400
Subject: [PATCH 21/26] Add docstring for from_dataframe

---
 python/cudf/cudf/core/df_protocol.py | 34 +++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index c7329785fb8..bcf74e858ad 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -657,7 +657,39 @@ def from_dataframe(
     df: DataFrameObject, allow_copy: bool = False
 ) -> _CuDFDataFrame:
     """
-    Construct a cudf DataFrame from ``df`` if it supports ``__dataframe__``
+    Construct a ``DataFrame`` from ``df`` if it supports the
+    dataframe interchange protocol (``__dataframe__``).
+
+    Parameters
+    ----------
+    df: Object supporting dataframe interchange protocol
+    allow_copy
+        If ``True``, allow copying of the data. If ``False``, a
+        ``TypeError`` is raised if data copying is required to
+        construct the ``DataFrame`` (e.g., if ``df`` lives in CPU
+        memory).
+
+    Returns
+    -------
+    DataFrame
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> pdf = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
+    >>> df = cudf.from_dataframe(pdf, allow_copy=True)
+    >>> type(df)
+    cudf.core.dataframe.DataFrame
+    >>> df
+       a  b
+    0  1  x
+    1  2  y
+    2  3  z
+
+    Notes
+    -----
+    See https://data-apis.org/dataframe-protocol/latest/index.html
+    for the dataframe interchange protocol spec and API
     """
     if isinstance(df, cudf.DataFrame):
         return df

From 2525790cd1db7c9a478c66bdc2c2f715d3bd8069 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 5 Apr 2023 14:42:53 -0400
Subject: [PATCH 22/26] Use ints to construct IntEnum

---
 python/cudf/cudf/core/df_protocol.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index bcf74e858ad..fd0d0214a9b 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -49,10 +49,10 @@ class _Device(enum.IntEnum):
 
 
 class _MaskKind(enum.IntEnum):
-    NON_NULLABLE = (0,)
-    NAN = (1,)
-    SENTINEL = (2,)
-    BITMASK = (3,)
+    NON_NULLABLE = 0
+    NAN = 1
+    SENTINEL = 2
+    BITMASK = 3
     BYTEMASK = 4
 
 

From e0f3275825c7e8b441d07535c350be07a2db6df0 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 5 Apr 2023 14:53:19 -0400
Subject: [PATCH 23/26] Review suggestions

---
 python/cudf/cudf/core/df_protocol.py       | 12 +++++-----
 python/cudf/cudf/tests/test_df_protocol.py | 28 ++++++++++++----------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index fd0d0214a9b..5c69966a429 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -770,18 +770,18 @@ def _ensure_gpu_buffer(buf, data_type, allow_copy: bool) -> _CuDFBuffer:
     # return it as is.  Otherwise, copy it to the device and return
     # the resulting buffer.
     if buf.__dlpack_device__()[0] != _Device.CUDA:
-        if not allow_copy:
-            raise TypeError(
-                "This operation must copy data from CPU to GPU. "
-                "Set `allow_copy=True` to allow it."
-            )
-        else:
+        if allow_copy:
             dbuf = rmm.DeviceBuffer(ptr=buf.ptr, size=buf.bufsize)
             return _CuDFBuffer(
                 as_buffer(dbuf, exposed=True),
                 protocol_dtype_to_cupy_dtype(data_type),
                 allow_copy,
             )
+        else:
+            raise TypeError(
+                "This operation must copy data from CPU to GPU. "
+                "Set `allow_copy=True` to allow it."
+            )
     return buf
 
 
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index ad4b97b0836..fc5182d3c83 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -40,10 +40,18 @@ def assert_validity_equal(protocol_buffer, cudf_buffer, size, null, valid):
         cudf_mask = cudf_buffer
         assert_eq(
             build_column(
-                None, "string", size=size, mask=protocol_mask, children=()
+                as_buffer(cp.zeros(10, dtype="int8")),
+                "int8",
+                size=size,
+                mask=protocol_mask,
+                children=(),
             ),
             build_column(
-                None, "string", size=size, mask=cudf_mask, children=()
+                as_buffer(cp.zeros(10, dtype="int8")),
+                "int8",
+                size=size,
+                mask=cudf_mask,
+                children=(),
             ),
         )
     else:
@@ -68,16 +76,12 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
         col_from_buf.apply_boolean_mask(non_null_idxs),
         cudfcol.apply_boolean_mask(non_null_idxs),
     )
-
-    if dtype[0] != _DtypeKind.BOOL:
-        array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get()
-        col_array = cp.asarray(cudfcol.data_array_view(mode="read")).get()
-        assert_eq(
-            array_from_dlpack[non_null_idxs.to_numpy()].flatten(),
-            col_array[non_null_idxs.to_numpy()].flatten(),
-        )
-    else:
-        pytest.raises(TypeError, buf.__dlpack__)
+    array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get()
+    col_array = cp.asarray(cudfcol.data_array_view(mode="read")).get()
+    assert_eq(
+        array_from_dlpack[non_null_idxs.to_numpy()].flatten(),
+        col_array[non_null_idxs.to_numpy()].flatten(),
+    )
 
 
 def assert_column_equal(col: _CuDFColumn, cudfcol):

From f48422f4a65842d899338eb1419be1fb82e4d625 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 14 Apr 2023 10:22:29 -0400
Subject: [PATCH 24/26] Fix categorical handling and add tests for nulls

---
 python/cudf/cudf/core/df_protocol.py       |  4 +---
 python/cudf/cudf/tests/test_df_protocol.py | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index 5c69966a429..2c542f59f17 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -828,13 +828,11 @@ def _protocol_to_cudf_column_categorical(
     """
     Convert a categorical column to a Series instance
     """
-    ordered, is_dict, mapping = col.describe_categorical
+    ordered, is_dict, categories = col.describe_categorical
     if not is_dict:
         raise NotImplementedError(
             "Non-dictionary categoricals not supported yet"
         )
-
-    categories = as_column(mapping.values())
     buffers = col.get_buffers()
     assert buffers["data"] is not None, "data buffer should not be None"
     codes_buffer, codes_dtype = buffers["data"]
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index fc5182d3c83..fd550635b61 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -23,9 +23,16 @@
 from cudf.testing._utils import assert_eq
 
 
-@pytest.fixture
-def pandas_df():
-    return pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", None]})
+@pytest.fixture(
+    params=[
+        {"a": [1, 2, 3], "b": ["x", "y", "z"]},
+        {"a": [1, 2, None], "b": ["x", "y", "z"]},
+        {"a": [1, 2, 3], "b": pd.Categorical(["x", "y", None])},
+    ]
+)
+def pandas_df(request):
+    data = request.param
+    return pd.DataFrame(data)
 
 
 def assert_validity_equal(protocol_buffer, cudf_buffer, size, null, valid):
@@ -275,5 +282,4 @@ def test_NA_mixed_dtype():
     reason="Pandas versions < 1.5.0 do not support interchange protocol",
 )
 def test_from_cpu_df(pandas_df):
-    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
-    cudf.from_dataframe(df, allow_copy=True)
+    cudf.from_dataframe(pandas_df, allow_copy=True)

From e90fe68f990eb6e059afe7a809b53f11e6b60a4c Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Fri, 14 Apr 2023 14:46:05 -0400
Subject: [PATCH 25/26] Update python/cudf/cudf/core/df_protocol.py

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 python/cudf/cudf/core/df_protocol.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index 2c542f59f17..a0663116c04 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -662,8 +662,9 @@ def from_dataframe(
 
     Parameters
     ----------
-    df: Object supporting dataframe interchange protocol
-    allow_copy
+    df : DataFrameObject
+        Object supporting dataframe interchange protocol
+    allow_copy : bool
         If ``True``, allow copying of the data. If ``False``, a
         ``TypeError`` is raised if data copying is required to
         construct the ``DataFrame`` (e.g., if ``df`` lives in CPU

From 862f587262db218af52dda697f3e2176830c0e7a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 28 Apr 2023 06:56:10 -0400
Subject: [PATCH 26/26] size is a method

---
 python/cudf/cudf/core/df_protocol.py       | 1 -
 python/cudf/cudf/tests/test_df_protocol.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index a0663116c04..6e1c5f6fd00 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -160,7 +160,6 @@ def __init__(
         self._nan_as_null = nan_as_null
         self._allow_copy = allow_copy
 
-    @property
     def size(self) -> int:
         """
         Size of the column, in elements.
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index fd550635b61..d6134c7bb01 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -92,7 +92,7 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
 
 
 def assert_column_equal(col: _CuDFColumn, cudfcol):
-    assert col.size == cudfcol.size
+    assert col.size() == cudfcol.size
     assert col.offset == 0
     assert col.null_count == cudfcol.null_count
     assert col.num_chunks() == 1