def to_pandas(
    self,
    *,
    index: Optional[pd.Index] = None,
    nullable: bool = False,
) -> pd.Series:
    """Convert this list column into an object-dtype ``pandas.Series``.

    The generic ``Column.to_pandas`` implementation can't be relied on
    for lists, so the values are materialised with
    ``self.to_arrow().to_pylist()`` in order to preserve list types.

    Parameters
    ----------
    index : pd.Index, optional
        When provided, it is set as the index of the returned Series.
    nullable : bool, default False
        Not implemented for this column type; a truthy value raises.

    Returns
    -------
    pd.Series
        A Series constructed with ``dtype="object"``.

    Raises
    ------
    NotImplementedError
        If ``nullable`` is truthy.
    """
    if nullable:
        raise NotImplementedError(f"{nullable=} is not implemented.")

    # Go through Arrow -> Python objects rather than the base-class path.
    rows = self.to_arrow().to_pylist()
    result = pd.Series(rows, dtype="object")

    if index is None:
        return result

    # Attach the caller-supplied index to the freshly built Series.
    result.index = index
    return result
import functools import operator @@ -41,6 +41,8 @@ def test_create_list_series(data): expect = pd.Series(data) got = cudf.Series(data) assert_eq(expect, got) + assert isinstance(got[0], type(expect[0])) + assert isinstance(got.to_pandas()[0], type(expect[0])) @pytest.mark.parametrize( diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index c8cc6e65fa5..30251b88dea 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -702,13 +702,9 @@ def test_is_supported(arg, supported): def test_groupby_unique_lists(): df = pd.DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": [10, 10, 10, 7, 8, 9]}) - ddf = dd.from_pandas(df, 2) gdf = cudf.from_pandas(df) gddf = dask_cudf.from_cudf(gdf, 2) - dd.assert_eq( - ddf.groupby("a").b.unique().compute(), - gddf.groupby("a").b.unique().compute(), - ) + dd.assert_eq( gdf.groupby("a").b.unique(), gddf.groupby("a").b.unique().compute(),