Skip to content

Commit

Permalink
Add ListColumns.to_pandas(arrow_type=) (#15228)
Browse files Browse the repository at this point in the history
I think there will be a mypy error on main soon, as #15182 and #15155 were merged in close succession (my fault for not rebasing first).

Also addresses a review comment I forgot in https://github.com/rapidsai/cudf/pull/15182/files#r1507154770

cc @galipremsagar

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15228
  • Loading branch information
mroeschke authored Mar 5, 2024
1 parent 3571291 commit 427ce01
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def to_pandas(
if nullable:
raise NotImplementedError(f"{nullable=} is not implemented.")
elif arrow_type:
raise NotImplementedError(f"{nullable=} is not implemented.")
raise NotImplementedError(f"{arrow_type=} is not implemented.")
return pd.Series(
self.dtype.to_pandas().__from_arrow__(self.to_arrow()), index=index
)
Expand Down
18 changes: 12 additions & 6 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,17 +294,23 @@ def to_pandas(
*,
index: Optional[pd.Index] = None,
nullable: bool = False,
arrow_type: bool = False,
) -> pd.Series:
# Can't rely on Column.to_pandas implementation for lists.
# Need to perform `to_pylist` to preserve list types.
if arrow_type and nullable:
raise ValueError(
f"{arrow_type=} and {nullable=} cannot both be set."
)
if nullable:
raise NotImplementedError(f"{nullable=} is not implemented.")

pd_series = pd.Series(self.to_arrow().to_pylist(), dtype="object")

if index is not None:
pd_series.index = index
return pd_series
pa_array = self.to_arrow()
if arrow_type:
return pd.Series(
pd.arrays.ArrowExtensionArray(pa_array), index=index
)
else:
return pd.Series(pa_array.tolist(), dtype="object", index=index)


class ListMethods(ColumnMethods):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2726,7 +2726,7 @@ def test_series_from_large_string():
def test_series_to_pandas_arrow_type_nullable_raises(scalar):
pa_array = pa.array([scalar, None])
ser = cudf.Series(pa_array)
with pytest.raises(ValueError):
with pytest.raises(ValueError, match=".* cannot both be set"):
ser.to_pandas(nullable=True, arrow_type=True)


Expand Down

0 comments on commit 427ce01

Please sign in to comment.