Skip to content

Commit

Permalink
Fix from_arrow to load a sliced arrow table (#12665)
Browse files Browse the repository at this point in the history
Fixes: #12463 

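This PR makes `from_arrow` ignore outdated pandas RangeIndex metadata (for example, after an arrow table has been sliced) when the index length it describes no longer matches the length of the table.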

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #12665
  • Loading branch information
galipremsagar authored Feb 2, 2023
1 parent 3aab6b8 commit 32f5efa
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 6 deletions.
20 changes: 14 additions & 6 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5173,13 +5173,21 @@ def from_arrow(cls, table):

if index_col:
if isinstance(index_col[0], dict):
out = out.set_index(
cudf.RangeIndex(
index_col[0]["start"],
index_col[0]["stop"],
name=index_col[0]["name"],
)
idx = cudf.RangeIndex(
index_col[0]["start"],
index_col[0]["stop"],
name=index_col[0]["name"],
)
if len(idx) == len(out):
# `idx` is generated from arrow `pandas_metadata`,
# which can get out of date after many arrow
# operations. Only set the index when its length
# matches the data; otherwise skip it, so that the
# default RangeIndex is kept.
# See the discussion here:
# https://github.com/apache/arrow/issues/15178
out = out.set_index(idx)
else:
out = out.set_index(index_col[0])

Expand Down
14 changes: 14 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10011,3 +10011,17 @@ def test_dataframe_transpose_complex_types(data):
actual = gdf.T

assert_eq(expected, actual)


def test_dataframe_from_arrow_slice():
    """Compare ``from_arrow`` on a sliced arrow table with ``to_pandas``.

    The arrow table is built from a pandas DataFrame and then sliced
    before being handed to ``cudf.DataFrame.from_arrow``; the result must
    match what pyarrow itself produces for the same slice.
    """
    source_pdf = pd.DataFrame.from_dict(
        {"a": ["aa", "bb", "cc"] * 3, "b": [1, 2, 3] * 3}
    )
    sliced = pa.Table.from_pandas(source_pdf).slice(3, 7)

    # pyarrow's own conversion is the reference result.
    assert_eq(sliced.to_pandas(), cudf.DataFrame.from_arrow(sliced))

0 comments on commit 32f5efa

Please sign in to comment.