Review note: line breaks and indentation were restored from a single-line
dump of this patch (indentation inferred). The `index` blob hashes predate
the added `step` handling and extra test; treat them as informational only.

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 12fdd07984a..1ebf59ba6e4 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5173,13 +5173,23 @@ def from_arrow(cls, table):
 
         if index_col:
             if isinstance(index_col[0], dict):
-                out = out.set_index(
-                    cudf.RangeIndex(
-                        index_col[0]["start"],
-                        index_col[0]["stop"],
-                        name=index_col[0]["name"],
-                    )
+                idx = cudf.RangeIndex(
+                    index_col[0]["start"],
+                    index_col[0]["stop"],
+                    # Honor a non-unit step recorded in the metadata;
+                    # fall back to 1 when the key is absent.
+                    step=index_col[0].get("step", 1),
+                    name=index_col[0]["name"],
                 )
+                if len(idx) == len(out):
+                    # `idx` is rebuilt from the arrow `pandas_metadata`,
+                    # which many arrow operations leave out of date.
+                    # Only use it when its length matches the data;
+                    # otherwise no index is set here, i.e. the default
+                    # RangeIndex is used.
+                    # See the discussion in:
+                    # https://github.com/apache/arrow/issues/15178
+                    out = out.set_index(idx)
             else:
                 out = out.set_index(index_col[0])
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 1191a30f718..09b9f57356c 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10011,3 +10011,29 @@ def test_dataframe_transpose_complex_types(data):
     actual = gdf.T
 
     assert_eq(expected, actual)
+
+
+def test_dataframe_from_arrow_slice():
+    table = pa.Table.from_pandas(
+        pd.DataFrame.from_dict(
+            {"a": ["aa", "bb", "cc"] * 3, "b": [1, 2, 3] * 3}
+        )
+    )
+    # Slicing keeps the `pandas_metadata` of the full 9-row table, so the
+    # RangeIndex it describes no longer matches the sliced row count.
+    table_slice = table.slice(3, 7)
+
+    expected = table_slice.to_pandas()
+    actual = cudf.DataFrame.from_arrow(table_slice)
+
+    assert_eq(expected, actual)
+
+
+def test_dataframe_from_arrow_range_index_step():
+    pdf = pd.DataFrame({"a": [1, 2, 3]}, index=pd.RangeIndex(0, 6, 2))
+    table = pa.Table.from_pandas(pdf)
+
+    expected = table.to_pandas()
+    actual = cudf.DataFrame.from_arrow(table)
+
+    assert_eq(expected, actual)