From ba656b6ac1d86877f164f4df57d9da09f853e940 Mon Sep 17 00:00:00 2001
From: NOBODIDI
Date: Sat, 14 Dec 2024 17:20:54 -0500
Subject: [PATCH 1/3] checked class of values before combine_chunks()

Categorical.__init__ read ``values._pa_array`` directly. Index and
Series wrap the Arrow-backed array instead of exposing ``_pa_array``
themselves, so unwrap them before combining chunks. This lets
dictionary-typed ArrowDtype data be converted whatever container it
arrives in (GH 60563).
---
Review notes (text in this position is ignored by git-am):

* The container check uses isinstance() rather than comparing
  ``values.__class__.__name__`` with 'Index', so Index subclasses are
  recognised as well.
* Series is unwrapped too, since it is also a wrapper around the array.
* Index and Series are imported locally, to avoid a circular import at
  module load time.
* The blob ids on the ``index`` lines throughout this series are carried
  over from the original submission and do not describe the revised
  content; regenerate the series with git format-patch before relying
  on them (for example with ``git am --3way``).

 pandas/core/arrays/categorical.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 99e4cb0545e2d..0f5ea54d1d410 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -447,7 +447,17 @@ def __init__(
             if isinstance(values.dtype, ArrowDtype) and issubclass(
                 values.dtype.type, CategoricalDtypeType
             ):
-                arr = values._pa_array.combine_chunks()
+                from pandas import (
+                    Index,
+                    Series,
+                )
+
+                # Index and Series only wrap the Arrow-backed array; unwrap them
+                # so ``_pa_array`` is reachable for every accepted input type.
+                if isinstance(values, (Index, Series)):
+                    arr = values._values._pa_array.combine_chunks()
+                else:
+                    arr = values._pa_array.combine_chunks()
                 categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype)
                 codes = arr.indices.to_numpy()
                 dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)

From f26abfe6eb7c3e949d3c9b6a76252e0cbd723866 Mon Sep 17 00:00:00 2001
From: NOBODIDI
Date: Sat, 14 Dec 2024 17:46:03 -0500
Subject: [PATCH 2/3] tests for issue #60563

Build a Categorical from an Index and from a Series holding
dictionary-typed ArrowDtype data, and check that each result equals the
Categorical built from the underlying extension array.
---
Review notes (text in this position is ignored by git-am):

* The tests are methods of the test class, so they take ``self``.
* pyarrow is an optional dependency: it is obtained through
  pytest.importorskip() inside each test instead of a module-level
  import, which would break collection when pyarrow is missing.
* The data uses a dictionary type. ``ArrowDtype(pa.string())`` does not
  satisfy the ``CategoricalDtypeType`` check and never reaches the
  branch changed in patch 1.
* The assertions call the Categorical constructor and compare against
  the extension-array path, instead of comparing an expression with
  itself.
* A Series has no ``_pa_array`` attribute, so the Series test goes
  through ``values.array``.

 pandas/tests/arrays/categorical/test_dtypes.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
index ec1d501ddba16..21cd02c8aed56 100644
--- a/pandas/tests/arrays/categorical/test_dtypes.py
+++ b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -11,6 +11,7 @@
     Series,
     Timestamp,
 )
+from pandas.core.dtypes.dtypes import ArrowDtype
 import pandas._testing as tm
 
 
@@ -136,3 +137,23 @@ def test_interval_index_category(self):
             [0, 1], [1, 2], dtype="interval[uint64, right]"
         )
         tm.assert_index_equal(result, expected)
+
+    def test_values_is_index(self):
+        # GH 60563
+        pa = pytest.importorskip("pyarrow")
+        dtype = ArrowDtype(pa.dictionary(pa.int32(), pa.string()))
+        values = Index(['a1', 'a2'], dtype=dtype)
+
+        result = Categorical(values)
+        expected = Categorical(values.array)
+        tm.assert_categorical_equal(result, expected)
+
+    def test_values_is_not_index(self):
+        # GH 60563
+        pa = pytest.importorskip("pyarrow")
+        dtype = ArrowDtype(pa.dictionary(pa.int32(), pa.string()))
+        values = Series(['a1', 'a2'], dtype=dtype)
+
+        result = Categorical(values)
+        expected = Categorical(values.array)
+        tm.assert_categorical_equal(result, expected)
\ No newline at end of file

From 0093c0362f1727c647a305d1726ea18f229d1adc Mon Sep 17 00:00:00 2001
From: NOBODIDI
Date: Sat, 14 Dec 2024 18:00:21 -0500
Subject: [PATCH 3/3] fix import statements in test_dtypes

Merge the ArrowDtype import into the existing
pandas.core.dtypes.dtypes import, use double quotes in the new tests
and end the file with a newline.
---
Review notes (text in this position is ignored by git-am):

* Hunk offsets are recomputed for the revised patch 2, which no longer
  adds a module-level pyarrow import.

 pandas/tests/arrays/categorical/test_dtypes.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
index 21cd02c8aed56..22e58c9b08a05 100644
--- a/pandas/tests/arrays/categorical/test_dtypes.py
+++ b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -1,7 +1,10 @@
 import numpy as np
 import pytest
 
-from pandas.core.dtypes.dtypes import CategoricalDtype
+from pandas.core.dtypes.dtypes import (
+    ArrowDtype,
+    CategoricalDtype,
+)
 
 from pandas import (
     Categorical,
@@ -11,7 +14,6 @@
     Series,
     Timestamp,
 )
-from pandas.core.dtypes.dtypes import ArrowDtype
 import pandas._testing as tm
 
 
@@ -142,7 +144,7 @@ def test_values_is_index(self):
         # GH 60563
         pa = pytest.importorskip("pyarrow")
         dtype = ArrowDtype(pa.dictionary(pa.int32(), pa.string()))
-        values = Index(['a1', 'a2'], dtype=dtype)
+        values = Index(["a1", "a2"], dtype=dtype)
 
         result = Categorical(values)
         expected = Categorical(values.array)
@@ -152,8 +154,8 @@ def test_values_is_not_index(self):
         # GH 60563
         pa = pytest.importorskip("pyarrow")
         dtype = ArrowDtype(pa.dictionary(pa.int32(), pa.string()))
-        values = Series(['a1', 'a2'], dtype=dtype)
+        values = Series(["a1", "a2"], dtype=dtype)
 
         result = Categorical(values)
         expected = Categorical(values.array)
-        tm.assert_categorical_equal(result, expected)
\ No newline at end of file
+        tm.assert_categorical_equal(result, expected)