From 896f221b2d4f39712d05de47a3b39be774eb4f70 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Thu, 8 Sep 2022 15:49:16 -0500 Subject: [PATCH 1/5] Avoid head call for better Index support --- python/pyarrow/pandas_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index e4b13175fe130..7a2987ab5b278 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -526,7 +526,7 @@ def dataframe_to_types(df, preserve_index, columns=None): if _pandas_api.is_categorical(values): type_ = pa.array(c, from_pandas=True).type elif _pandas_api.is_extension_array_dtype(values): - type_ = pa.array(c.head(0), from_pandas=True).type + type_ = pa.array(c[:0], from_pandas=True).type else: values, type_ = get_datetimetz_type(values, c.dtype, None) type_ = pa.lib._ndarray_to_arrow_type(values, type_) From 67b43f23e05667b1d4581c0895e04ff08a2ba9ae Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Thu, 8 Sep 2022 16:13:48 -0500 Subject: [PATCH 2/5] Add test coverage --- python/pyarrow/tests/test_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index f26eaaf5fc14f..0547d850d3602 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -663,7 +663,7 @@ def test_schema_from_pandas(): if Version(pd.__version__) >= Version('1.0.0'): inputs.append(pd.array([1, 2, None], dtype=pd.Int32Dtype())) for data in inputs: - df = pd.DataFrame({'a': data}) + df = pd.DataFrame({'a': data}, index=data) schema = pa.Schema.from_pandas(df) expected = pa.Table.from_pandas(df).schema assert schema == expected From 243305d920c4d52e457e748b3e5687bf74630170 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Wed, 14 Sep 2022 09:44:20 -0500 Subject: [PATCH 3/5] Avoid future deprecation warning for Series --- python/pyarrow/pandas_compat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index e9f27b65c10e1..32d5adc000bd6 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -541,7 +541,8 @@ def dataframe_to_types(df, preserve_index, columns=None): if _pandas_api.is_categorical(values): type_ = pa.array(c, from_pandas=True).type elif _pandas_api.is_extension_array_dtype(values): - type_ = pa.array(c[:0], from_pandas=True).type + empty = c.head(0) if isinstance(c, pd.Series) else c[:0] + type_ = pa.array(empty, from_pandas=True).type else: values, type_ = get_datetimetz_type(values, c.dtype, None) type_ = pa.lib._ndarray_to_arrow_type(values, type_) From 734f3f2beeedc4722a0ceecba00596b35174e424 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Wed, 14 Sep 2022 13:23:57 -0500 Subject: [PATCH 4/5] Use _pandas_api shim --- python/pyarrow/pandas_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 32d5adc000bd6..6465a0e67939f 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -541,7 +541,7 @@ def dataframe_to_types(df, preserve_index, columns=None): if _pandas_api.is_categorical(values): type_ = pa.array(c, from_pandas=True).type elif _pandas_api.is_extension_array_dtype(values): - empty = c.head(0) if isinstance(c, pd.Series) else c[:0] + empty = c.head(0) if isinstance(c, _pandas_api.pd.Series) else c[:0] type_ = pa.array(empty, from_pandas=True).type else: values, type_ = get_datetimetz_type(values, c.dtype, None) From e27014728fcaa2fb9db06f1d00fbccdfd685707d Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Wed, 14 Sep 2022 14:24:24 -0500 Subject: [PATCH 5/5] Lint --- python/pyarrow/pandas_compat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 6465a0e67939f..9fa7a699efbae 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -541,7 +541,8 @@ def dataframe_to_types(df, preserve_index, columns=None): if _pandas_api.is_categorical(values): type_ = pa.array(c, from_pandas=True).type elif _pandas_api.is_extension_array_dtype(values): - empty = c.head(0) if isinstance(c, _pandas_api.pd.Series) else c[:0] + empty = c.head(0) if isinstance( + c, _pandas_api.pd.Series) else c[:0] type_ = pa.array(empty, from_pandas=True).type else: values, type_ = get_datetimetz_type(values, c.dtype, None)