From aa9d4846e80fad133e7af19aba99cefa04cb8b7b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 12:17:46 -1000 Subject: [PATCH] Align concat Series name behavior in pandas 2.2 (#15032) Fixed in pandas by https://github.com/pandas-dev/pandas/pull/56365 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/15032 --- python/cudf/cudf/core/reshape.py | 16 ++--- python/cudf/cudf/tests/test_concat.py | 97 ++++++++++++++++++--------- 2 files changed, 73 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 656db855253..2ef39e9357d 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -102,17 +102,17 @@ def _normalize_series_and_dataframe(objs, axis): """Convert any cudf.Series objects in objs to DataFrames in place.""" # Default to naming series by a numerical id if they are not named. sr_name = 0 - for idx, o in enumerate(objs): - if isinstance(o, cudf.Series): - if axis == 1: - name = o.name - if name is None: + for idx, obj in enumerate(objs): + if isinstance(obj, cudf.Series): + name = obj.name + if name is None: + if axis == 0: + name = 0 + else: name = sr_name sr_name += 1 - else: - name = sr_name - objs[idx] = o.to_frame(name=name) + objs[idx] = obj.to_frame(name=name) def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 01c37005271..6e61675ef92 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -10,7 +10,7 @@ import cudf from cudf.api.types import _is_categorical_dtype -from cudf.core._compat import PANDAS_GE_200 +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_220 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import ( assert_eq, @@ -459,42 +459,75 @@ def test_concat_mixed_input(): [pd.Series([1, 2, 3]), pd.DataFrame({"a": []})], [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})], [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})], - [pd.Series([1, 2, 3.0, 1.2], name="abc"), pd.DataFrame({"a": [1, 2]})], - [ - pd.Series( - [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130] - ), - pd.DataFrame({"a": [1, 2]}), - ], - [ - pd.Series( - [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"] + pytest.param( + [ + pd.Series([1, 2, 3.0, 1.2], name="abc"), + pd.DataFrame({"a": [1, 2]}), + ], + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", ), - pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), - ], - [ - pd.Series( - [1, 2, 3.0, 1.2, 8, 100], - name="New name", - index=["a", "b", "c", "d", "e", "f"], + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130] + ), + pd.DataFrame({"a": [1, 2]}), + ], + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"] + ), + pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), + ], + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", ), - ], - [ - pd.Series( - [1, 2, 3.0, 1.2, 8, 100], - name="New name", - index=["a", "b", "c", "d", "e", "f"], + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ], + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ] + * 7, + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", ), - ] - * 7, + ), ], ) def test_concat_series_dataframe_input(objs):