From ef289f0539d4e9f34214fb9b87de33429aa23195 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 12 Feb 2024 15:18:59 -0800 Subject: [PATCH 1/3] Align concat Series name behavior in pandas 2.2 --- python/cudf/cudf/core/reshape.py | 16 ++--- python/cudf/cudf/tests/test_concat.py | 84 +++++++++++++++------------ 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 656db855253..2ef39e9357d 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -102,17 +102,17 @@ def _normalize_series_and_dataframe(objs, axis): """Convert any cudf.Series objects in objs to DataFrames in place.""" # Default to naming series by a numerical id if they are not named. sr_name = 0 - for idx, o in enumerate(objs): - if isinstance(o, cudf.Series): - if axis == 1: - name = o.name - if name is None: + for idx, obj in enumerate(objs): + if isinstance(obj, cudf.Series): + name = obj.name + if name is None: + if axis == 0: + name = 0 + else: name = sr_name sr_name += 1 - else: - name = sr_name - objs[idx] = o.to_frame(name=name) + objs[idx] = obj.to_frame(name=name) def concat(objs, axis=0, join="outer", ignore_index=False, sort=None): diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 01c37005271..98b80d08e73 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -10,7 +10,7 @@ import cudf from cudf.api.types import _is_categorical_dtype -from cudf.core._compat import PANDAS_GE_200 +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_220 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import ( assert_eq, @@ -460,41 +460,53 @@ def test_concat_mixed_input(): [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})], [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})], [pd.Series([1, 2, 3.0, 1.2], name="abc"), pd.DataFrame({"a": [1, 2]})], - [ - pd.Series( - [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130] - ), - pd.DataFrame({"a": [1, 2]}), - ], - [ - pd.Series( - [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"] - ), - pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), - ], - [ - pd.Series( - [1, 2, 3.0, 1.2, 8, 100], - name="New name", - index=["a", "b", "c", "d", "e", "f"], - ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], - ), - ], - [ - pd.Series( - [1, 2, 3.0, 1.2, 8, 100], - name="New name", - index=["a", "b", "c", "d", "e", "f"], - ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], - ), - ] - * 7, + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130] + ), + pd.DataFrame({"a": [1, 2]}), + ], + marks=pytest.mark.xfail(not PANDAS_GE_220), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"] + ), + pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), + ], + marks=pytest.mark.xfail(not PANDAS_GE_220), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ], + marks=pytest.mark.xfail(not PANDAS_GE_220), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ] + * 7, + marks=pytest.mark.xfail(not PANDAS_GE_220), + ), ], ) def test_concat_series_dataframe_input(objs): From f2a6025a4385b7bae4d1cf6557a5e260d02984de Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:35:43 -0800 Subject: [PATCH 2/3] Add reason --- python/cudf/cudf/tests/test_concat.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 98b80d08e73..6340861f327 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -467,7 +467,10 @@ def test_concat_mixed_input(): ), pd.DataFrame({"a": [1, 2]}), ], - marks=pytest.mark.xfail(not PANDAS_GE_220), + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), ), pytest.param( [ @@ -476,7 +479,10 @@ def test_concat_mixed_input(): ), pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), ], - marks=pytest.mark.xfail(not PANDAS_GE_220), + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), ), pytest.param( [ @@ -490,7 +496,10 @@ def test_concat_mixed_input(): index=["a", "b", "c", "d", "e", "f"], ), ], - marks=pytest.mark.xfail(not PANDAS_GE_220), + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), ), pytest.param( [ @@ -505,7 +514,10 @@ def test_concat_mixed_input(): ), ] * 7, - marks=pytest.mark.xfail(not PANDAS_GE_220), + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), ), ], ) From 8513774b3bcb182e0facb8e3b67586fd244000d0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 14 Feb 2024 13:34:04 -0800 Subject: [PATCH 3/3] Missed xfail --- python/cudf/cudf/tests/test_concat.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 6340861f327..6e61675ef92 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -459,7 +459,16 @@ def test_concat_mixed_input(): [pd.Series([1, 2, 3]), pd.DataFrame({"a": []})], [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})], [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})], - [pd.Series([1, 2, 3.0, 1.2], name="abc"), pd.DataFrame({"a": [1, 2]})], + pytest.param( + [ + pd.Series([1, 2, 3.0, 1.2], name="abc"), + pd.DataFrame({"a": [1, 2]}), + ], + marks=pytest.mark.xfail( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), + ), pytest.param( [ pd.Series(