From 3895737f03eec6fe99d605c89a934fe9ea1f9d11 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 7 Feb 2024 15:58:49 +0000 Subject: [PATCH 1/6] Fix DataFrame.sort_index --- python/cudf/cudf/core/indexed_frame.py | 7 +++++-- python/cudf/cudf/tests/test_dataframe.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 659e323c57d..604f700c8aa 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -2608,12 +2608,15 @@ def sort_index( and self._data.multiindex ): out._set_column_names_like(self) + if ignore_index is True: + out = out.reset_index(drop=True) else: labels = sorted(self._data.names, reverse=not ascending) out = self[labels] + if ignore_index is True: + out._data.rangeindex = True + out._data.names = list(range(0, len(self._data.names))) - if ignore_index is True: - out = out.reset_index(drop=True) return self._mimic_inplace(out, inplace=inplace) def memory_usage(self, index=True, deep=False): diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a0f6c4c3cfc..3ea2d797ce9 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3618,12 +3618,23 @@ def test_dataframe_mulitindex_sort_index( ): request.applymarker( pytest.mark.xfail( - condition=axis in (1, "columns") + condition=not PANDAS_GE_220 + and axis in (1, "columns") and ignore_index and not (level is None and not ascending), reason="https://github.com/pandas-dev/pandas/issues/56478", ) ) + request.applymarker( + pytest.mark.xfail( + condition=PANDAS_GE_220 + and axis in (1, "columns") + and level is None + and not ascending + and ignore_index, + reason="https://github.com/pandas-dev/pandas/issues/57293", + ) + ) pdf = pd.DataFrame( { "b": [1.0, 3.0, np.nan], From d76fd8d701065891ccc4fc176132cff780722232 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 7 Feb 2024 10:17:53 -0600 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/core/indexed_frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 604f700c8aa..aa75b0d825e 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -2608,14 +2608,14 @@ def sort_index( and self._data.multiindex ): out._set_column_names_like(self) - if ignore_index is True: + if ignore_index: out = out.reset_index(drop=True) else: labels = sorted(self._data.names, reverse=not ascending) out = self[labels] - if ignore_index is True: + if ignore_index: out._data.rangeindex = True - out._data.names = list(range(0, len(self._data.names))) + out._data.names = list(range(len(self._data.names))) return self._mimic_inplace(out, inplace=inplace) From 9ef1f5311c11d34eae26b288d80c9a859156c01c Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 7 Feb 2024 10:34:40 -0600 Subject: [PATCH 3/6] Update test_dataframe.py --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3ea2d797ce9..517a755e0b5 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -25,7 +25,7 @@ import cudf from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_LT_203 +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_GE_220, PANDAS_LT_203 from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.errors import MixedTypeError From ed7149984dd0b4704221ed2787e710f77773ad16 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 7 Feb 2024 10:38:14 -0600 Subject: [PATCH 4/6] isort --- python/cudf/cudf/tests/test_dataframe.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 517a755e0b5..7002e06014e 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -25,7 +25,12 @@ import cudf from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_GE_220, PANDAS_LT_203 +from cudf.core._compat import ( + PANDAS_GE_200, + PANDAS_GE_210, + PANDAS_GE_220, + PANDAS_LT_203, +) from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.errors import MixedTypeError From 511fd1ba993ac9f64b5cff21547122397e5453a6 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 7 Feb 2024 17:56:32 +0000 Subject: [PATCH 5/6] fix pytest --- python/cudf/cudf/tests/test_dataframe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 7002e06014e..06a89c4618d 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3632,8 +3632,7 @@ def test_dataframe_mulitindex_sort_index( ) request.applymarker( pytest.mark.xfail( - condition=PANDAS_GE_220 - and axis in (1, "columns") + condition=axis in (1, "columns") and level is None and not ascending and ignore_index, From f2aa07dc8b0fb544f690e406d024e45e7d2be616 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 8 Feb 2024 06:00:21 +0000 Subject: [PATCH 6/6] fix pytest --- python/cudf/cudf/tests/test_dataframe.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 06a89c4618d..f9af0d10713 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3567,8 +3567,16 @@ def test_dataframe_empty_sort_index(): @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("na_position", ["first", "last"]) def test_dataframe_sort_index( - index, axis, ascending, inplace, ignore_index, na_position + request, index, axis, ascending, inplace, ignore_index, na_position ): + request.applymarker( + pytest.mark.xfail( + condition=not PANDAS_GE_220 + and axis in (1, "columns") + and ignore_index, + reason="Bug fixed in pandas-2.2", + ) + ) pdf = pd.DataFrame( {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, index=index,