Skip to content

Commit

Permalink
Fix DataFrame.sort_index to respect ignore_index on all axes (#14995)
Browse files Browse the repository at this point in the history

This PR fixes `DataFrame.sort_index` so that `ignore_index=True` is honored for every value of `axis`. The corresponding pandas bug is fixed in pandas 2.2, so the affected tests are marked as xfail behind a version check when running against older pandas versions.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #14995
  • Loading branch information
galipremsagar authored Feb 8, 2024
1 parent 285b836 commit 73bac83
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 5 deletions.
7 changes: 5 additions & 2 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2608,12 +2608,15 @@ def sort_index(
and self._data.multiindex
):
out._set_column_names_like(self)
if ignore_index:
out = out.reset_index(drop=True)
else:
labels = sorted(self._data.names, reverse=not ascending)
out = self[labels]
if ignore_index:
out._data.rangeindex = True
out._data.names = list(range(len(self._data.names)))

if ignore_index is True:
out = out.reset_index(drop=True)
return self._mimic_inplace(out, inplace=inplace)

def memory_usage(self, index=True, deep=False):
Expand Down
29 changes: 26 additions & 3 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@

import cudf
from cudf.api.extensions import no_default
from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_LT_203
from cudf.core._compat import (
PANDAS_GE_200,
PANDAS_GE_210,
PANDAS_GE_220,
PANDAS_LT_203,
)
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.core.column import column
from cudf.errors import MixedTypeError
Expand Down Expand Up @@ -3562,8 +3567,16 @@ def test_dataframe_empty_sort_index():
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("na_position", ["first", "last"])
def test_dataframe_sort_index(
index, axis, ascending, inplace, ignore_index, na_position
request, index, axis, ascending, inplace, ignore_index, na_position
):
request.applymarker(
pytest.mark.xfail(
condition=not PANDAS_GE_220
and axis in (1, "columns")
and ignore_index,
reason="Bug fixed in pandas-2.2",
)
)
pdf = pd.DataFrame(
{"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]},
index=index,
Expand Down Expand Up @@ -3618,12 +3631,22 @@ def test_dataframe_mulitindex_sort_index(
):
request.applymarker(
pytest.mark.xfail(
condition=axis in (1, "columns")
condition=not PANDAS_GE_220
and axis in (1, "columns")
and ignore_index
and not (level is None and not ascending),
reason="https://github.com/pandas-dev/pandas/issues/56478",
)
)
request.applymarker(
pytest.mark.xfail(
condition=axis in (1, "columns")
and level is None
and not ascending
and ignore_index,
reason="https://github.com/pandas-dev/pandas/issues/57293",
)
)
pdf = pd.DataFrame(
{
"b": [1.0, 3.0, np.nan],
Expand Down

0 comments on commit 73bac83

Please sign in to comment.