Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add ignore_index to sort_index #30578

Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ Other enhancements
- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)

- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`)
- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)

Build Changes
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@
# -----------------------------------------------------------------------
# DataFrame class

bool_t = bool # Need alias because NDFrame has def bool:
simonjayhawkins marked this conversation as resolved.
Show resolved Hide resolved


class DataFrame(NDFrame):
"""
Expand Down Expand Up @@ -1984,7 +1986,7 @@ def to_feather(self, path):
@Substitution(klass="DataFrame")
@Appender(_shared_docs["to_markdown"])
def to_markdown(
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs
) -> Optional[str]:
kwargs.setdefault("headers", "keys")
kwargs.setdefault("tablefmt", "pipe")
Expand Down Expand Up @@ -4827,6 +4829,7 @@ def sort_index(
kind="quicksort",
na_position="last",
sort_remaining=True,
ignore_index: bool_t = False,
simonjayhawkins marked this conversation as resolved.
Show resolved Hide resolved
):

# TODO: this can be combined with Series.sort_index impl as
Expand Down Expand Up @@ -4877,6 +4880,9 @@ def sort_index(
# reconstruct axis if needed
new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()

if ignore_index:
new_data.axes[1] = ibase.default_index(len(indexer))

if inplace:
return self._update_inplace(new_data)
else:
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4178,6 +4178,7 @@ def sort_index(
kind: str = "quicksort",
na_position: str = "last",
sort_remaining: bool_t = True,
ignore_index: bool_t = False,
):
"""
Sort object by labels (along an axis).
Expand All @@ -4204,6 +4205,10 @@ def sort_index(
sort_remaining : bool, default True
If True and sorting by level and index is multilevel, sort by other
levels too (in order) after sorting by specified level.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, …, n - 1.

.. versionadded:: 1.0.0

Returns
-------
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ def wrapper(self):
# ----------------------------------------------------------------------
# Series class

bool_t = bool # Need alias because NDFrame has def bool:
simonjayhawkins marked this conversation as resolved.
Show resolved Hide resolved


class Series(base.IndexOpsMixin, generic.NDFrame):
"""
Expand Down Expand Up @@ -1433,7 +1435,7 @@ def to_string(
@Substitution(klass="Series")
@Appender(_shared_docs["to_markdown"])
def to_markdown(
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs
) -> Optional[str]:
return self.to_frame().to_markdown(buf, mode, **kwargs)

Expand Down Expand Up @@ -2880,6 +2882,7 @@ def sort_index(
kind="quicksort",
na_position="last",
sort_remaining=True,
ignore_index: bool_t = False,
simonjayhawkins marked this conversation as resolved.
Show resolved Hide resolved
):
"""
Sort Series by index labels.
Expand Down Expand Up @@ -2908,6 +2911,10 @@ def sort_index(
sort_remaining : bool, default True
If True and sorting by level and index is multilevel, sort by other
levels too (in order) after sorting by specified level.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, …, n - 1.

.. versionadded:: 1.0.0

Returns
-------
Expand Down Expand Up @@ -3035,6 +3042,9 @@ def sort_index(
new_values = self._values.take(indexer)
result = self._constructor(new_values, index=new_index)

if ignore_index:
result.index = ibase.default_index(len(result))

if inplace:
self._update_inplace(result)
else:
Expand Down Expand Up @@ -4395,9 +4405,7 @@ def to_period(self, freq=None, copy=True):
hist = pandas.plotting.hist_series


Series._setup_axes(
["index"], docs={"index": "The index (axis labels) of the Series."},
)
Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."})
Series._add_numeric_operations()
Series._add_series_or_dataframe_operations()

Expand Down
81 changes: 81 additions & 0 deletions pandas/tests/frame/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,84 @@ def test_sort_index_intervalindex(self):
)
result = result.columns.levels[1].categories
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
"original_dict, sorted_dict, ascending, ignore_index, output_index",
[
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, True, [0, 1, 2]),
({"A": [1, 2, 3]}, {"A": [1, 2, 3]}, True, True, [0, 1, 2]),
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, False, [2, 1, 0]),
({"A": [1, 2, 3]}, {"A": [1, 2, 3]}, True, False, [0, 1, 2]),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor but can you use a non-default index as part of the expectation? Right now the parametrization can't disambiguate between the default index and ignore_index argument

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks! changed!

],
)
def test_sort_index_ignore_index(
self, original_dict, sorted_dict, ascending, ignore_index, output_index
):
# GH 30114
df = DataFrame(original_dict)
expected_df = DataFrame(sorted_dict, index=output_index)

sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index)
tm.assert_frame_equal(sorted_df, expected_df)
tm.assert_frame_equal(df, DataFrame(original_dict))

# Test when inplace is True
copied_df = df.copy()
copied_df.sort_index(
ascending=ascending, ignore_index=ignore_index, inplace=True
)
tm.assert_frame_equal(copied_df, expected_df)
tm.assert_frame_equal(df, DataFrame(original_dict))

@pytest.mark.parametrize(
"original_dict, sorted_dict, ascending, ignore_index, output_index",
[
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [1, 2], "M2": [3, 4]},
True,
True,
[0, 1],
),
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [2, 1], "M2": [4, 3]},
False,
True,
[0, 1],
),
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [1, 2], "M2": [3, 4]},
True,
False,
MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")),
),
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [2, 1], "M2": [4, 3]},
False,
False,
MultiIndex.from_tuples([[3, 4], [2, 1]], names=list("AB")),
),
],
)
def test_sort_index_ignore_index_multi_index(
self, original_dict, sorted_dict, ascending, ignore_index, output_index
):
# GH 30114, this is to test ignore_index on MulitIndex of index
mi = MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB"))
df = DataFrame(original_dict, index=mi)
expected_df = DataFrame(sorted_dict, index=output_index)

sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index)
tm.assert_frame_equal(sorted_df, expected_df)
tm.assert_frame_equal(df, DataFrame(original_dict, index=mi))

# Test when inplace is True
copied_df = df.copy()
copied_df.sort_index(
ascending=ascending, ignore_index=ignore_index, inplace=True
)
tm.assert_frame_equal(copied_df, expected_df)
tm.assert_frame_equal(df, DataFrame(original_dict, index=mi))
31 changes: 31 additions & 0 deletions pandas/tests/series/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,34 @@ def test_sort_index_intervals(self):
[3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"original_list, sorted_list, ascending, ignore_index, output_index",
[
([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]),
([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]),
([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]),
([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]),
],
)
def test_sort_index_ignore_index(
self, original_list, sorted_list, ascending, ignore_index, output_index
):
# GH 30114
ser = Series(original_list)
expected = Series(sorted_list, index=output_index)

# Test when inplace is False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can inplace be parameterised easily

Copy link
Member Author

@charlesdong1991 charlesdong1991 Dec 31, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

emm, i think in this case, if using parameterization, might not look very clean and less readable

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be parametrized - what do you think makes it less readable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok with trying parameterize as a followup, as for .sort_values and .duplicated these tests look very much like this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

emm, okay! i thought with inplace being Ture, we do not need to do something like df = df.sort_index(inplace=True), but False needs to be like this, and testing object will be different, one is testing if original changes, another is to test if assigned new df is changed.

Sorry to bring up discussion here, it definitely can be parametrized, was just my preference on such case. I will parametrize all three methods in the follow-up PR for sure!

sorted_sr = ser.sort_index(ascending=ascending, ignore_index=ignore_index)
tm.assert_series_equal(sorted_sr, expected)

tm.assert_series_equal(ser, Series(original_list))

# Test when inplace is True
copied_sr = ser.copy()
copied_sr.sort_index(
ascending=ascending, ignore_index=ignore_index, inplace=True
)
tm.assert_series_equal(copied_sr, expected)

tm.assert_series_equal(ser, Series(original_list))