diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3810ab37822cc..1f25662bd6c20 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -221,6 +221,7 @@ Other enhancements - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) - :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`) +- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) - Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fb1ba4f6f53f8..cdbeeae984456 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1988,7 +1988,7 @@ def to_feather(self, path): @Substitution(klass="DataFrame") @Appender(_shared_docs["to_markdown"]) def to_markdown( - self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs, + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: kwargs.setdefault("headers", "keys") kwargs.setdefault("tablefmt", "pipe") @@ -4831,6 +4831,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, + ignore_index: bool = False, ): # TODO: this can be combined with Series.sort_index impl as @@ -4881,6 +4882,10 @@ def sort_index( # reconstruct axis if needed new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() + if ignore_index: + # reset labels on the axis that was sorted (baxis), not unconditionally on axes[1] + new_data.axes[baxis] = ibase.default_index(len(indexer)) + if inplace: return self._update_inplace(new_data) else: diff --git 
a/pandas/core/generic.py b/pandas/core/generic.py index 85bbf9b553b0a..ea8cd3e9f341b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4178,6 +4178,7 @@ def sort_index( kind: str = "quicksort", na_position: str = "last", sort_remaining: bool_t = True, + ignore_index: bool_t = False, ): """ Sort object by labels (along an axis). @@ -4204,6 +4205,10 @@ def sort_index( sort_remaining : bool, default True If True and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index aa5af9bb893fa..99d19035b1c77 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1433,7 +1433,7 @@ def to_string( @Substitution(klass="Series") @Appender(generic._shared_docs["to_markdown"]) def to_markdown( - self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs, + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: return self.to_frame().to_markdown(buf, mode, **kwargs) @@ -2963,6 +2963,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, + ignore_index: bool = False, ): """ Sort Series by index labels. @@ -2991,6 +2992,10 @@ def sort_index( sort_remaining : bool, default True If True and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. 
versionadded:: 1.0.0 Returns ------- @@ -3118,6 +3123,9 @@ def sort_index( new_values = self._values.take(indexer) result = self._constructor(new_values, index=new_index) + if ignore_index: + result.index = ibase.default_index(len(result)) + if inplace: self._update_inplace(result) else: @@ -4478,9 +4486,7 @@ def to_period(self, freq=None, copy=True): hist = pandas.plotting.hist_series -Series._setup_axes( - ["index"], docs={"index": "The index (axis labels) of the Series."}, -) +Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."}) Series._add_numeric_operations() Series._add_series_or_dataframe_operations() diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 4f311bbaa8eb9..6866aab11d2fa 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -229,3 +229,85 @@ def test_sort_index_intervalindex(self): ) result = result.columns.levels[1].categories tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, False, [2, 3, 5]), + ], + ) + def test_sort_index_ignore_index( + self, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114 + original_index = [2, 5, 3] + df = DataFrame(original_dict, index=original_index) + expected_df = DataFrame(sorted_dict, index=output_index) + + sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index) + tm.assert_frame_equal(sorted_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) + + # Test when inplace is True + copied_df = df.copy() + copied_df.sort_index( + ascending=ascending, 
ignore_index=ignore_index, inplace=True + ) + tm.assert_frame_equal(copied_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) + + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + False, + MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")), + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + False, + MultiIndex.from_tuples([[3, 4], [2, 1]], names=list("AB")), + ), + ], + ) + def test_sort_index_ignore_index_multi_index( + self, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114, this is to test ignore_index when the row index is a MultiIndex + mi = MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")) + df = DataFrame(original_dict, index=mi) + expected_df = DataFrame(sorted_dict, index=output_index) + + sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index) + tm.assert_frame_equal(sorted_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) + + # Test when inplace is True + copied_df = df.copy() + copied_df.sort_index( + ascending=ascending, ignore_index=ignore_index, inplace=True + ) + tm.assert_frame_equal(copied_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index ab15b8c814029..a9b73c2344681 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -135,3 +135,34 @@ def test_sort_index_intervals(self): [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1]) ) 
tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "original_list, sorted_list, ascending, ignore_index, output_index", + [ + ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]), + ], + ) + def test_sort_index_ignore_index( + self, original_list, sorted_list, ascending, ignore_index, output_index + ): + # GH 30114 + ser = Series(original_list) + expected = Series(sorted_list, index=output_index) + + # Test when inplace is False + sorted_sr = ser.sort_index(ascending=ascending, ignore_index=ignore_index) + tm.assert_series_equal(sorted_sr, expected) + + tm.assert_series_equal(ser, Series(original_list)) + + # Test when inplace is True + copied_sr = ser.copy() + copied_sr.sort_index( + ascending=ascending, ignore_index=ignore_index, inplace=True + ) + tm.assert_series_equal(copied_sr, expected) + + tm.assert_series_equal(ser, Series(original_list))