From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 1/7] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") 
week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 
c03ed13a0bf00157f4ee44169bb36dfb7470157b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 31 Dec 2019 11:44:12 +0100 Subject: [PATCH 2/7] Add ignore index to sort index --- pandas/core/frame.py | 6 +- pandas/core/generic.py | 5 ++ pandas/core/series.py | 14 +++- pandas/tests/frame/methods/test_sort_index.py | 81 +++++++++++++++++++ .../tests/series/methods/test_sort_index.py | 32 ++++++++ 5 files changed, 133 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1de0d3b58dc5f..879c510e5b2b4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1984,7 +1984,7 @@ def to_feather(self, path): @Substitution(klass="DataFrame") @Appender(_shared_docs["to_markdown"]) def to_markdown( - self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs, + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: kwargs.setdefault("headers", "keys") kwargs.setdefault("tablefmt", "pipe") @@ -4827,6 +4827,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, + ignore_index=False, ): # TODO: this can be combined with Series.sort_index impl as @@ -4877,6 +4878,9 @@ def sort_index( # reconstruct axis if needed new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() + if ignore_index: + new_data.axes[1] = ibase.default_index(len(indexer)) + if inplace: return self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2b108d3997235..1b75e561f7eb1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4178,6 +4178,7 @@ def sort_index( kind: str = "quicksort", na_position: str = "last", sort_remaining: bool_t = True, + ignore_index: bool_t = False, ): """ Sort object by labels (along an axis). @@ -4204,6 +4205,10 @@ def sort_index( sort_remaining : bool, default True If True and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level. 
+ ignore_index: bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 36e26e088935c..8b2ef124af985 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1433,7 +1433,7 @@ def to_string( @Substitution(klass="Series") @Appender(_shared_docs["to_markdown"]) def to_markdown( - self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs, + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs ) -> Optional[str]: return self.to_frame().to_markdown(buf, mode, **kwargs) @@ -2880,6 +2880,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, + ignore_index=False, ): """ Sort Series by index labels. @@ -2908,6 +2909,10 @@ def sort_index( sort_remaining : bool, default True If True and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level. + ignore_index: bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. 
versionadded:: 1.0.0 Returns ------- @@ -3035,6 +3040,9 @@ def sort_index( new_values = self._values.take(indexer) result = self._constructor(new_values, index=new_index) + if ignore_index: + result.index = ibase.default_index(len(result)) + if inplace: self._update_inplace(result) else: @@ -4395,9 +4403,7 @@ def to_period(self, freq=None, copy=True): hist = pandas.plotting.hist_series -Series._setup_axes( - ["index"], docs={"index": "The index (axis labels) of the Series."}, -) +Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."}) Series._add_numeric_operations() Series._add_series_or_dataframe_operations() diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 4f311bbaa8eb9..2c1a7540f0a0c 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -229,3 +229,84 @@ def test_sort_index_intervalindex(self): ) result = result.columns.levels[1].categories tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 2, 3]}, True, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, False, [2, 1, 0]), + ({"A": [1, 2, 3]}, {"A": [1, 2, 3]}, True, False, [0, 1, 2]), + ], + ) + def test_sort_index_ignore_index( + self, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114 + df = DataFrame(original_dict) + expected_df = DataFrame(sorted_dict, index=output_index) + + sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index) + tm.assert_frame_equal(sorted_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict)) + + # Test when inplace is True + copied_df = df.copy() + copied_df.sort_index( + ascending=ascending, ignore_index=ignore_index, inplace=True + ) + 
tm.assert_frame_equal(copied_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict)) + + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + False, + MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")), + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + False, + MultiIndex.from_tuples([[3, 4], [2, 1]], names=list("AB")), + ), + ], + ) + def test_sort_index_ignore_index_multi_index( + self, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114, this is to test ignore_index on MultiIndex of index + mi = MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")) + df = DataFrame(original_dict, index=mi) + expected_df = DataFrame(sorted_dict, index=output_index) + + sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index) + tm.assert_frame_equal(sorted_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) + + # Test when inplace is True + copied_df = df.copy() + copied_df.sort_index( + ascending=ascending, ignore_index=ignore_index, inplace=True + ) + tm.assert_frame_equal(copied_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index ab15b8c814029..57096de283915 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -135,3 +135,35 @@ def test_sort_index_intervals(self): [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1]) ) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + 
"original_list, sorted_list, ascending, ignore_index, output_index", + [ + ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]), + ], + ) + def test_sort_index_ignore_index( + self, original_list, sorted_list, ascending, ignore_index, output_index + ): + # GH 30114 + # GH 30114 + sr = Series(original_list) + expected = Series(sorted_list, index=output_index) + + # Test when inplace is False + sorted_sr = sr.sort_index(ascending=ascending, ignore_index=ignore_index) + tm.assert_series_equal(sorted_sr, expected) + + tm.assert_series_equal(sr, Series(original_list)) + + # Test when inplace is True + copied_sr = sr.copy() + copied_sr.sort_index( + ascending=ascending, ignore_index=ignore_index, inplace=True + ) + tm.assert_series_equal(copied_sr, expected) + + tm.assert_series_equal(sr, Series(original_list)) From 4a9c645d0a402d53d34205ab6b13441c945e0037 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 31 Dec 2019 11:49:35 +0100 Subject: [PATCH 3/7] Add whatsnew note --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 77c4ed6160dbe..3f3a158262c9b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -221,7 +221,7 @@ Other enhancements - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) - :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`) - +- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) - :meth:`DataFrame.drop_duplicates` has gained 
``ignore_index`` keyword to reset index (:issue:`30114`) Build Changes From 7a2651eafaaa2d245872c8ff9d8e54eeff6d7cb9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 31 Dec 2019 12:12:48 +0100 Subject: [PATCH 4/7] Add space to make docstring pass --- pandas/core/generic.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1b75e561f7eb1..fab00065c3b46 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4205,7 +4205,7 @@ def sort_index( sort_remaining : bool, default True If True and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level. - ignore_index: bool, default False + ignore_index : bool, default False If True, the resulting axis will be labeled 0, 1, …, n - 1. .. versionadded:: 1.0.0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 8b2ef124af985..f9acfe87bad36 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2909,7 +2909,7 @@ def sort_index( sort_remaining : bool, default True If True and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level. - ignore_index: bool, default False + ignore_index : bool, default False If True, the resulting axis will be labeled 0, 1, …, n - 1. .. 
versionadded:: 1.0.0 From 85cb1979ad161fa7f1734f517c679e467dd19879 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 31 Dec 2019 13:13:28 +0100 Subject: [PATCH 5/7] code change on reviews --- pandas/core/frame.py | 4 +++- pandas/core/series.py | 4 +++- pandas/tests/series/methods/test_sort_index.py | 11 +++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 879c510e5b2b4..b1e4b301f3228 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -308,6 +308,8 @@ # ----------------------------------------------------------------------- # DataFrame class +bool_t = bool # Need alias because NDFrame has def bool: + class DataFrame(NDFrame): """ @@ -4827,7 +4829,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, - ignore_index=False, + ignore_index: bool_t = False, ): # TODO: this can be combined with Series.sort_index impl as diff --git a/pandas/core/series.py b/pandas/core/series.py index f9acfe87bad36..1646b9bbf5c1d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -118,6 +118,8 @@ def wrapper(self): # ---------------------------------------------------------------------- # Series class +bool_t = bool # Need alias because NDFrame has def bool: + class Series(base.IndexOpsMixin, generic.NDFrame): """ @@ -2880,7 +2882,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, - ignore_index=False, + ignore_index: bool_t = False, ): """ Sort Series by index labels. 
diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index 57096de283915..a9b73c2344681 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -149,21 +149,20 @@ def test_sort_index_ignore_index( self, original_list, sorted_list, ascending, ignore_index, output_index ): # GH 30114 - # GH 30114 - sr = Series(original_list) + ser = Series(original_list) expected = Series(sorted_list, index=output_index) # Test when inplace is False - sorted_sr = sr.sort_index(ascending=ascending, ignore_index=ignore_index) + sorted_sr = ser.sort_index(ascending=ascending, ignore_index=ignore_index) tm.assert_series_equal(sorted_sr, expected) - tm.assert_series_equal(sr, Series(original_list)) + tm.assert_series_equal(ser, Series(original_list)) # Test when inplace is True - copied_sr = sr.copy() + copied_sr = ser.copy() copied_sr.sort_index( ascending=ascending, ignore_index=ignore_index, inplace=True ) tm.assert_series_equal(copied_sr, expected) - tm.assert_series_equal(sr, Series(original_list)) + tm.assert_series_equal(ser, Series(original_list)) From bb6fee43649e1d8839aa3c80a50ab2f0014f8f88 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 3 Jan 2020 08:23:14 +0100 Subject: [PATCH 6/7] test change based on WA reviews --- pandas/tests/frame/methods/test_sort_index.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 2c1a7540f0a0c..6866aab11d2fa 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -233,22 +233,23 @@ def test_sort_index_intervalindex(self): @pytest.mark.parametrize( "original_dict, sorted_dict, ascending, ignore_index, output_index", [ - ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, True, [0, 1, 2]), - ({"A": [1, 2, 3]}, {"A": [1, 2, 3]}, True, True, [0, 1, 2]), - 
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, False, [2, 1, 0]), - ({"A": [1, 2, 3]}, {"A": [1, 2, 3]}, True, False, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, False, [2, 3, 5]), ], ) def test_sort_index_ignore_index( self, original_dict, sorted_dict, ascending, ignore_index, output_index ): # GH 30114 - df = DataFrame(original_dict) + original_index = [2, 5, 3] + df = DataFrame(original_dict, index=original_index) expected_df = DataFrame(sorted_dict, index=output_index) sorted_df = df.sort_index(ascending=ascending, ignore_index=ignore_index) tm.assert_frame_equal(sorted_df, expected_df) - tm.assert_frame_equal(df, DataFrame(original_dict)) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) # Test when inplace is True copied_df = df.copy() @@ -256,7 +257,7 @@ def test_sort_index_ignore_index( ascending=ascending, ignore_index=ignore_index, inplace=True ) tm.assert_frame_equal(copied_df, expected_df) - tm.assert_frame_equal(df, DataFrame(original_dict)) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) @pytest.mark.parametrize( "original_dict, sorted_dict, ascending, ignore_index, output_index", From d4f787a8323d25e19464f02f51089e3953c5cb4b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 3 Jan 2020 12:00:55 +0100 Subject: [PATCH 7/7] remove bool_t --- pandas/core/frame.py | 4 +--- pandas/core/series.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index de82b9e3b3b97..cdbeeae984456 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -309,8 +309,6 @@ # ----------------------------------------------------------------------- # DataFrame class -bool_t = bool # Need alias because NDFrame has def bool: - class DataFrame(NDFrame): """ @@ 
-4833,7 +4831,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, - ignore_index: bool_t = False, + ignore_index: bool = False, ): # TODO: this can be combined with Series.sort_index impl as diff --git a/pandas/core/series.py b/pandas/core/series.py index 3d775b5a6345a..99d19035b1c77 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -118,8 +118,6 @@ def wrapper(self): # ---------------------------------------------------------------------- # Series class -bool_t = bool # Need alias because NDFrame has def bool: - class Series(base.IndexOpsMixin, generic.NDFrame): """ @@ -2965,7 +2963,7 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, - ignore_index: bool_t = False, + ignore_index: bool = False, ): """ Sort Series by index labels.