Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add ignore_index for df.sort_values and series.sort_values #30402

Merged
merged 24 commits into from
Dec 27, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e461a1
remove \n from docstring
charlesdong1991 Dec 3, 2018
1314059
fix conflicts
charlesdong1991 Jan 19, 2019
8bcb313
Merge remote-tracking branch 'upstream/master'
charlesdong1991 Jul 30, 2019
1f9a4ce
Merge remote-tracking branch 'upstream/master' into fix_issue_30114
charlesdong1991 Dec 9, 2019
5f924c3
Merge remote-tracking branch 'upstream/master' into fix_issue_30114
charlesdong1991 Dec 22, 2019
d0a134f
Add ignore index for sort values
charlesdong1991 Dec 22, 2019
6d52765
black reformat
charlesdong1991 Dec 22, 2019
a31797d
add tests
charlesdong1991 Dec 22, 2019
b80f380
remove type hint to see if test passes
charlesdong1991 Dec 22, 2019
b997d3f
code change based on WA review
charlesdong1991 Dec 23, 2019
12d1260
restore reformat change on other parts
charlesdong1991 Dec 23, 2019
4ff2493
revert change
charlesdong1991 Dec 23, 2019
e9d63f4
change bool
charlesdong1991 Dec 23, 2019
70ffec7
remove annotation
charlesdong1991 Dec 23, 2019
b4245d7
remove for series
charlesdong1991 Dec 23, 2019
f9e7ec2
add ignore_index for series
charlesdong1991 Dec 23, 2019
d95a89f
keep consistency
charlesdong1991 Dec 23, 2019
f241e67
revert change
charlesdong1991 Dec 23, 2019
7f9846a
resolve conflict
charlesdong1991 Dec 23, 2019
bbb4754
restore change
charlesdong1991 Dec 23, 2019
3c37eb9
code change based on WA and JR reviews
charlesdong1991 Dec 24, 2019
4ce9f43
better english
charlesdong1991 Dec 24, 2019
0f89aa2
skip annotation
charlesdong1991 Dec 24, 2019
d02b651
code change on JR review
charlesdong1991 Dec 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ Other API changes
Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`).
- ``ignore_index`` is added in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` to be able to reset index after sorting (:issue:`30114`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reverse the ordering a bit :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained the ignore_index......

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed and moved!



Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move this to other enhancements

.. _whatsnew_1000.api.documentation:
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4704,6 +4704,7 @@ def sort_values(
inplace=False,
kind="quicksort",
na_position="last",
ignore_index=False,
jreback marked this conversation as resolved.
Show resolved Hide resolved
jreback marked this conversation as resolved.
Show resolved Hide resolved
):
inplace = validate_bool_kwarg(inplace, "inplace")
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -4737,6 +4738,9 @@ def sort_values(
indexer, axis=self._get_block_manager_axis(axis), verify=False
)

if ignore_index:
new_data.axes[1] = ibase.default_index(len(indexer))
WillAyd marked this conversation as resolved.
Show resolved Hide resolved

if inplace:
return self._update_inplace(new_data)
else:
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4087,6 +4087,7 @@ def sort_values(
inplace: bool_t = False,
kind: str = "quicksort",
na_position: str = "last",
ignore_index: bool_t = False,
):
"""
Sort by the values along either axis.
Expand All @@ -4109,6 +4110,8 @@ def sort_values(
na_position : {'first', 'last'}, default 'last'
Puts NaNs at the beginning if `first`; `last` puts NaNs at the
end.
ignore_index : bool, default False
WillAyd marked this conversation as resolved.
Show resolved Hide resolved
If True, the resulting axis will be labeled 0, 1, …, n - 1.

Returns
-------
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,7 @@ def sort_values(
inplace=False,
kind="quicksort",
na_position="last",
ignore_index=False,
WillAyd marked this conversation as resolved.
Show resolved Hide resolved
):
"""
Sort by the values.
Expand All @@ -2715,6 +2716,8 @@ def sort_values(
na_position : {'first' or 'last'}, default 'last'
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
the end.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, …, n - 1.

Returns
-------
Expand Down Expand Up @@ -2855,6 +2858,9 @@ def _try_kind_sort(arr):

result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx])

if ignore_index:
jreback marked this conversation as resolved.
Show resolved Hide resolved
result = result.reset_index(drop=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for this to differ with the frame implementation? Should be the same in both cases

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, sorry! I thought the other one was faster in the first place, and then later found basically no difference, and forgot to align them to keep consistency.

Changed! thanks! @WillAyd

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh!! i recall why they are different, this new_data here is BlockManager while in series is not. I revert the change.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use result.index = ibase.default_index(len(indexer))

this is NOT a BlockManager here, but a series

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh, sorry for my bad English, i meant new_data in frame.py is BlockManager (should have used there other than here to be precise 😅 ), here it is Series. @jreback

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you will see my comment in the PR about drop_duplicates. We do not want to use .reset_index directly at all. This causes yet another copy of the data which we can easily avoid here.

you have to be careful to avoid mutating things though.


if inplace:
self._update_inplace(result)
else:
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,36 @@ def test_sort_nat(self):
sorted_df = df.sort_values(by=["a", "b"])
tm.assert_frame_equal(sorted_df, expected)

@pytest.mark.parametrize(
"original_dict, sorted_dict, ignore_index, output_index",
[
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]),
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]),
(
{"A": [1, 2, 3], "B": [2, 3, 4]},
{"A": [3, 2, 1], "B": [4, 3, 2]},
True,
[0, 1, 2],
),
(
{"A": [1, 2, 3], "B": [2, 3, 4]},
{"A": [3, 2, 1], "B": [4, 3, 2]},
False,
[2, 1, 0],
),
],
)
def test_sort_values_ignore_index(
self, original_dict, sorted_dict, ignore_index, output_index
):

# GH 30114
df = pd.DataFrame(original_dict)
sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index)

expected = pd.DataFrame(sorted_dict, index=output_index)
tm.assert_frame_equal(sorted_df, expected)


class TestDataFrameSortIndexKinds:
def test_sort_index_multicolumn(self):
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,21 @@ def test_sort_values_categorical(self):
result = df.sort_values(by=["grade", "id"])
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"original_list, sorted_list, ignore_index, output_index",
[
([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
],
)
def test_sort_values_ignore_index(
self, original_list, sorted_list, ignore_index, output_index
):

# GH 30114
sr = Series(original_list)
sorted_sr = sr.sort_values(ascending=False, ignore_index=ignore_index)

expected = Series(sorted_list, index=output_index)
tm.assert_series_equal(sorted_sr, expected)