Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: fix Series.argsort #42090

Closed
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
8061f94
BUG: reordering the index labels
attack68 Jun 17, 2021
f0c4d1a
BUG: reordering the index labels
attack68 Jun 17, 2021
fa848c0
amend tests
attack68 Jun 17, 2021
701d3c8
update method and amend tests
attack68 Jun 18, 2021
c86f78d
special if for SparseArray
attack68 Jun 18, 2021
2d92c34
mypy fix
attack68 Jun 18, 2021
9d4be70
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jun 18, 2021
3146640
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jun 20, 2021
482b8ad
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jun 22, 2021
228aa1f
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jun 28, 2021
2281d8d
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jun 29, 2021
28e0c5e
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jun 30, 2021
82b6092
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jul 7, 2021
94fefdc
whats new 1.4.0
attack68 Jul 7, 2021
944f08b
add possible return formats with "first" "last" and tests
attack68 Jul 8, 2021
372a8db
default
attack68 Jul 8, 2021
980d1d4
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jul 13, 2021
4f5777a
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jul 20, 2021
5585d01
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Jul 24, 2021
1e2f0de
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 6, 2021
49f9137
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 8, 2021
b3ae107
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 11, 2021
da0dcf2
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 17, 2021
fdf1bac
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 19, 2021
c1a6e18
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 19, 2021
9db6f97
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 25, 2021
82fbb76
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Aug 31, 2021
2552d8f
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Sep 7, 2021
40c94e2
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Sep 15, 2021
0e76ee3
whats new
attack68 Sep 18, 2021
eb6d499
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Sep 18, 2021
a78b9ff
whats new
attack68 Sep 18, 2021
dfcdad7
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Sep 22, 2021
0bcacb9
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Oct 2, 2021
220fc67
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Oct 2, 2021
560298f
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Oct 9, 2021
5381a0f
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Oct 11, 2021
30dee32
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Oct 24, 2021
ec453af
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Nov 2, 2021
5d3124e
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Nov 4, 2021
c8e69d8
fix tests
attack68 Nov 4, 2021
4f4e8ce
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Nov 5, 2021
6004737
Merge remote-tracking branch 'upstream/master' into argsort_labelling…
attack68 Nov 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ Interval
Indexing
^^^^^^^^
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
-
- Bug in :class:`Series.argsort` where the index was misaligned and the indexed values were not correct (:issue:`42056`, :issue:`12694`)

Missing
^^^^^^^
Expand Down
69 changes: 55 additions & 14 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3637,8 +3637,8 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series:
"""
Return the integer indices that would sort the Series values.

Override ndarray.argsort. Argsorts the value, omitting NA/null values,
and places the result in the same locations as the non-NA values.
Override ndarray.argsort. NA/null sort indices are replaced by -1. The index
is sorted so that index labels correspond to the integer indices

Parameters
----------
Expand All @@ -3659,21 +3659,62 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series:
See Also
attack68 marked this conversation as resolved.
Show resolved Hide resolved
--------
numpy.ndarray.argsort : Returns the indices that would sort this array.
"""
values = self._values
mask = isna(values)

if mask.any():
result = Series(-1, index=self.index, name=self.name, dtype="int64")
notmask = ~mask
result[notmask] = np.argsort(values[notmask], kind=kind)
return self._constructor(result, index=self.index).__finalize__(
Examples
--------

Argsorting a basic Series.

>>> series = Series([30, 10, 20], index=["high", "low", "mid"], name="xy")
>>> series.argsort()
low 1
mid 2
high 0
Name: xy, dtype: int64

Argsorting a Series with null values.

>>> series = Series([30, 10, np.nan, 20], name="xy",
... index=["high", "low", "null", "mid"])
>>> series.argsort()
low 1
mid 3
high 0
null -1
Name: xy, dtype: int64
"""
values = self.values
na_mask = isna(values)
if not any(na_mask):
res = np.argsort(values, kind=kind)
res_ser = Series(res, index=self.index[res], dtype="int64", name=self.name)
return res_ser.__finalize__(self, method="argsort")
else:
# GH 42090
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be very clear that we are ordering nulls at the na_position='last' (equiv of sort_values). i think its fine not to expose this as an option (as numpy doesn't).

# count the missing index values that will be added to not na results
if isinstance(na_mask, pandas.core.arrays.sparse.SparseArray):
# avoid RecursionError
na_mask = np.asarray(na_mask)
notna_na_cumsum = na_mask.cumsum()[~na_mask]
notna_argsort = np.argsort(values[~na_mask])
notna_argsort += notna_na_cumsum[notna_argsort]
# combine the notna and na series
na_res_ser = Series(
-1, index=self.index[na_mask], dtype="int64", name=self.name
)
notna_res_ser = Series(
notna_argsort,
index=self.index[notna_argsort],
dtype="int64",
name=self.name,
)
from pandas.core.reshape.concat import concat

ret_ser = concat([notna_res_ser, na_res_ser]).__finalize__(
self, method="argsort"
)
else:
return self._constructor(
np.argsort(values, kind=kind), index=self.index, dtype="int64"
).__finalize__(self, method="argsort")
assert isinstance(ret_ser, Series) # mypy: concat 2 Series so is OK
return ret_ser

def nlargest(self, n=5, keep="first") -> Series:
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_apply_simple_series(self, data):

def test_argsort(self, data_for_sorting):
result = pd.Series(data_for_sorting).argsort()
expected = pd.Series(np.array([2, 0, 1], dtype=np.int64))
expected = pd.Series(np.array([2, 0, 1], dtype=np.int64), index=[2, 0, 1])
self.assert_series_equal(result, expected)

def test_argsort_missing_array(self, data_missing_for_sorting):
Expand All @@ -84,7 +84,7 @@ def test_argsort_missing_array(self, data_missing_for_sorting):

def test_argsort_missing(self, data_missing_for_sorting):
result = pd.Series(data_missing_for_sorting).argsort()
expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
expected = pd.Series(np.array([2, 0, -1], dtype=np.int64), index=[2, 0, 1])
self.assert_series_equal(result, expected)

def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value):
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/series/methods/test_argsort.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def _check_accum_op(self, name, ser, check_dtype=True):
ts = ser.copy()
ts[::2] = np.NaN

result = func(ts)[1::2]
expected = func(np.array(ts.dropna()))
result = func(ts)
expected = func(ts.to_numpy())
expected[int(len(expected) / 2) :] = -1

tm.assert_numpy_array_equal(result.values, expected, check_dtype=False)

Expand Down Expand Up @@ -53,8 +54,12 @@ def test_argsort_stable(self):
mexpected = np.argsort(s.values, kind="mergesort")
qexpected = np.argsort(s.values, kind="quicksort")

tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected))
tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected))
tm.assert_series_equal(
mindexer.astype(np.intp), Series(mexpected, index=s.index[mexpected])
)
tm.assert_series_equal(
qindexer.astype(np.intp), Series(qexpected, index=s.index[qexpected])
)
msg = (
r"ndarray Expected type <class 'numpy\.ndarray'>, "
r"found <class 'pandas\.core\.series\.Series'> instead"
Expand Down