diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index ce7e8be16d8e2..77b3e3bd25740 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -94,6 +94,7 @@ Numeric Categorical ^^^^^^^^^^^ +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) Other ^^^^^ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index a324bf94171ce..ccbd3d9704e0c 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -107,6 +107,14 @@ def validate_argmax_with_skipna(skipna, args, kwargs): validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort', max_fname_arg_count=0, method='both') +# two different signatures of argsort, this second validation +# for when the `kind` param is supported +ARGSORT_DEFAULTS_KIND = OrderedDict() +ARGSORT_DEFAULTS_KIND['axis'] = -1 +ARGSORT_DEFAULTS_KIND['order'] = None +validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort', + max_fname_arg_count=0, method='both') + def validate_argsort_with_ascending(ascending, args, kwargs): """ @@ -121,7 +129,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): args = (ascending,) + args ascending = True - validate_argsort(args, kwargs, max_fname_arg_count=1) + validate_argsort_kind(args, kwargs, max_fname_arg_count=3) return ascending diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 796b2696af9ce..afae11163b0dc 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1288,7 +1288,7 @@ def check_for_ordered(self, op): "you can use .as_ordered() to change the " "Categorical to an ordered one\n".format(op=op)) - def argsort(self, ascending=True, *args, **kwargs): + def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): """ Returns the indices that would sort the Categorical instance if 'sort_values' was called. 
This function is implemented to provide @@ -1309,7 +1309,7 @@ def argsort(self, ascending=True, *args, **kwargs): numpy.ndarray.argsort """ ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - result = np.argsort(self._codes.copy(), **kwargs) + result = np.argsort(self._codes.copy(), kind=kind, **kwargs) if not ascending: result = result[::-1] return result diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 69b427df981b7..10b80cbc3483d 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -233,7 +233,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): # specially handle Categorical if is_categorical_dtype(items): - return items.argsort(ascending=ascending) + return items.argsort(ascending=ascending, kind=kind) items = np.asanyarray(items) idx = np.arange(len(items)) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 98f7f82c0ace7..891c94b59074a 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -238,6 +238,15 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_stable_categorial(self): + # GH 16793 + df = DataFrame({ + 'x': pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True) + }) + expected = df.copy() + sorted_df = df.sort_values('x', kind='mergesort') + assert_frame_equal(sorted_df, expected) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 92177ca07d835..667b26c24c662 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -585,9 +585,8 @@ def test_numpy_argsort(self): tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) - msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, kind='mergesort') + 
tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected, + check_dtype=False) msg = "the 'axis' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.argsort,