From c561895e1e53d5b89b567e718229e633c5c3c623 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Fri, 7 Jul 2017 05:17:15 -0500 Subject: [PATCH] BUG: kind parameter on categorical argsort (#16834) (cherry picked from commit 5cc1025a78ba316ea058ad6ea70a2104cc05345a) --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/compat/numpy/function.py | 10 +++++++++- pandas/core/categorical.py | 4 ++-- pandas/core/sorting.py | 2 +- pandas/tests/frame/test_sorting.py | 9 +++++++++ pandas/tests/test_categorical.py | 5 ++--- 6 files changed, 24 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 8580240c7e948..c11c74211030a 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -95,6 +95,7 @@ Numeric Categorical ^^^^^^^^^^^ +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) Other ^^^^^ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index a324bf94171ce..ccbd3d9704e0c 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -107,6 +107,14 @@ def validate_argmax_with_skipna(skipna, args, kwargs): validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort', max_fname_arg_count=0, method='both') +# two different signatures of argsort, this second validation +# for when the `kind` param is supported +ARGSORT_DEFAULTS_KIND = OrderedDict() +ARGSORT_DEFAULTS_KIND['axis'] = -1 +ARGSORT_DEFAULTS_KIND['order'] = None +validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort', + max_fname_arg_count=0, method='both') + def validate_argsort_with_ascending(ascending, args, kwargs): """ @@ -121,7 +129,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): args = (ascending,) + args ascending = True - validate_argsort(args, kwargs, max_fname_arg_count=1) + validate_argsort_kind(args, kwargs, max_fname_arg_count=3) return ascending diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index a5e61797bd478..7b169123006bd 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1284,7 +1284,7 @@ def check_for_ordered(self, op): "you can use .as_ordered() to change the " "Categorical to an ordered one\n".format(op=op)) - def argsort(self, ascending=True, *args, **kwargs): + def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): """ Returns the indices that would sort the Categorical instance if 'sort_values' was called. This function is implemented to provide @@ -1305,7 +1305,7 @@ def argsort(self, ascending=True, *args, **kwargs): numpy.ndarray.argsort """ ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - result = np.argsort(self._codes.copy(), **kwargs) + result = np.argsort(self._codes.copy(), kind=kind, **kwargs) if not ascending: result = result[::-1] return result diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 69b427df981b7..10b80cbc3483d 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -233,7 +233,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): # specially handle Categorical if is_categorical_dtype(items): - return items.argsort(ascending=ascending) + return items.argsort(ascending=ascending, kind=kind) items = np.asanyarray(items) idx = np.arange(len(items)) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 98f7f82c0ace7..891c94b59074a 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -238,6 +238,15 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_stable_categorial(self): + # GH 16793 + df = DataFrame({ + 'x': pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True) + }) + expected = df.copy() + sorted_df = df.sort_values('x', kind='mergesort') + assert_frame_equal(sorted_df, expected) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 3471f0b13b84b..06a674556cdef 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -586,9 +586,8 @@ def test_numpy_argsort(self): tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) - msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, kind='mergesort') + tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected, + check_dtype=False) msg = "the 'axis' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.argsort,