From 9d7afa730f99770e905907b8128b99e97aa5fff5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Jun 2017 05:30:57 -0500 Subject: [PATCH] BUG: Fixed pd.unique on array of tuples (#16543) --- doc/source/whatsnew/v0.20.2.txt | 3 +-- pandas/core/algorithms.py | 7 ++++++- pandas/tests/test_algos.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 9f88d629880ed..31df5899f0fc3 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,8 +44,7 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`) - - +- Bug in :func:`unique` on an array of tuples (:issue:`16519`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 77d79c9585e57..d74c5e66ea1a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -163,7 +163,7 @@ def _ensure_arraylike(values): ABCIndexClass, ABCSeries)): inferred = lib.infer_dtype(values) if inferred in ['mixed', 'string', 'unicode']: - values = np.asarray(values, dtype=object) + values = lib.list_to_object_array(values) else: values = np.asarray(values) return values @@ -328,6 +328,11 @@ def unique(values): [b, a, c] Categories (3, object): [a < b < c] + An array of tuples + + >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + See Also -------- pandas.Index.unique diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 351e646cbb0b2..063dcea5c76d6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -929,6 +929,22 @@ def test_unique_index(self): tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) + @pytest.mark.parametrize('arr, unique', [ + ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)]), + ([('b', 'c'), ('a', 'b'), ('a', 'b'), ('b', 'c')], + [('b', 'c'), ('a', 'b')]), + ([('a', 1), ('b', 2), ('a', 3), ('a', 1)], + [('a', 1), ('b', 2), ('a', 3)]), + ]) + def test_unique_tuples(self, arr, unique): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(unique), dtype=object) + expected[:] = unique + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + class GroupVarTestMixin(object):