Skip to content

Commit

Permalink
Bug issue 16819 Index.get_indexer_non_unique inconsistent return type…
Browse files Browse the repository at this point in the history
…s vs get_indexer (#16826)
  • Loading branch information
ri938 authored and jreback committed Jul 6, 2017
1 parent cc5d20f commit 7d0a98e
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Backwards incompatible API changes
- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
- Compression defaults in HDF stores now follow PyTables standards. The default is no compression, and if ``complib`` is missing and ``complevel`` > 0, ``zlib`` is used (:issue:`15943`)
- ``Index.get_indexer_non_unique()`` now returns an ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)

.. _whatsnew_0210.api:

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,8 +896,9 @@ def reset_identity(values):
# we can't reindex, so we resort to this
# GH 14776
if isinstance(ax, MultiIndex) and not ax.is_unique:
result = result.take(result.index.get_indexer_for(
ax.values).unique(), axis=self.axis)
indexer = algorithms.unique1d(
result.index.get_indexer_for(ax.values))
result = result.take(indexer, axis=self.axis)
else:
result = result.reindex_axis(ax, axis=self.axis)

Expand Down
7 changes: 3 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2256,8 +2256,8 @@ def intersection(self, other):
indexer = indexer.take((indexer != -1).nonzero()[0])
except:
# duplicates
indexer = Index(other._values).get_indexer_non_unique(
self._values)[0].unique()
indexer = algos.unique1d(
Index(other._values).get_indexer_non_unique(self._values)[0])
indexer = indexer[indexer != -1]

taken = other.take(indexer)
Expand Down Expand Up @@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target):
tgt_values = target._values

indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
return Index(indexer), missing
return indexer, missing

def get_indexer_for(self, target, **kwargs):
"""
Expand Down Expand Up @@ -2942,7 +2942,6 @@ def _reindex_non_unique(self, target):
else:

# need to retake to have the same size as the indexer
indexer = indexer.values
indexer[~check] = 0

# reset the new indexer to account for the new size
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,17 @@ def test_get_indexer_strings(self):
with pytest.raises(TypeError):
idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2)

def test_get_indexer_consistency(self):
# See GH 16819
for name, index in self.indices.items():
indexer = index.get_indexer(index[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp

indexer, _ = index.get_indexer_non_unique(index[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp

def test_get_loc(self):
idx = pd.Index([0, 1, 2])
all_methods = [None, 'pad', 'backfill', 'nearest']
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,7 @@ def test_reindexing(self):
expected = oidx.get_indexer_non_unique(finder)[0]

actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(
expected.values, actual, check_dtype=False)
tm.assert_numpy_array_equal(expected, actual)

def test_reindex_dtype(self):
c = CategoricalIndex(['a', 'b', 'c', 'a'])
Expand Down

1 comment on commit 7d0a98e

@ri938
Copy link
Contributor Author

@ri938 ri938 commented on 7d0a98e Jul 7, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is something wrong here? The tests have been running without completing for 22 hours now.

Please sign in to comment.