Skip to content

Commit

Permalink
BUG: Respect dups in reindexing CategoricalIndex
Browse files Browse the repository at this point in the history
When the indexer is identical to the elements,
we should still return duplicates when the indexer
contains duplicates.

Closes gh-17323.
  • Loading branch information
gfyoung committed Aug 28, 2017
1 parent 473a7f3 commit e9a8e73
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ Indexing
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`)
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)

I/O
^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
target = ibase._ensure_index(target)

- if self.equals(target):
+ if self.is_unique and self.equals(target):
return np.arange(len(self), dtype='intp')

if method == 'pad' or method == 'backfill':
Expand Down
25 changes: 12 additions & 13 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,29 +365,28 @@ def test_astype(self):
tm.assert_index_equal(result, expected)

def test_reindex_base(self):

# determined by cat ordering
idx = self.create_index()
expected = np.arange(len(idx), dtype=np.intp)

actual = idx.get_indexer(idx)
tm.assert_numpy_array_equal(expected, actual)

with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
idx.get_indexer(idx, method='invalid')

def test_reindexing(self):
np.random.seed(123456789)

ci = self.create_index()
oidx = Index(np.array(ci))

for n in [1, 2, 5, len(ci)]:
for n in [1, 2, 5]:
finder = oidx[np.random.randint(0, len(ci), size=n)]
expected = oidx.get_indexer_non_unique(finder)[0]

actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(expected, actual)

# see gh-17323
for finder in [list("aabbca"), list("aababca")]:
expected = oidx.get_indexer_non_unique(finder)[0]

actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(expected, actual)

with tm.assert_raises_regex(ValueError, "Invalid fill method"):
ci.get_indexer(ci, method="invalid")

def test_reindex_dtype(self):
c = CategoricalIndex(['a', 'b', 'c', 'a'])
res, indexer = c.reindex(['a', 'c'])
Expand Down

0 comments on commit e9a8e73

Please sign in to comment.