Skip to content

Commit

Permalink
BUG: Respect dups in reindexing CategoricalIndex
Browse files Browse the repository at this point in the history
When the indexer is identical to the elements,
we should still return duplicates when the indexer
contains duplicates.

Closes gh-17323.
  • Loading branch information
gfyoung committed Aug 28, 2017
1 parent 473a7f3 commit 13a5590
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ Indexing
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`)
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)

I/O
^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
target = ibase._ensure_index(target)

if self.equals(target):
if self.is_unique and self.equals(target):
return np.arange(len(self), dtype='intp')

if method == 'pad' or method == 'backfill':
Expand Down
24 changes: 19 additions & 5 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,18 +365,18 @@ def test_astype(self):
tm.assert_index_equal(result, expected)

def test_reindex_base(self):

# determined by cat ordering
idx = self.create_index()
# Determined by cat ordering.
idx = CategoricalIndex(list("cab"), categories=list("cab"))
expected = np.arange(len(idx), dtype=np.intp)

actual = idx.get_indexer(idx)
tm.assert_numpy_array_equal(expected, actual)

with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
idx.get_indexer(idx, method='invalid')
with tm.assert_raises_regex(ValueError, "Invalid fill method"):
idx.get_indexer(idx, method="invalid")

def test_reindexing(self):
np.random.seed(123456789)

ci = self.create_index()
oidx = Index(np.array(ci))
Expand All @@ -388,6 +388,20 @@ def test_reindexing(self):
actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(expected, actual)

# see gh-17323
#
# Ensure that duplicate-handling is
# correct in general.
#
# In general, we want to make sure that
# re-indexing with indices identical to
# its members properly respects duplicates.
for finder in [list("aabbca"), list("aababca")]:
expected = oidx.get_indexer_non_unique(finder)[0]

actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(expected, actual)

def test_reindex_dtype(self):
c = CategoricalIndex(['a', 'b', 'c', 'a'])
res, indexer = c.reindex(['a', 'c'])
Expand Down

0 comments on commit 13a5590

Please sign in to comment.