Skip to content

Commit

Permalink
Enable is_dtype_equal on CategoricalIndex, fixed some doc typos, added ordered CategoricalIndex test
Browse files (browse the repository at this point in the history)
  • Loading branch information
thequackdaddy committed Jun 21, 2017
1 parent 95348c1 commit 97d2d21
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 18 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Indexing
^^^^^^^^

- Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`)
- Fixed a bug that prevented joining on a categorical MultiIndex (:issue:`16627`).

I/O
^^^
Expand Down
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ Indexing

- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).
- Fixed a bug that prevented joining on a categorical MultiIndex (:issue:`13873`).


I/O
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,9 @@ def take(self, indices, axis=0, allow_fill=True,
na_value=-1)
return self._create_from_codes(taken)

def is_dtype_equal(self, other):
    """
    Check dtype equality against ``other`` via the underlying data.

    The comparison is delegated unchanged to
    ``self._data.is_dtype_equal``; this method adds no logic of its own.
    NOTE(review): ``_data`` is presumably the backing ``Categorical`` of
    this index -- confirm against the class definition.

    Parameters
    ----------
    other : object
        Passed through as-is to ``self._data.is_dtype_equal``.

    Returns
    -------
    The result of ``self._data.is_dtype_equal(other)`` (presumably a
    bool -- confirm against the delegate's contract).
    """
    return self._data.is_dtype_equal(other)

take_nd = take

def map(self, mapper):
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,13 +1441,9 @@ def _factorize_keys(lk, rk, sort=True):
rk = rk.values

# if we exactly match in categories, allow us to use codes
if isinstance(lk, CategoricalIndex):
ldata = lk._data
else:
ldata = lk
if (is_categorical_dtype(lk) and
is_categorical_dtype(rk) and
ldata.is_dtype_equal(rk)):
lk.is_dtype_equal(rk)):
return lk.codes, rk.codes, len(lk.categories)

if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
Expand Down
42 changes: 30 additions & 12 deletions pandas/tests/test_join.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-

import numpy as np
from pandas import Index
from pandas import Index, DataFrame, Categorical, merge

from pandas._libs import join as _join
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
from pandas.util.testing import assert_almost_equal, assert_frame_equal


class TestIndexer(object):
Expand Down Expand Up @@ -196,20 +196,38 @@ def test_inner_join_indexer2():

def test_merge_join_categorical_multiindex():
# From issue 16627
import pandas as pd
a = {'Cat1': pd.Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
['a', 'b', 'c']),
a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
['a', 'b', 'c']),
'Int1': [0, 1, 0, 1, 0, 0]}
a = pd.DataFrame(a)
a = DataFrame(a)

b = {'Cat': pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
['a', 'b', 'c']),
b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
['a', 'b', 'c']),
'Int': [0, 0, 0, 1, 1, 1],
'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]}
b = pd.DataFrame(b).set_index(['Cat', 'Int'])['Factor']
b = DataFrame(b).set_index(['Cat', 'Int'])['Factor']

c = pd.merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
right_on=['Cat', 'Int'], how='left')
c = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
right_on=['Cat', 'Int'], how='left')
d = a.join(b, on=['Cat1', 'Int1'])
c = c.drop(['Cat', 'Int'], axis=1)
assert_almost_equal(c, d)
assert_frame_equal(c, d)

a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'],
['b', 'a', 'c'],
ordered=True),
'Int1': [0, 1, 0, 1, 0, 0]}
a = DataFrame(a)

b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
['b', 'a', 'c'],
ordered=True),
'Int': [0, 0, 0, 1, 1, 1],
'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]}
b = DataFrame(b).set_index(['Cat', 'Int'])['Factor']

c = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'],
right_on=['Cat', 'Int'], how='left')
d = a.join(b, on=['Cat1', 'Int1'])
c = c.drop(['Cat', 'Int'], axis=1)
assert_frame_equal(c, d)

0 comments on commit 97d2d21

Please sign in to comment.