Skip to content

Commit

Permalink
BUG: Have object dtype for empty Categorical.categories (pandas-dev#1…
Browse files Browse the repository at this point in the history
…7249)

* BUG: Have object dtype for empty Categorical ctor

Previously we had a `Float64Index`, which is inconsistent with, e.g., the
regular Index constructor.

* TST: Update tests in multi for new return

Previously these worked around the return type by wrapping list-likes
in `np.array` and relying on that to cast to float. These workarounds are no
longer necessary.

* TST: Update union_categorical tests

This relied on `NaN` being a float and on the categories of an empty
Categorical being float. It is no longer a necessary test.

* TST: set object dtype
  • Loading branch information
TomAugspurger authored Aug 19, 2017
1 parent 34c4ffd commit 7818486
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 16 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,9 @@ Numeric
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
- Bug in the categorical constructor with empty values and categories causing
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
``Index`` with object dtype (:issue:`17248`)


Other
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
# On list with NaNs, int values will be converted to float. Use
# "object" dtype to prevent this. In the end objects will be
# casted to int/... in the category assignment step.
dtype = 'object' if isna(values).any() else None
if len(values) == 0 or isna(values).any():
dtype = 'object'
else:
dtype = None
values = _sanitize_array(values, None, dtype=dtype)

if categories is None:
Expand Down
9 changes: 4 additions & 5 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
arrays = [[]] * N
names = list('ABC')[:N]
result = MultiIndex.from_arrays(arrays=arrays, names=names)
expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
names=names)
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -829,7 +829,7 @@ def test_from_product_empty(self):

# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Float64Index([], name='A')
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)

# 2 levels
Expand All @@ -838,7 +838,7 @@ def test_from_product_empty(self):
names = ['A', 'B']
for first, second in zip(l1, l2):
result = MultiIndex.from_product([first, second], names=names)
expected = MultiIndex(levels=[np.array(first), np.array(second)],
expected = MultiIndex(levels=[first, second],
labels=[[], []], names=names)
tm.assert_index_equal(result, expected)

Expand All @@ -847,8 +847,7 @@ def test_from_product_empty(self):
for N in range(4):
lvl2 = lrange(N)
result = MultiIndex.from_product([[], lvl2, []], names=names)
expected = MultiIndex(levels=[np.array(A)
for A in [[], lvl2, []]],
expected = MultiIndex(levels=[[], lvl2, []],
labels=[[], [], []], names=names)
tm.assert_index_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

s1 = pd.Series([], dtype='category')
s2 = pd.Series([])
s2 = pd.Series([], dtype='object')

# different dtype => not-category
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
Expand Down
12 changes: 3 additions & 9 deletions pandas/tests/reshape/test_union_categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
exp = Categorical([])
tm.assert_categorical_equal(res, exp)

res = union_categoricals([pd.Categorical([]),
pd.Categorical([1.0])])
exp = Categorical([1.0])
res = union_categoricals([Categorical([]),
Categorical(['1'])])
exp = Categorical(['1'])
tm.assert_categorical_equal(res, exp)

# to make dtype equal
nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
res = union_categoricals([nanc,
pd.Categorical([])])
tm.assert_categorical_equal(res, nanc)

def test_union_categorical_same_category(self):
# check fastpath
c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ def test_setitem_listlike(self):
result = c.codes[np.array([100000]).astype(np.int64)]
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))

def test_constructor_empty(self):
# GH 17248
c = Categorical([])
expected = Index([])
tm.assert_index_equal(c.categories, expected)

c = Categorical([], categories=[1, 2, 3])
expected = pd.Int64Index([1, 2, 3])
tm.assert_index_equal(c.categories, expected)

def test_constructor_unsortable(self):

# it works!
Expand Down

0 comments on commit 7818486

Please sign in to comment.