Skip to content

Commit

Permalink
BUG: Categorical(Index) passed as categories (pandas-dev#17888)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored and No-Stream committed Nov 28, 2017
1 parent 5281a36 commit 5ed076c
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,7 @@ Categorical
- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)
- Bug in constructing a ``Categorical``/``CategoricalDtype`` when the specified ``categories`` are of categorical type (:issue:`17884`)

.. _whatsnew_0210.pypy:

Expand Down
17 changes: 10 additions & 7 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
import numpy as np
from pandas import compat
from pandas.core.dtypes.generic import ABCIndexClass
from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex


class ExtensionDtype(object):
Expand Down Expand Up @@ -170,16 +170,16 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
return cls(categories, ordered)

def _finalize(self, categories, ordered, fastpath=False):
from pandas.core.indexes.base import Index

if ordered is None:
ordered = False
else:
self._validate_ordered(ordered)

if categories is not None:
categories = Index(categories, tupleize_cols=False)
# validation
self._validate_categories(categories, fastpath=fastpath)
self._validate_ordered(ordered)
categories = self._validate_categories(categories,
fastpath=fastpath)

self._categories = categories
self._ordered = ordered

Expand Down Expand Up @@ -316,7 +316,7 @@ def _validate_categories(categories, fastpath=False):
from pandas import Index

if not isinstance(categories, ABCIndexClass):
categories = Index(categories)
categories = Index(categories, tupleize_cols=False)

if not fastpath:

Expand All @@ -326,6 +326,9 @@ def _validate_categories(categories, fastpath=False):
if not categories.is_unique:
raise ValueError('Categorical categories must be unique')

if isinstance(categories, ABCCategoricalIndex):
categories = categories.categories

return categories

@property
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import numpy as np
import pandas as pd
from pandas import Series, Categorical, IntervalIndex, date_range
from pandas import (
Series, Categorical, CategoricalIndex, IntervalIndex, date_range)

from pandas.core.dtypes.dtypes import (
DatetimeTZDtype, PeriodDtype,
Expand Down Expand Up @@ -657,3 +658,10 @@ def test_str_vs_repr(self):
# Py2 will have unicode prefixes
pat = r"CategoricalDtype\(categories=\[.*\], ordered=False\)"
assert re.match(pat, repr(c1))

def test_categorical_categories(self):
# GH17884
# Build a CategoricalDtype whose categories argument is itself
# categorical-typed and check that the resulting ``.categories`` is
# expected to equal the plain ``pd.Index(['a', 'b'])``.
# Case 1: the categories are given as a Categorical.
c1 = CategoricalDtype(Categorical(['a', 'b']))
tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
# Case 2: the categories are given as a CategoricalIndex.
c1 = CategoricalDtype(CategoricalIndex(['a', 'b']))
tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
28 changes: 28 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,18 @@ def test_contructor_from_categorical_string(self):
result = Categorical(values, categories=['a', 'b', 'c'], ordered=True)
tm.assert_categorical_equal(result, expected)

def test_constructor_with_categorical_categories(self):
# GH17884
# Reference result: the same values with the categories passed as a
# plain list. Both categorical-typed inputs below must produce a
# Categorical equal to this one.
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

# categories supplied as a Categorical
result = Categorical(
['a', 'b'], categories=Categorical(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

# categories supplied as a CategoricalIndex
result = Categorical(
['a', 'b'], categories=CategoricalIndex(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

def test_from_codes(self):

# too few categories
Expand Down Expand Up @@ -560,6 +572,22 @@ def f():
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
pd.Categorical.from_codes(codes, categories=["train", "test"])

def test_from_codes_with_categorical_categories(self):
# GH17884
# Reference result built from plain values and list categories; codes
# [0, 1] against categories a/b/c are expected to yield the same object.
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

# categories supplied as a Categorical
result = Categorical.from_codes(
[0, 1], categories=Categorical(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

# categories supplied as a CategoricalIndex
result = Categorical.from_codes(
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

# non-unique Categorical still raises
# ('a' appears twice in the values used as categories here)
with pytest.raises(ValueError):
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories(self, dtype):
cats = ['a', 'b']
Expand Down

0 comments on commit 5ed076c

Please sign in to comment.