From 5ed076ca6f8a925317fba32c47bd4acb6d2a2b49 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 18 Oct 2017 02:03:44 +0200
Subject: [PATCH] BUG: Categorical(Index) passed as categories (#17888)

---
 doc/source/whatsnew/v0.21.0.txt    |  1 +
 pandas/core/dtypes/dtypes.py       | 17 ++++++++++-------
 pandas/tests/dtypes/test_dtypes.py | 10 +++++++++-
 pandas/tests/test_categorical.py   | 28 ++++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 598e452640781a..dd90396c4009e7 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -1024,6 +1024,7 @@ Categorical
 - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
 - Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
 - Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)
+- Bug in constructing a ``Categorical`` or ``CategoricalDtype`` when the specified ``categories`` are themselves a ``Categorical`` or ``CategoricalIndex`` (:issue:`17884`)
 
 .. _whatsnew_0210.pypy:
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index b3498abb3b2c06..b4467f0f9733bf 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -3,7 +3,7 @@
 import re
 import numpy as np
 from pandas import compat
-from pandas.core.dtypes.generic import ABCIndexClass
+from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex
 
 
 class ExtensionDtype(object):
@@ -170,16 +170,16 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
         return cls(categories, ordered)
 
     def _finalize(self, categories, ordered, fastpath=False):
-        from pandas.core.indexes.base import Index
 
         if ordered is None:
             ordered = False
+        else:
+            self._validate_ordered(ordered)
 
         if categories is not None:
-            categories = Index(categories, tupleize_cols=False)
-            # validation
-            self._validate_categories(categories, fastpath=fastpath)
-            self._validate_ordered(ordered)
+            categories = self._validate_categories(categories,
+                                                   fastpath=fastpath)
+
         self._categories = categories
         self._ordered = ordered
 
@@ -316,7 +316,7 @@ def _validate_categories(categories, fastpath=False):
         from pandas import Index
 
         if not isinstance(categories, ABCIndexClass):
-            categories = Index(categories)
+            categories = Index(categories, tupleize_cols=False)
 
         if not fastpath:
 
@@ -326,6 +326,9 @@ def _validate_categories(categories, fastpath=False):
             if not categories.is_unique:
                 raise ValueError('Categorical categories must be unique')
 
+        if isinstance(categories, ABCCategoricalIndex):
+            categories = categories.categories
+
         return categories
 
     @property
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 0b9e2c9fe5ffc7..84e6f0d4f5a7a2 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -6,7 +6,8 @@
 
 import numpy as np
 import pandas as pd
-from pandas import Series, Categorical, IntervalIndex, date_range
+from pandas import (
+    Series, Categorical, CategoricalIndex, IntervalIndex, date_range)
 
 from pandas.core.dtypes.dtypes import (
     DatetimeTZDtype, PeriodDtype,
@@ -657,3 +658,10 @@ def test_str_vs_repr(self):
         # Py2 will have unicode prefixes
         pat = r"CategoricalDtype\(categories=\[.*\], ordered=False\)"
         assert re.match(pat, repr(c1))
+
+    def test_categorical_categories(self):
+        # GH17884
+        c1 = CategoricalDtype(Categorical(['a', 'b']))
+        tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
+        c1 = CategoricalDtype(CategoricalIndex(['a', 'b']))
+        tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index e1d0b756fed1cd..d88e92a39a6c56 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -519,6 +519,18 @@ def test_contructor_from_categorical_string(self):
         result = Categorical(values, categories=['a', 'b', 'c'], ordered=True)
         tm.assert_categorical_equal(result, expected)
 
+    def test_constructor_with_categorical_categories(self):
+        # GH17884
+        expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
+
+        result = Categorical(
+            ['a', 'b'], categories=Categorical(['a', 'b', 'c']))
+        tm.assert_categorical_equal(result, expected)
+
+        result = Categorical(
+            ['a', 'b'], categories=CategoricalIndex(['a', 'b', 'c']))
+        tm.assert_categorical_equal(result, expected)
+
     def test_from_codes(self):
 
         # too few categories
@@ -560,6 +572,22 @@ def f():
             codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
             pd.Categorical.from_codes(codes, categories=["train", "test"])
 
+    def test_from_codes_with_categorical_categories(self):
+        # GH17884
+        expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
+
+        result = Categorical.from_codes(
+            [0, 1], categories=Categorical(['a', 'b', 'c']))
+        tm.assert_categorical_equal(result, expected)
+
+        result = Categorical.from_codes(
+            [0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
+        tm.assert_categorical_equal(result, expected)
+
+        # non-unique Categorical still raises
+        with pytest.raises(ValueError):
+            Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
+
     @pytest.mark.parametrize('dtype', [None, 'category'])
     def test_from_inferred_categories(self, dtype):
         cats = ['a', 'b']