From e757e8a817f738421c1500863c8b022c3d5af397 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 29 Aug 2015 07:51:56 -0500
Subject: [PATCH 1/2] DEPR: No NaNs in categories

---
 asv_bench/benchmarks/categoricals.py |  20 ++++-
 doc/source/categorical.rst           |  29 +++---
 doc/source/whatsnew/v0.17.0.txt      |   1 +
 pandas/core/categorical.py           |  26 +++---
 pandas/tests/test_categorical.py     | 129 +++++++++++++++++----------
 5 files changed, 129 insertions(+), 76 deletions(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index a449639f1560e..a0f9383336940 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -1,5 +1,5 @@
 from .pandas_vb_common import *
-
+import string
 
 class concat_categorical(object):
     goal_time = 0.2
@@ -25,3 +25,21 @@ def time_value_counts(self):
 
     def time_value_counts_dropna(self):
         self.ts.value_counts(dropna=True)
+
+class categorical_constructor(object):
+    goal_time = 0.2
+
+    def setup(self):
+        n = 5
+        N = int(1e6)  # np.tile needs an integer repetition count, not a float
+        self.categories = list(string.ascii_letters[:n])
+        self.cat_idx = Index(self.categories)
+        self.values = np.tile(self.categories, N)
+        self.codes = np.tile(range(n), N)
+
+    def time_regular_constructor(self):
+        Categorical(self.values, self.categories)
+
+    def time_fastpath(self):
+        Categorical(self.codes, self.cat_idx, fastpath=True)
+
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index 3c9b538caa555..534ab0e343398 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -632,41 +632,35 @@ Missing Data
 
 pandas primarily uses the value `np.nan` to represent missing data. It is by
 default not included in computations. See the :ref:`Missing Data section
-<missing_data>`
+<missing_data>`.
 
-There are two ways a `np.nan` can be represented in categorical data: either the value is not
-available ("missing value") or `np.nan` is a valid category.
+Missing values should **not** be included in the Categorical's ``categories``,
+only in the ``values``.
+Instead, NaN is understood to be distinct from every category and is always a possible value.
+When working with the Categorical's ``codes``, missing values will always have
+a code of ``-1``.
 
 .. ipython:: python
 
     s = pd.Series(["a","b",np.nan,"a"], dtype="category")
     # only two categories
     s
-    s2 = pd.Series(["a","b","c","a"], dtype="category")
-    s2.cat.categories = [1,2,np.nan]
-    # three categories, np.nan included
-    s2
+    s.cat.codes
 
-.. note::
-    As integer `Series` can't include NaN, the categories were converted to `object`.
 
-.. note::
-    Missing value methods like ``isnull`` and ``fillna`` will take both missing values as well as
-    `np.nan` categories into account:
+Methods for working with missing data, e.g. :meth:`~Series.isnull`, :meth:`~Series.fillna`,
+:meth:`~Series.dropna`, all work normally:
 
 .. ipython:: python
 
     c = pd.Series(["a","b",np.nan], dtype="category")
-    c.cat.set_categories(["a","b",np.nan], inplace=True)
-    # will be inserted as a NA category:
-    c[0] = np.nan
     s = pd.Series(c)
     s
     pd.isnull(s)
     s.fillna("a")
 
 Differences to R's `factor`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------
 
 The following differences to R's factor functions can be observed:
 
@@ -677,6 +671,9 @@ The following differences to R's factor functions can be observed:
 * In contrast to R's `factor` function, using categorical data as the sole input to create a
   new categorical series will *not* remove unused categories but create a new categorical series
   which is equal to the passed in one!
+* R allows for missing values to be included in its `levels` (pandas' `categories`). Pandas
+  does not allow `NaN` categories, but missing values can still be in the `values`.
+
 
 Gotchas
 -------
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index eae33bc80be32..424be6d949f13 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -652,6 +652,7 @@ Deprecations
   =====================  =================================
 
 - ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`).
+- Setting missing values (NaN) in a ``Categorical``'s ``categories`` will issue a warning (:issue:`10748`). You can still have missing values in the ``values``.
 - ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`6511`, :issue:`8505`)
 - ``Series.nsmallest`` and ``nlargest``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`10792`)
 - ``DataFrame.combineAdd`` and ``DataFrame.combineMult`` are deprecated. They
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 9951024ffe218..40694bfe85181 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -443,12 +443,18 @@ def _validate_categories(cls, categories):
             raise ValueError('Categorical categories must be unique')
         return categories
 
-    def _set_categories(self, categories):
+    def _set_categories(self, categories, validate=True):
         """ Sets new categories """
-        categories = self._validate_categories(categories)
-        if not self._categories is None and len(categories) != len(self._categories):
-            raise ValueError("new categories need to have the same number of items than the old "
-                             "categories!")
+        if validate:
+            categories = self._validate_categories(categories)
+            if not self._categories is None and len(categories) != len(self._categories):
+                raise ValueError("new categories need to have the same number of items than the old "
+                                 "categories!")
+        if np.any(isnull(categories)):
+            # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
+            msg = ('\nSetting NaNs in `categories` is deprecated and '
+                   'will be removed in a future version of pandas.')
+            warn(msg, FutureWarning, stacklevel=9)
         self._categories = categories
 
     def _get_categories(self):
@@ -581,11 +587,11 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
             if not cat._categories is None and len(new_categories) < len(cat._categories):
                 # remove all _codes which are larger and set to -1/NaN
                 self._codes[self._codes >= len(new_categories)] = -1
-            cat._categories = new_categories
+            cat._set_categories(new_categories, validate=False)
         else:
             values = cat.__array__()
             cat._codes = _get_codes_for_values(values, new_categories)
-            cat._categories = new_categories
+            cat._set_categories(new_categories, validate=False)
 
         if ordered is None:
             ordered = self.ordered
@@ -708,7 +714,7 @@ def add_categories(self, new_categories, inplace=False):
         new_categories = list(self._categories) + list(new_categories)
         new_categories = self._validate_categories(new_categories)
         cat = self if inplace else self.copy()
-        cat._categories = new_categories
+        cat._set_categories(new_categories, validate=False)
         cat._codes = _coerce_indexer_dtype(cat._codes, new_categories)
         if not inplace:
             return cat
@@ -791,7 +797,7 @@ def remove_unused_categories(self, inplace=False):
         from pandas.core.index import _ensure_index
         new_categories = _ensure_index(new_categories)
         cat._codes = _get_codes_for_values(cat.__array__(), new_categories)
-        cat._categories = new_categories
+        cat._set_categories(new_categories, validate=False)
         if not inplace:
             return cat
 
@@ -1171,7 +1177,7 @@ def order(self, inplace=False, ascending=True, na_position='last'):
         Category.sort
         """
         warn("order is deprecated, use sort_values(...)",
-             FutureWarning, stacklevel=2)
+             FutureWarning, stacklevel=3)
         return self.sort_values(inplace=inplace, ascending=ascending, na_position=na_position)
 
     def sort(self, inplace=True, ascending=True, na_position='last'):
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 05da93a4fca0f..8a71ddaa732e8 100755
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -187,17 +187,21 @@ def f():
         cat = pd.Categorical([np.nan, 1., 2., 3. ])
         self.assertTrue(com.is_float_dtype(cat.categories))
 
+        # Deprecating NaNs in categories (GH #10748)
         # preserve int as far as possible by converting to object if NaN is in categories
-        cat = pd.Categorical([np.nan, 1, 2, 3], categories=[np.nan, 1, 2, 3])
+        with tm.assert_produces_warning(FutureWarning):
+            cat = pd.Categorical([np.nan, 1, 2, 3], categories=[np.nan, 1, 2, 3])
         self.assertTrue(com.is_object_dtype(cat.categories))
         # This doesn't work -> this would probably need some kind of "remember the original type"
         # feature to try to cast the array interface result to...
         #vals = np.asarray(cat[cat.notnull()])
         #self.assertTrue(com.is_integer_dtype(vals))
-        cat = pd.Categorical([np.nan,"a", "b", "c"], categories=[np.nan,"a", "b", "c"])
+        with tm.assert_produces_warning(FutureWarning):
+            cat = pd.Categorical([np.nan,"a", "b", "c"], categories=[np.nan,"a", "b", "c"])
         self.assertTrue(com.is_object_dtype(cat.categories))
         # but don't do it for floats
-        cat = pd.Categorical([np.nan, 1., 2., 3.], categories=[np.nan, 1., 2., 3.])
+        with tm.assert_produces_warning(FutureWarning):
+            cat = pd.Categorical([np.nan, 1., 2., 3.], categories=[np.nan, 1., 2., 3.])
         self.assertTrue(com.is_float_dtype(cat.categories))
 
 
@@ -465,8 +469,9 @@ def test_describe(self):
         tm.assert_frame_equal(desc, expected)
 
         # NA as a category
-        cat = pd.Categorical(["a","c","c",np.nan], categories=["b","a","c",np.nan])
-        result = cat.describe()
+        with tm.assert_produces_warning(FutureWarning):
+            cat = pd.Categorical(["a","c","c",np.nan], categories=["b","a","c",np.nan])
+            result = cat.describe()
 
         expected = DataFrame([[0,0],[1,0.25],[2,0.5],[1,0.25]],
                              columns=['counts','freqs'],
@@ -474,8 +479,9 @@ def test_describe(self):
         tm.assert_frame_equal(result,expected)
 
         # NA as an unused category
-        cat = pd.Categorical(["a","c","c"], categories=["b","a","c",np.nan])
-        result = cat.describe()
+        with tm.assert_produces_warning(FutureWarning):
+            cat = pd.Categorical(["a","c","c"], categories=["b","a","c",np.nan])
+            result = cat.describe()
 
         expected = DataFrame([[0,0],[1,1/3.],[2,2/3.],[0,0]],
                              columns=['counts','freqs'],
@@ -827,29 +833,37 @@ def test_nan_handling(self):
         self.assert_numpy_array_equal(c._codes , np.array([0,-1,-1,0]))
 
         # If categories have nan included, the code should point to that instead
-        c = Categorical(["a","b",np.nan,"a"], categories=["a","b",np.nan])
-        self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
-        self.assert_numpy_array_equal(c._codes , np.array([0,1,2,0]))
+        with tm.assert_produces_warning(FutureWarning):
+            c = Categorical(["a","b",np.nan,"a"], categories=["a","b",np.nan])
+        self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan],
+                                                             dtype=np.object_))
+        self.assert_numpy_array_equal(c._codes, np.array([0,1,2,0]))
         c[1] = np.nan
-        self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
-        self.assert_numpy_array_equal(c._codes , np.array([0,2,2,0]))
+        self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan],
+                                                             dtype=np.object_))
+        self.assert_numpy_array_equal(c._codes, np.array([0,2,2,0]))
 
         # Changing categories should also make the replaced category np.nan
         c = Categorical(["a","b","c","a"])
-        c.categories = ["a","b",np.nan]
-        self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
-        self.assert_numpy_array_equal(c._codes , np.array([0,1,2,0]))
+        with tm.assert_produces_warning(FutureWarning):
+            c.categories = ["a","b",np.nan]
+        self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan],
+                                                             dtype=np.object_))
+        self.assert_numpy_array_equal(c._codes, np.array([0,1,2,0]))
 
         # Adding nan to categories should make assigned nan point to the category!
         c = Categorical(["a","b",np.nan,"a"])
         self.assert_numpy_array_equal(c.categories , np.array(["a","b"]))
         self.assert_numpy_array_equal(c._codes , np.array([0,1,-1,0]))
-        c.set_categories(["a","b",np.nan], rename=True, inplace=True)
-        self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
-        self.assert_numpy_array_equal(c._codes , np.array([0,1,-1,0]))
+        with tm.assert_produces_warning(FutureWarning):
+            c.set_categories(["a","b",np.nan], rename=True, inplace=True)
+        self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan],
+                                                             dtype=np.object_))
+        self.assert_numpy_array_equal(c._codes, np.array([0,1,-1,0]))
         c[1] = np.nan
-        self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
-        self.assert_numpy_array_equal(c._codes , np.array([0,2,-1,0]))
+        self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],
+                                                              dtype=np.object_))
+        self.assert_numpy_array_equal(c._codes, np.array([0,2,-1,0]))
 
         # Remove null categories (GH 10156)
         cases = [
@@ -861,11 +875,13 @@ def test_nan_handling(self):
         null_values = [np.nan, None, pd.NaT]
 
         for with_null, without in cases:
-            base = Categorical([], with_null)
+            with tm.assert_produces_warning(FutureWarning):
+                base = Categorical([], with_null)
             expected = Categorical([], without)
 
-            for nullval in null_values:
-                result = base.remove_categories(nullval)
+            with tm.assert_produces_warning(FutureWarning):
+                for nullval in null_values:
+                    result = base.remove_categories(nullval)
                 self.assert_categorical_equal(result, expected)
 
         # Different null values are indistinguishable
@@ -880,14 +896,16 @@ def test_isnull(self):
         res = c.isnull()
         self.assert_numpy_array_equal(res, exp)
 
-        c = Categorical(["a","b",np.nan], categories=["a","b",np.nan])
+        with tm.assert_produces_warning(FutureWarning):
+            c = Categorical(["a","b",np.nan], categories=["a","b",np.nan])
         res = c.isnull()
         self.assert_numpy_array_equal(res, exp)
 
         # test both nan in categories and as -1
         exp = np.array([True, False, True])
         c = Categorical(["a","b",np.nan])
-        c.set_categories(["a","b",np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            c.set_categories(["a","b",np.nan], rename=True, inplace=True)
         c[0] = np.nan
         res = c.isnull()
         self.assert_numpy_array_equal(res, exp)
@@ -1087,31 +1105,36 @@ def test_set_item_nan(self):
 
         # if nan in categories, the proper code should be set!
         cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3])
-        cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
         cat[1] = np.nan
         exp = np.array([0,3,2,-1])
         self.assert_numpy_array_equal(cat.codes, exp)
 
         cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3])
-        cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
         cat[1:3] = np.nan
         exp = np.array([0,3,3,-1])
         self.assert_numpy_array_equal(cat.codes, exp)
 
         cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3])
-        cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
         cat[1:3] = [np.nan, 1]
         exp = np.array([0,3,0,-1])
         self.assert_numpy_array_equal(cat.codes, exp)
 
         cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3])
-        cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
         cat[1:3] = [np.nan, np.nan]
         exp = np.array([0,3,3,-1])
         self.assert_numpy_array_equal(cat.codes, exp)
 
         cat = pd.Categorical([1,2, np.nan, 3], categories=[1,2,3])
-        cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            cat.set_categories([1,2,3, np.nan], rename=True, inplace=True)
         cat[pd.isnull(cat)] = np.nan
         exp = np.array([0,1,3,2])
         self.assert_numpy_array_equal(cat.codes, exp)
@@ -1555,14 +1578,16 @@ def test_nan_handling(self):
         self.assert_numpy_array_equal(s.values.codes, np.array([0,1,-1,0]))
 
         # If categories have nan included, the label should point to that instead
-        s2 = Series(Categorical(["a","b",np.nan,"a"], categories=["a","b",np.nan]))
+        with tm.assert_produces_warning(FutureWarning):
+            s2 = Series(Categorical(["a","b",np.nan,"a"], categories=["a","b",np.nan]))
         self.assert_numpy_array_equal(s2.cat.categories,
                                       np.array(["a","b",np.nan], dtype=np.object_))
         self.assert_numpy_array_equal(s2.values.codes, np.array([0,1,2,0]))
 
         # Changing categories should also make the replaced category np.nan
         s3 = Series(Categorical(["a","b","c","a"]))
-        s3.cat.categories = ["a","b",np.nan]
+        with tm.assert_produces_warning(FutureWarning):
+            s3.cat.categories = ["a","b",np.nan]
         self.assert_numpy_array_equal(s3.cat.categories,
                                       np.array(["a","b",np.nan], dtype=np.object_))
         self.assert_numpy_array_equal(s3.values.codes, np.array([0,1,2,0]))
@@ -2415,28 +2440,32 @@ def test_value_counts_with_nan(self):
             s.value_counts(dropna=False, sort=False),
             pd.Series([2, 1, 3], index=["a", "b", np.nan]))
 
-        s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b", np.nan]))
-        tm.assert_series_equal(
-            s.value_counts(dropna=True),
-            pd.Series([2, 1], index=["a", "b"]))
-        tm.assert_series_equal(
-            s.value_counts(dropna=False),
-            pd.Series([2, 1, 0], index=["a", "b", np.nan]))
+        with tm.assert_produces_warning(FutureWarning):
+            s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b", np.nan]))
+            tm.assert_series_equal(
+                s.value_counts(dropna=True),
+                pd.Series([2, 1], index=["a", "b"]))
+            tm.assert_series_equal(
+                s.value_counts(dropna=False),
+                pd.Series([2, 1, 0], index=["a", "b", np.nan]))
 
-        s = pd.Series(pd.Categorical(["a", "b", None, "a", None, None], categories=["a", "b", np.nan]))
-        tm.assert_series_equal(
-            s.value_counts(dropna=True),
-            pd.Series([2, 1], index=["a", "b"]))
-        tm.assert_series_equal(
-            s.value_counts(dropna=False),
-            pd.Series([3, 2, 1], index=[np.nan, "a", "b"]))
+        with tm.assert_produces_warning(FutureWarning):
+            s = pd.Series(pd.Categorical(["a", "b", None, "a", None, None],
+                                         categories=["a", "b", np.nan]))
+            tm.assert_series_equal(
+                s.value_counts(dropna=True),
+                pd.Series([2, 1], index=["a", "b"]))
+            tm.assert_series_equal(
+                s.value_counts(dropna=False),
+                pd.Series([3, 2, 1], index=[np.nan, "a", "b"]))
 
     def test_groupby(self):
 
         cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], categories=["a","b","c","d"], ordered=True)
         data = DataFrame({"a":[1,1,1,2,2,2,3,4,5], "b":cats})
 
-        expected = DataFrame({ 'a' : Series([1,2,4,np.nan],index=Index(['a','b','c','d'],name='b')) })
+        expected = DataFrame({'a': Series([1, 2, 4, np.nan],
+                             index=Index(['a', 'b', 'c', 'd'], name='b'))})
         result = data.groupby("b").mean()
         tm.assert_frame_equal(result, expected)
 
@@ -3454,11 +3483,13 @@ def f():
 
         # make sure that fillna takes both missing values and NA categories into account
         c = Categorical(["a","b",np.nan])
-        c.set_categories(["a","b",np.nan], rename=True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning):
+            c.set_categories(["a","b",np.nan], rename=True, inplace=True)
         c[0] = np.nan
         df = pd.DataFrame({"cats":c, "vals":[1,2,3]})
         df_exp = pd.DataFrame({"cats": Categorical(["a","b","a"]), "vals": [1,2,3]})
-        res = df.fillna("a")
+        with tm.assert_produces_warning(FutureWarning):
+            res = df.fillna("a")
         tm.assert_frame_equal(res, df_exp)
 
 

From 8d87f3be5ae67d03ecd3dfb70112a663064ab19b Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 30 Aug 2015 09:07:26 -0400
Subject: [PATCH 2/2] move NaN deprecation warning to _validate_categories,
 cleanup a bit

---
 pandas/core/base.py              |  1 +
 pandas/core/categorical.py       | 69 +++++++++++++++++++++-----------
 pandas/tests/test_categorical.py | 20 +++++----
 3 files changed, 59 insertions(+), 31 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 6d1c89a7a2f89..fe9bac7f4c68e 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -392,6 +392,7 @@ def argmin(self, axis=None):
         """
         return nanops.nanargmin(self.values)
 
+    @cache_readonly
     def hasnans(self):
         """ return if I have any nans; enables various perf speedups """
         return com.isnull(self).any()
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 40694bfe85181..4a6a26f21b5bf 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -207,7 +207,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
         if fastpath:
             # fast path
             self._codes = _coerce_indexer_dtype(values, categories)
-            self.categories = categories
+            self._categories = self._validate_categories(categories, fastpath=isinstance(categories, ABCIndexClass))
             self._ordered = ordered
             return
 
@@ -274,6 +274,8 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
                 ### FIXME ####
                 raise NotImplementedError("> 1 ndim Categorical are not supported at this time")
 
+            categories = self._validate_categories(categories)
+
         else:
             # there were two ways if categories are present
             # - the old one, where each value is a int pointer to the levels array -> not anymore
@@ -282,7 +284,6 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
 
             # make sure that we always have the same type here, no matter what we get passed in
             categories = self._validate_categories(categories)
-
             codes = _get_codes_for_values(values, categories)
 
             # TODO: check for old style usage. These warnings should be removes after 0.18/ in 2016
@@ -295,7 +296,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
                      "'Categorical.from_codes(codes, categories)'?", RuntimeWarning, stacklevel=2)
 
         self.set_ordered(ordered or False, inplace=True)
-        self.categories = categories
+        self._categories = categories
         self._codes = _coerce_indexer_dtype(codes, categories)
 
     def copy(self):
@@ -421,9 +422,15 @@ def _get_labels(self):
     _categories = None
 
     @classmethod
-    def _validate_categories(cls, categories):
+    def _validate_categories(cls, categories, fastpath=False):
         """
         Validates that we have good categories
+
+        Parameters
+        ----------
+        fastpath : boolean (default: False)
+           Don't perform validation of the categories for uniqueness or nulls
+
         """
         if not isinstance(categories, ABCIndexClass):
             dtype = None
@@ -439,22 +446,40 @@ def _validate_categories(cls, categories):
 
             from pandas import Index
             categories = Index(categories, dtype=dtype)
-        if not categories.is_unique:
-            raise ValueError('Categorical categories must be unique')
+
+        if not fastpath:
+
+            # check properties of the categories
+            # we don't allow NaNs in the categories themselves
+
+            if categories.hasnans:
+                # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
+                msg = ('\nSetting NaNs in `categories` is deprecated and '
+                       'will be removed in a future version of pandas.')
+                warn(msg, FutureWarning, stacklevel=5)
+
+            # categories must be unique
+
+            if not categories.is_unique:
+                raise ValueError('Categorical categories must be unique')
+
         return categories
 
-    def _set_categories(self, categories, validate=True):
-        """ Sets new categories """
-        if validate:
-            categories = self._validate_categories(categories)
-            if not self._categories is None and len(categories) != len(self._categories):
-                raise ValueError("new categories need to have the same number of items than the old "
-                                 "categories!")
-        if np.any(isnull(categories)):
-            # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
-            msg = ('\nSetting NaNs in `categories` is deprecated and '
-                   'will be removed in a future version of pandas.')
-            warn(msg, FutureWarning, stacklevel=9)
+    def _set_categories(self, categories, fastpath=False):
+        """ Sets new categories
+
+        Parameters
+        ----------
+        fastpath : boolean (default: False)
+           Don't perform validation of the categories for uniqueness or nulls
+
+        """
+
+        categories = self._validate_categories(categories, fastpath=fastpath)
+        if not fastpath and not self._categories is None and len(categories) != len(self._categories):
+            raise ValueError("new categories need to have the same number of items than the old "
+                             "categories!")
+
         self._categories = categories
 
     def _get_categories(self):
@@ -587,11 +612,10 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
             if not cat._categories is None and len(new_categories) < len(cat._categories):
                 # remove all _codes which are larger and set to -1/NaN
                 self._codes[self._codes >= len(new_categories)] = -1
-            cat._set_categories(new_categories, validate=False)
         else:
             values = cat.__array__()
             cat._codes = _get_codes_for_values(values, new_categories)
-            cat._set_categories(new_categories, validate=False)
+        cat._categories = new_categories
 
         if ordered is None:
             ordered = self.ordered
@@ -712,9 +736,8 @@ def add_categories(self, new_categories, inplace=False):
             msg = "new categories must not include old categories: %s" % str(already_included)
             raise ValueError(msg)
         new_categories = list(self._categories) + list(new_categories)
-        new_categories = self._validate_categories(new_categories)
         cat = self if inplace else self.copy()
-        cat._set_categories(new_categories, validate=False)
+        cat._categories = self._validate_categories(new_categories)
         cat._codes = _coerce_indexer_dtype(cat._codes, new_categories)
         if not inplace:
             return cat
@@ -797,7 +820,7 @@ def remove_unused_categories(self, inplace=False):
         from pandas.core.index import _ensure_index
         new_categories = _ensure_index(new_categories)
         cat._codes = _get_codes_for_values(cat.__array__(), new_categories)
-        cat._set_categories(new_categories, validate=False)
+        cat._categories = new_categories
         if not inplace:
             return cat
 
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 8a71ddaa732e8..d847638ff105e 100755
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -129,7 +129,8 @@ def f():
             Categorical(["a","b"], ["a","b","b"])
         self.assertRaises(ValueError, f)
         def f():
-            Categorical([1,2], [1,2,np.nan, np.nan])
+            with tm.assert_produces_warning(FutureWarning):
+                Categorical([1,2], [1,2,np.nan, np.nan])
         self.assertRaises(ValueError, f)
 
         # The default should be unordered
@@ -879,15 +880,18 @@ def test_nan_handling(self):
                 base = Categorical([], with_null)
             expected = Categorical([], without)
 
-            with tm.assert_produces_warning(FutureWarning):
-                for nullval in null_values:
-                    result = base.remove_categories(nullval)
-                self.assert_categorical_equal(result, expected)
+            for nullval in null_values:  # every null flavour must be checked
+                result = base.remove_categories(nullval)
+                self.assert_categorical_equal(result, expected)
 
         # Different null values are indistinguishable
         for i, j in [(0, 1), (0, 2), (1, 2)]:
             nulls = [null_values[i], null_values[j]]
-            self.assertRaises(ValueError, lambda: Categorical([], categories=nulls))
+
+            def f():
+                with tm.assert_produces_warning(FutureWarning):
+                    Categorical([], categories=nulls)
+            self.assertRaises(ValueError, f)
 
 
     def test_isnull(self):
@@ -3488,8 +3492,8 @@ def f():
         c[0] = np.nan
         df = pd.DataFrame({"cats":c, "vals":[1,2,3]})
         df_exp = pd.DataFrame({"cats": Categorical(["a","b","a"]), "vals": [1,2,3]})
-        with tm.assert_produces_warning(FutureWarning):
-            res = df.fillna("a")
+
+        res = df.fillna("a")
         tm.assert_frame_equal(res, df_exp)