pandas-dev · mroeschke · Apr 11, 2023 · Apr 7, 2023 · Apr 7, 2023 · Apr 8, 2023
diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst
@@ -257,6 +257,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
 We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.
 
 .. ipython:: python
+   :okwarning:
 
    import dask.dataframe as dd
 
@@ -286,6 +287,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet.
 Rather than executing immediately, doing operations build up a **task graph**.
 
 .. ipython:: python
+   :okwarning:
 
    ddf
    ddf["name"]
@@ -300,6 +302,7 @@ returns a Dask Series with the same dtype and the same name.
 To get the actual result you can call ``.compute()``.
 
 .. ipython:: python
+   :okwarning:
 
    %time ddf["name"].value_counts().compute()
 
@@ -345,6 +348,7 @@ known automatically. In this case, since we created the parquet files manually,
 we need to supply the divisions manually.
 
 .. ipython:: python
+   :okwarning:
 
    N = 12
    starts = [f"20{i:>02d}-01-01" for i in range(N)]

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -226,6 +226,7 @@ Deprecations
 - Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. This pattern was very slow and it's recommended to use alternative methods to archive the same goal (:issue:`52116`)
 - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
 - Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
+- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
 - Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
 -
 

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -147,6 +147,7 @@ def pytest_collection_modifyitems(items, config) -> None:
             "(Series|DataFrame).bool is now deprecated and will be removed "
             "in future version of pandas",
         ),
+        ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
     ]
 
     for item in items:

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -41,12 +41,14 @@
 
 from pandas.core.dtypes.cast import is_nested_object
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
     is_dict_like,
     is_list_like,
     is_sequence,
 )
-from pandas.core.dtypes.dtypes import ExtensionDtype
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    ExtensionDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCNDFrame,
@@ -1089,7 +1091,7 @@ def apply_standard(self) -> DataFrame | Series:
         # we need to give `na_action="ignore"` for categorical data.
         # TODO: remove the `na_action="ignore"` when that default has been changed in
         #  Categorical (GH51645).
-        action = "ignore" if is_categorical_dtype(obj) else None
+        action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
         mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype)
 
         if len(mapped) and isinstance(mapped[0], ABCSeries):

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -38,7 +38,6 @@
     ensure_platform_int,
     is_any_real_numeric_dtype,
     is_bool_dtype,
-    is_categorical_dtype,
     is_datetime64_dtype,
     is_dict_like,
     is_dtype_equal,
@@ -409,7 +408,8 @@ def __init__(
         null_mask = np.array(False)
 
         # sanitize input
-        if is_categorical_dtype(values):
+        vdtype = getattr(values, "dtype", None)
+        if isinstance(vdtype, CategoricalDtype):
             if dtype.categories is None:
                 dtype = CategoricalDtype(values.categories, dtype.ordered)
         elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
@@ -2721,7 +2721,9 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
         raise TypeError("Input must be list-like")
 
     categories: Index
-    if is_categorical_dtype(values):
+
+    vdtype = getattr(values, "dtype", None)
+    if isinstance(vdtype, CategoricalDtype):
         values = extract_array(values)
         # The Categorical we want to build has the same categories
         # as values but its codes are by def [0, ..., len(n_categories) - 1]

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -50,7 +50,6 @@
     maybe_upcast_numeric_to_64bit,
 )
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
     is_dtype_equal,
     is_float_dtype,
     is_integer_dtype,
@@ -1772,7 +1771,7 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:
     elif not is_list_like(values) or isinstance(values, ABCDataFrame):
         # This will raise later, but we avoid passing to maybe_convert_platform
         return values
-    elif is_categorical_dtype(values):
+    elif isinstance(getattr(values, "dtype", None), CategoricalDtype):
         values = np.asarray(values)
     elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
         # TODO: should we just cast these to list?

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -469,6 +469,13 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
     >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
     True
     """
+    # GH#52527
+    warnings.warn(
+        "is_categorical_dtype is deprecated and will be removed in a future "
+        "version. Use isinstance(dtype, CategoricalDtype) instead",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
     if isinstance(arr_or_dtype, ExtensionDtype):
         # GH#33400 fastpath for dtype object
         return arr_or_dtype.name == "category"

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -22,10 +22,10 @@
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
     is_list_like,
     is_scalar,
 )
+from pandas.core.dtypes.dtypes import CategoricalDtype
 
 from pandas.core import algorithms
 from pandas.core.arrays import (
@@ -618,7 +618,7 @@ def __init__(
                 # TODO 2022-10-08 we only have one test that gets here and
                 #  values are already in nanoseconds in that case.
                 grouping_vector = Series(grouping_vector).to_numpy()
-        elif is_categorical_dtype(grouping_vector):
+        elif isinstance(getattr(grouping_vector, "dtype", None), CategoricalDtype):
             # a passed Categorical
             self._orig_cats = grouping_vector.categories
             grouping_vector, self._all_grouper = recode_for_groupby(
@@ -635,7 +635,8 @@ def __iter__(self) -> Iterator:
 
     @cache_readonly
     def _passed_categorical(self) -> bool:
-        return is_categorical_dtype(self.grouping_vector)
+        dtype = getattr(self.grouping_vector, "dtype", None)
+        return isinstance(dtype, CategoricalDtype)
 
     @cache_readonly
     def name(self) -> Hashable:

diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
@@ -10,10 +10,7 @@
 from pandas.util._decorators import cache_readonly
 
 import pandas as pd
-from pandas.api.types import (
-    is_categorical_dtype,
-    is_string_dtype,
-)
+from pandas.api.types import is_string_dtype
 from pandas.core.interchange.buffer import PandasBuffer
 from pandas.core.interchange.dataframe_protocol import (
     Column,
@@ -99,7 +96,7 @@ def offset(self) -> int:
     def dtype(self) -> tuple[DtypeKind, int, str, str]:
         dtype = self._col.dtype
 
-        if is_categorical_dtype(dtype):
+        if isinstance(dtype, pd.CategoricalDtype):
             codes = self._col.values.codes
             (
                 _,

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -22,7 +22,6 @@
     DT64NS_DTYPE,
     ensure_platform_int,
     is_bool_dtype,
-    is_categorical_dtype,
     is_datetime64_dtype,
     is_datetime64tz_dtype,
     is_datetime_or_timedelta_dtype,
@@ -33,6 +32,7 @@
     is_timedelta64_dtype,
 )
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
 )
@@ -462,7 +462,8 @@ def _bins_to_cuts(
                 raise ValueError(
                     "Bin labels must be one fewer than the number of bin edges"
                 )
-        if not is_categorical_dtype(labels):
+
+        if not isinstance(getattr(labels, "dtype", None), CategoricalDtype):
             labels = Categorical(
                 labels,
                 categories=labels if len(set(labels)) == len(labels) else None,

diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
@@ -22,7 +22,6 @@
 
 from pandas.core.dtypes.common import (
     is_any_real_numeric_dtype,
-    is_categorical_dtype,
     is_extension_array_dtype,
     is_float,
     is_float_dtype,
@@ -34,6 +33,7 @@
     is_number,
     is_numeric_dtype,
 )
+from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCIndex,
@@ -563,7 +563,7 @@ def result(self):
 
     def _convert_to_ndarray(self, data):
         # GH31357: categorical columns are processed separately
-        if is_categorical_dtype(data):
+        if isinstance(data.dtype, CategoricalDtype):
             return data
 
         # GH32073: cast to float if values contain nulled integers
@@ -1211,7 +1211,9 @@ def _make_plot(self):
 
         c_is_column = is_hashable(c) and c in self.data.columns
 
-        color_by_categorical = c_is_column and is_categorical_dtype(self.data[c])
+        color_by_categorical = c_is_column and isinstance(
+            self.data[c].dtype, CategoricalDtype
+        )
 
         color = self.kwds.pop("color", None)
         if c is not None and color is not None:

diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py
@@ -6,7 +6,6 @@
 from pandas.compat import PYPY
 
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
     is_dtype_equal,
     is_object_dtype,
 )
@@ -96,8 +95,8 @@ def test_memory_usage(index_or_series_memory_obj):
     res_deep = obj.memory_usage(deep=True)
 
     is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index))
-    is_categorical = is_categorical_dtype(obj.dtype) or (
-        is_ser and is_categorical_dtype(obj.index.dtype)
+    is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
+        is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
     )
     is_object_string = is_dtype_equal(obj, "string[python]") or (
         is_ser and is_dtype_equal(obj.index.dtype, "string[python]")

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -163,6 +163,7 @@ def get_is_dtype_funcs():
     return [getattr(com, fname) for fname in fnames]
 
 
+@pytest.mark.filterwarnings("ignore:is_categorical_dtype is deprecated:FutureWarning")
 @pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__)
 def test_get_dtype_error_catch(func):
     # see gh-15941
@@ -171,7 +172,7 @@ def test_get_dtype_error_catch(func):
 
     msg = f"{func.__name__} is deprecated"
     warn = None
-    if func is com.is_int64_dtype:
+    if func is com.is_int64_dtype or func is com.is_categorical_dtype:
         warn = FutureWarning
 
     with tm.assert_produces_warning(warn, match=msg):
@@ -274,12 +275,14 @@ def test_is_interval_dtype():
 
 
 def test_is_categorical_dtype():
-    assert not com.is_categorical_dtype(object)
-    assert not com.is_categorical_dtype([1, 2, 3])
+    msg = "is_categorical_dtype is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert not com.is_categorical_dtype(object)
+        assert not com.is_categorical_dtype([1, 2, 3])
 
-    assert com.is_categorical_dtype(CategoricalDtype())
-    assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
-    assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
+        assert com.is_categorical_dtype(CategoricalDtype())
+        assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
+        assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
 
 
 def test_is_string_dtype():

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -166,16 +166,18 @@ def test_is_dtype(self, dtype):
         assert not CategoricalDtype.is_dtype(np.float64)
 
     def test_basic(self, dtype):
-        assert is_categorical_dtype(dtype)
+        msg = "is_categorical_dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert is_categorical_dtype(dtype)
 
-        factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
+            factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
 
-        s = Series(factor, name="A")
+            s = Series(factor, name="A")
 
-        # dtypes
-        assert is_categorical_dtype(s.dtype)
-        assert is_categorical_dtype(s)
-        assert not is_categorical_dtype(np.dtype("float64"))
+            # dtypes
+            assert is_categorical_dtype(s.dtype)
+            assert is_categorical_dtype(s)
+            assert not is_categorical_dtype(np.dtype("float64"))
 
     def test_tuple_categories(self):
         categories = [(1, "a"), (2, "b"), (3, "c")]
@@ -1101,10 +1103,15 @@ def test_is_bool_dtype_sparse():
 )
 def test_is_dtype_no_warning(check):
     data = pd.DataFrame({"A": [1, 2]})
-    with tm.assert_produces_warning(None):
+
+    warn = None
+    msg = "is_categorical_dtype is deprecated"
+    if check is is_categorical_dtype:
+        warn = FutureWarning
+    with tm.assert_produces_warning(warn, match=msg):
         check(data)
 
-    with tm.assert_produces_warning(None):
+    with tm.assert_produces_warning(warn, match=msg):
         check(data["A"])
 
 

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -7,7 +7,6 @@
 
 from pandas.core.dtypes.base import _registry as ea_registry
 from pandas.core.dtypes.common import (
-    is_categorical_dtype,
     is_interval_dtype,
     is_object_dtype,
 )
@@ -484,9 +483,9 @@ def test_setitem_intervals(self):
         df["E"] = np.array(ser.values)
         df["F"] = ser.astype(object)
 
-        assert is_categorical_dtype(df["B"].dtype)
+        assert isinstance(df["B"].dtype, CategoricalDtype)
         assert is_interval_dtype(df["B"].cat.categories)
-        assert is_categorical_dtype(df["D"].dtype)
+        assert isinstance(df["D"].dtype, CategoricalDtype)
         assert is_interval_dtype(df["D"].cat.categories)
 
         # These go through the Series constructor and so get inferred back

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -9,11 +9,10 @@
 from pandas.compat import is_platform_windows
 import pandas.util._test_decorators as td
 
-from pandas.core.dtypes.common import is_categorical_dtype
-
 import pandas as pd
 from pandas import (
     Categorical,
+    CategoricalDtype,
     DataFrame,
     Index,
     Series,
@@ -1280,7 +1279,7 @@ def test_any_all_np_func(self, func, data, expected):
         # GH 19976
         data = DataFrame(data)
 
-        if any(is_categorical_dtype(x) for x in data.dtypes):
+        if any(isinstance(x, CategoricalDtype) for x in data.dtypes):
             with pytest.raises(
                 TypeError, match="dtype category does not support reduction"
             ):