Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: is_categorical_dtype #52527

Merged
merged 9 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

.. ipython:: python
:okwarning:

import dask.dataframe as dd

Expand Down Expand Up @@ -286,6 +287,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet.
Rather than executing immediately, operations build up a **task graph**.

.. ipython:: python
:okwarning:

ddf
ddf["name"]
Expand All @@ -300,6 +302,7 @@ returns a Dask Series with the same dtype and the same name.
To get the actual result you can call ``.compute()``.

.. ipython:: python
:okwarning:

%time ddf["name"].value_counts().compute()

Expand Down Expand Up @@ -345,6 +348,7 @@ known automatically. In this case, since we created the parquet files manually,
we need to supply the divisions manually.

.. ipython:: python
:okwarning:

N = 12
starts = [f"20{i:>02d}-01-01" for i in range(N)]
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ Deprecations
- Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. This pattern was very slow and it's recommended to use alternative methods to achieve the same goal (:issue:`52116`)
- Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
-

Expand Down
1 change: 1 addition & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def pytest_collection_modifyitems(items, config) -> None:
"(Series|DataFrame).bool is now deprecated and will be removed "
"in future version of pandas",
),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
]

for item in items:
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,14 @@

from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dict_like,
is_list_like,
is_sequence,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
ExtensionDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCNDFrame,
Expand Down Expand Up @@ -1089,7 +1091,7 @@ def apply_standard(self) -> DataFrame | Series:
# we need to give `na_action="ignore"` for categorical data.
# TODO: remove the `na_action="ignore"` when that default has been changed in
# Categorical (GH51645).
action = "ignore" if is_categorical_dtype(obj) else None
action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype)

if len(mapped) and isinstance(mapped[0], ABCSeries):
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
ensure_platform_int,
is_any_real_numeric_dtype,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_dict_like,
is_dtype_equal,
Expand Down Expand Up @@ -409,7 +408,8 @@ def __init__(
null_mask = np.array(False)

# sanitize input
if is_categorical_dtype(values):
vdtype = getattr(values, "dtype", None)
if isinstance(vdtype, CategoricalDtype):
if dtype.categories is None:
dtype = CategoricalDtype(values.categories, dtype.ordered)
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
Expand Down Expand Up @@ -2721,7 +2721,9 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
raise TypeError("Input must be list-like")

categories: Index
if is_categorical_dtype(values):

vdtype = getattr(values, "dtype", None)
if isinstance(vdtype, CategoricalDtype):
values = extract_array(values)
# The Categorical we want to build has the same categories
# as values but its codes are by def [0, ..., len(n_categories) - 1]
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
maybe_upcast_numeric_to_64bit,
)
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -1772,7 +1771,7 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:
elif not is_list_like(values) or isinstance(values, ABCDataFrame):
# This will raise later, but we avoid passing to maybe_convert_platform
return values
elif is_categorical_dtype(values):
elif isinstance(getattr(values, "dtype", None), CategoricalDtype):
values = np.asarray(values)
elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
# TODO: should we just cast these to list?
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,13 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
>>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
True
"""
# GH#52527
warnings.warn(
"is_categorical_dtype is deprecated and will be removed in a future "
"version. Use isinstance(dtype, CategoricalDtype) instead",
FutureWarning,
stacklevel=find_stack_level(),
)
if isinstance(arr_or_dtype, ExtensionDtype):
# GH#33400 fastpath for dtype object
return arr_or_dtype.name == "category"
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_list_like,
is_scalar,
)
from pandas.core.dtypes.dtypes import CategoricalDtype

from pandas.core import algorithms
from pandas.core.arrays import (
Expand Down Expand Up @@ -618,7 +618,7 @@ def __init__(
# TODO 2022-10-08 we only have one test that gets here and
# values are already in nanoseconds in that case.
grouping_vector = Series(grouping_vector).to_numpy()
elif is_categorical_dtype(grouping_vector):
elif isinstance(getattr(grouping_vector, "dtype", None), CategoricalDtype):
# a passed Categorical
self._orig_cats = grouping_vector.categories
grouping_vector, self._all_grouper = recode_for_groupby(
Expand All @@ -635,7 +635,8 @@ def __iter__(self) -> Iterator:

@cache_readonly
def _passed_categorical(self) -> bool:
return is_categorical_dtype(self.grouping_vector)
dtype = getattr(self.grouping_vector, "dtype", None)
return isinstance(dtype, CategoricalDtype)

@cache_readonly
def name(self) -> Hashable:
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/interchange/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
from pandas.util._decorators import cache_readonly

import pandas as pd
from pandas.api.types import (
is_categorical_dtype,
is_string_dtype,
)
from pandas.api.types import is_string_dtype
from pandas.core.interchange.buffer import PandasBuffer
from pandas.core.interchange.dataframe_protocol import (
Column,
Expand Down Expand Up @@ -99,7 +96,7 @@ def offset(self) -> int:
def dtype(self) -> tuple[DtypeKind, int, str, str]:
dtype = self._col.dtype

if is_categorical_dtype(dtype):
if isinstance(dtype, pd.CategoricalDtype):
codes = self._col.values.codes
(
_,
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
DT64NS_DTYPE,
ensure_platform_int,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
Expand All @@ -33,6 +32,7 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
)
Expand Down Expand Up @@ -462,7 +462,8 @@ def _bins_to_cuts(
raise ValueError(
"Bin labels must be one fewer than the number of bin edges"
)
if not is_categorical_dtype(labels):

if not isinstance(getattr(labels, "dtype", None), CategoricalDtype):
labels = Categorical(
labels,
categories=labels if len(set(labels)) == len(labels) else None,
Expand Down
8 changes: 5 additions & 3 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

from pandas.core.dtypes.common import (
is_any_real_numeric_dtype,
is_categorical_dtype,
is_extension_array_dtype,
is_float,
is_float_dtype,
Expand All @@ -34,6 +33,7 @@
is_number,
is_numeric_dtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
Expand Down Expand Up @@ -563,7 +563,7 @@ def result(self):

def _convert_to_ndarray(self, data):
# GH31357: categorical columns are processed separately
if is_categorical_dtype(data):
if isinstance(data.dtype, CategoricalDtype):
return data

# GH32073: cast to float if values contain nulled integers
Expand Down Expand Up @@ -1211,7 +1211,9 @@ def _make_plot(self):

c_is_column = is_hashable(c) and c in self.data.columns

color_by_categorical = c_is_column and is_categorical_dtype(self.data[c])
color_by_categorical = c_is_column and isinstance(
self.data[c].dtype, CategoricalDtype
)

color = self.kwds.pop("color", None)
if c is not None and color is not None:
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from pandas.compat import PYPY

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dtype_equal,
is_object_dtype,
)
Expand Down Expand Up @@ -96,8 +95,8 @@ def test_memory_usage(index_or_series_memory_obj):
res_deep = obj.memory_usage(deep=True)

is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index))
is_categorical = is_categorical_dtype(obj.dtype) or (
is_ser and is_categorical_dtype(obj.index.dtype)
is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
)
is_object_string = is_dtype_equal(obj, "string[python]") or (
is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def get_is_dtype_funcs():
return [getattr(com, fname) for fname in fnames]


@pytest.mark.filterwarnings("ignore:is_categorical_dtype is deprecated:FutureWarning")
@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__)
def test_get_dtype_error_catch(func):
# see gh-15941
Expand All @@ -171,7 +172,7 @@ def test_get_dtype_error_catch(func):

msg = f"{func.__name__} is deprecated"
warn = None
if func is com.is_int64_dtype:
if func is com.is_int64_dtype or func is com.is_categorical_dtype:
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
Expand Down Expand Up @@ -274,12 +275,14 @@ def test_is_interval_dtype():


def test_is_categorical_dtype():
assert not com.is_categorical_dtype(object)
assert not com.is_categorical_dtype([1, 2, 3])
msg = "is_categorical_dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert not com.is_categorical_dtype(object)
assert not com.is_categorical_dtype([1, 2, 3])

assert com.is_categorical_dtype(CategoricalDtype())
assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
assert com.is_categorical_dtype(CategoricalDtype())
assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))


def test_is_string_dtype():
Expand Down
25 changes: 16 additions & 9 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,18 @@ def test_is_dtype(self, dtype):
assert not CategoricalDtype.is_dtype(np.float64)

def test_basic(self, dtype):
assert is_categorical_dtype(dtype)
msg = "is_categorical_dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert is_categorical_dtype(dtype)

factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])

s = Series(factor, name="A")
s = Series(factor, name="A")

# dtypes
assert is_categorical_dtype(s.dtype)
assert is_categorical_dtype(s)
assert not is_categorical_dtype(np.dtype("float64"))
# dtypes
assert is_categorical_dtype(s.dtype)
assert is_categorical_dtype(s)
assert not is_categorical_dtype(np.dtype("float64"))

def test_tuple_categories(self):
categories = [(1, "a"), (2, "b"), (3, "c")]
Expand Down Expand Up @@ -1101,10 +1103,15 @@ def test_is_bool_dtype_sparse():
)
def test_is_dtype_no_warning(check):
data = pd.DataFrame({"A": [1, 2]})
with tm.assert_produces_warning(None):

warn = None
msg = "is_categorical_dtype is deprecated"
if check is is_categorical_dtype:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg):
check(data)

with tm.assert_produces_warning(None):
with tm.assert_produces_warning(warn, match=msg):
check(data["A"])


Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from pandas.core.dtypes.base import _registry as ea_registry
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_interval_dtype,
is_object_dtype,
)
Expand Down Expand Up @@ -484,9 +483,9 @@ def test_setitem_intervals(self):
df["E"] = np.array(ser.values)
df["F"] = ser.astype(object)

assert is_categorical_dtype(df["B"].dtype)
assert isinstance(df["B"].dtype, CategoricalDtype)
assert is_interval_dtype(df["B"].cat.categories)
assert is_categorical_dtype(df["D"].dtype)
assert isinstance(df["D"].dtype, CategoricalDtype)
assert is_interval_dtype(df["D"].cat.categories)

# These go through the Series constructor and so get inferred back
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_categorical_dtype

import pandas as pd
from pandas import (
Categorical,
CategoricalDtype,
DataFrame,
Index,
Series,
Expand Down Expand Up @@ -1280,7 +1279,7 @@ def test_any_all_np_func(self, func, data, expected):
# GH 19976
data = DataFrame(data)

if any(is_categorical_dtype(x) for x in data.dtypes):
if any(isinstance(x, CategoricalDtype) for x in data.dtypes):
with pytest.raises(
TypeError, match="dtype category does not support reduction"
):
Expand Down
Loading