Skip to content

Commit

Permalink
CLN: use idiomatic pandas_dtypes in pandas/dtypes/common.py (#24541)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored Jan 4, 2019
1 parent a42c9be commit 19f715c
Show file tree
Hide file tree
Showing 13 changed files with 349 additions and 261 deletions.
39 changes: 39 additions & 0 deletions asv_bench/benchmarks/dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from pandas.api.types import pandas_dtype

import numpy as np
from .pandas_vb_common import (
numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes)


# Every plain NumPy type under test, converted to an ``np.dtype`` instance.
_numpy_dtypes = list(map(
    np.dtype, numeric_dtypes + datetime_dtypes + string_dtypes))
# Complete parameter pool: the NumPy dtypes followed by the extension dtypes.
_dtypes = _numpy_dtypes + extension_dtypes


class Dtypes(object):
    """Benchmark ``pandas_dtype`` over the dtypes collected in ``_dtypes``.

    Every entry of ``_dtypes`` is used twice as a parameter: once as the
    object itself and once as the value of its ``name`` attribute.
    """
    params = _dtypes + [dt.name for dt in _dtypes]
    param_names = ['dtype']

    def time_pandas_dtype(self, dtype):
        """Call ``pandas_dtype`` on one parameter; the result is discarded."""
        pandas_dtype(dtype)


class DtypesInvalid(object):
    """Benchmark ``pandas_dtype`` on inputs where a ``TypeError`` is tolerated.

    The benchmark parameters are short labels; the value actually handed to
    ``pandas_dtype`` is looked up in ``data_dict`` under that label.
    """
    param_names = ['dtype']
    params = ['scalar-string', 'scalar-int', 'list-string', 'array-string']
    # Label -> input object. Built once at class-definition time, so the
    # 1000-element list and array are not rebuilt on every timed call.
    data_dict = {'scalar-string': 'foo',
                 'scalar-int': 1,
                 'list-string': ['foo'] * 1000,
                 'array-string': np.array(['foo'] * 1000)}

    def time_pandas_dtype_invalid(self, dtype):
        """Call ``pandas_dtype`` on the input registered under ``dtype``.

        A ``TypeError`` raised by ``pandas_dtype`` is swallowed; any other
        exception propagates.
        """
        try:
            pandas_dtype(self.data_dict[dtype])
        except TypeError:
            # Deliberately ignored: rejection of the input is an accepted
            # outcome here (the class name suggests it is the case being
            # timed).
            pass


from .pandas_vb_common import setup # noqa: F401
10 changes: 10 additions & 0 deletions asv_bench/benchmarks/pandas_vb_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from importlib import import_module

import numpy as np
import pandas as pd

# Compatibility import for lib
for imp in ['pandas._libs.lib', 'pandas.lib']:
Expand All @@ -14,6 +15,15 @@
# NumPy scalar types shared by the benchmark modules as parameter pools.
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
                  np.float64, np.int16, np.int8, np.uint16, np.uint8]
datetime_dtypes = [np.datetime64, np.timedelta64]
# ``np.object`` was merely an alias of the builtin ``object``; it is
# deprecated and absent from newer NumPy releases, so use the builtin.
string_dtypes = [object]
# Mix of dtype classes (integer, categorical, interval) and ready-made
# instances (tz-aware datetime, period).
extension_dtypes = [pd.Int8Dtype, pd.Int16Dtype,
                    pd.Int32Dtype, pd.Int64Dtype,
                    pd.UInt8Dtype, pd.UInt16Dtype,
                    pd.UInt32Dtype, pd.UInt64Dtype,
                    pd.CategoricalDtype,
                    pd.IntervalDtype,
                    pd.DatetimeTZDtype('ns', 'UTC'),
                    pd.PeriodDtype('D')]


def setup(*args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ Backwards incompatible API changes
- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`)
- :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`)
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`)
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`)

Percentage change on groupby
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
5 changes: 5 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,14 @@ def tz_aware_fixture(request):
return request.param


# ----------------------------------------------------------------
# Dtypes
# Integer dtype spellings, grouped by signedness. The capitalised "EA"
# variants are presumably the extension-array integer dtype names -- confirm
# against pandas/core/arrays/integer.py.
UNSIGNED_INT_DTYPES = [
    "uint8",
    "uint16",
    "uint32",
    "uint64",
]
UNSIGNED_EA_INT_DTYPES = [
    "UInt8",
    "UInt16",
    "UInt32",
    "UInt64",
]
SIGNED_INT_DTYPES = [
    int,  # the builtin type, alongside the string spellings
    "int8",
    "int16",
    "int32",
    "int64",
]
SIGNED_EA_INT_DTYPES = [
    "Int8",
    "Int16",
    "Int32",
    "Int64",
]
# Combined lists: unsigned entries first, then signed.
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES

# Floating-point and complex spellings; each list starts with the builtin type.
FLOAT_DTYPES = [
    float,
    "float32",
    "float64",
]
COMPLEX_DTYPES = [
    complex,
    "complex64",
    "complex128",
]
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class _IntegerDtype(ExtensionDtype):
The attributes name & type are set when these subclasses are created.
"""
name = None
base = None
type = None
na_value = np.nan

Expand Down Expand Up @@ -153,6 +154,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = _dtypes[str(np.dtype(dtype))]
Expand Down Expand Up @@ -655,7 +657,8 @@ def integer_arithmetic_method(self, other):
else:
name = dtype.capitalize()
classname = "{}Dtype".format(name)
attributes_dict = {'type': getattr(np, dtype),
numpy_dtype = getattr(np, dtype)
attributes_dict = {'type': numpy_dtype,
'name': name}
dtype_type = register_extension_dtype(
type(classname, (_IntegerDtype, ), attributes_dict)
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from pandas.compat import PY3, string_types, text_type, to_str

from .common import (
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes,
ensure_int8, ensure_int16, ensure_int32, ensure_int64, ensure_object,
is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8,
ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool,
is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype, is_datetimelike, is_dtype_equal,
is_extension_array_dtype, is_extension_type, is_float, is_float_dtype,
Expand Down Expand Up @@ -544,7 +544,7 @@ def invalidate_string_dtypes(dtype_set):
"""Change string like dtypes to object for
``DataFrame.select_dtypes()``.
"""
non_string_dtypes = dtype_set - _string_dtypes
non_string_dtypes = dtype_set - {np.dtype('S').type, np.dtype('<U').type}
if non_string_dtypes != dtype_set:
raise TypeError("string dtypes are not allowed, use 'object' instead")

Expand Down
Loading

0 comments on commit 19f715c

Please sign in to comment.