diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7617ad5b428a2..93ac9caa42e3e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1125,6 +1125,7 @@ Deprecations - :func:`pandas.types.is_period` is deprecated in favor of `pandas.types.is_period_dtype` (:issue:`23917`) - :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz` (:issue:`23917`) - Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`) +- Passing a string alias like ``'datetime64[ns, UTC]'`` as the `unit` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). .. _whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 63044eb1aafa4..33f71bcb2fef2 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1174,16 +1174,21 @@ def validate_tz_from_dtype(dtype, tz): ValueError : on tzinfo mismatch """ if dtype is not None: - try: - dtype = DatetimeTZDtype.construct_from_string(dtype) - dtz = getattr(dtype, 'tz', None) - if dtz is not None: - if tz is not None and not timezones.tz_compare(tz, dtz): - raise ValueError("cannot supply both a tz and a dtype" - " with a tz") - tz = dtz - except TypeError: - pass + if isinstance(dtype, compat.string_types): + try: + dtype = DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + # Things like `datetime64[ns]`, which is OK for the + # constructors, but also nonsense, which should be validated + # but not by us. 
We *do* allow non-existent tz errors to + # go through + pass + dtz = getattr(dtype, 'tz', None) + if dtz is not None: + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError("cannot supply both a tz and a dtype" + " with a tz") + tz = dtz return tz diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..e731dd33f5bb5 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -11,7 +11,7 @@ resolution as libresolution, timezones) import pandas.compat as compat from pandas.errors import PerformanceWarning -from pandas.util._decorators import Appender, cache_readonly +from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type, @@ -333,7 +333,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, def _box_func(self): return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) - @cache_readonly + @property def dtype(self): if self.tz is None: return _NS_DTYPE diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 51b8488313e99..e1141c6b6b3a8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1767,38 +1767,6 @@ def is_complex_dtype(arr_or_dtype): return issubclass(tipo, np.complexfloating) -def _coerce_to_dtype(dtype): - """ - Coerce a string or np.dtype to a pandas or numpy - dtype if possible. - - If we cannot convert to a pandas dtype initially, - we convert to a numpy dtype. - - Parameters - ---------- - dtype : The dtype that we want to coerce. - - Returns - ------- - pd_or_np_dtype : The coerced dtype. 
- """ - - if is_categorical_dtype(dtype): - categories = getattr(dtype, 'categories', None) - ordered = getattr(dtype, 'ordered', False) - dtype = CategoricalDtype(categories=categories, ordered=ordered) - elif is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype(dtype) - elif is_period_dtype(dtype): - dtype = PeriodDtype(dtype) - elif is_interval_dtype(dtype): - dtype = IntervalDtype(dtype) - else: - dtype = np.dtype(dtype) - return dtype - - def _get_dtype(arr_or_dtype): """ Get the dtype instance associated with an array diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e432f3604f7b1..82f931c1469b7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,8 +1,9 @@ """ define extension dtypes """ - import re +import warnings import numpy as np +import pytz from pandas._libs.interval import Interval from pandas._libs.tslibs import NaT, Period, Timestamp, timezones @@ -491,64 +492,69 @@ class DatetimeTZDtype(PandasExtensionDtype): _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") _cache = {} - def __new__(cls, unit=None, tz=None): + def __init__(self, unit="ns", tz=None): """ - Create a new unit if needed, otherwise return from the cache + An ExtensionDtype for timezone-aware datetime data. Parameters ---------- - unit : string unit that this represents, currently must be 'ns' - tz : string tz that this represents - """ - - if isinstance(unit, DatetimeTZDtype): - unit, tz = unit.unit, unit.tz - - elif unit is None: - # we are called as an empty constructor - # generally for pickle compat - return object.__new__(cls) + unit : str, default "ns" + The precision of the datetime data. Currently limited + to ``"ns"``. + tz : str, int, or datetime.tzinfo + The timezone. - elif tz is None: + Raises + ------ + pytz.UnknownTimeZoneError + When the requested timezone cannot be found.
- # we were passed a string that we can construct - try: - m = cls._match.search(unit) - if m is not None: - unit = m.groupdict()['unit'] - tz = timezones.maybe_get_tz(m.groupdict()['tz']) - except TypeError: - raise ValueError("could not construct DatetimeTZDtype") + Examples + -------- + >>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC') + datetime64[ns, UTC] - elif isinstance(unit, compat.string_types): + >>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central') + datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] + """ + if isinstance(unit, DatetimeTZDtype): + unit, tz = unit.unit, unit.tz - if unit != 'ns': + if unit != 'ns': + if isinstance(unit, compat.string_types) and tz is None: + # maybe a string like datetime64[ns, tz], which we support for + # now. + result = type(self).construct_from_string(unit) + unit = result.unit + tz = result.tz + msg = ( + "Passing a dtype alias like 'datetime64[ns, {tz}]' " + "to DatetimeTZDtype is deprecated. Use " + "'DatetimeTZDtype.construct_from_string()' instead." 
+ ) + warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2) + else: raise ValueError("DatetimeTZDtype only supports ns units") - unit = unit - tz = tz + if tz: + tz = timezones.maybe_get_tz(tz) + elif tz is not None: + raise pytz.UnknownTimeZoneError(tz) + elif tz is None: + raise TypeError("A 'tz' is required.") - if tz is None: - raise ValueError("DatetimeTZDtype constructor must have a tz " - "supplied") + self._unit = unit + self._tz = tz - # hash with the actual tz if we can - # some cannot be hashed, so stringfy - try: - key = (unit, tz) - hash(key) - except TypeError: - key = (unit, str(tz)) + @property + def unit(self): + """The precision of the datetime data.""" + return self._unit - # set/retrieve from cache - try: - return cls._cache[key] - except KeyError: - u = object.__new__(cls) - u.unit = unit - u.tz = tz - cls._cache[key] = u - return u + @property + def tz(self): + """The timezone.""" + return self._tz @classmethod def construct_array_type(cls): @@ -565,24 +571,42 @@ def construct_array_type(cls): @classmethod def construct_from_string(cls, string): """ - attempt to construct this type from a string, raise a TypeError if - it's not possible + Construct a DatetimeTZDtype from a string. + + Parameters + ---------- + string : str + The string alias for this DatetimeTZDtype. + Should be formatted like ``datetime64[ns, <tz>]``, + where ``<tz>`` is the timezone name.
+ + Examples + -------- + >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') + datetime64[ns, UTC] """ + msg = "Could not construct DatetimeTZDtype from '{}'" try: - return cls(unit=string) - except ValueError: - raise TypeError("could not construct DatetimeTZDtype") + match = cls._match.match(string) + if match: + d = match.groupdict() + return cls(unit=d['unit'], tz=d['tz']) + except Exception: + # TODO(py3): Change this pass to `raise TypeError(msg) from e` + pass + raise TypeError(msg.format(string)) def __unicode__(self): - # format the tz return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) @property def name(self): + """A string representation of the dtype.""" return str(self) def __hash__(self): # make myself hashable + # TODO: update this. return hash(str(self)) def __eq__(self, other): @@ -593,6 +617,11 @@ def __eq__(self, other): self.unit == other.unit and str(self.tz) == str(other.tz)) + def __setstate__(self, state): + # for pickle compat. + self._tz = state['tz'] + self._unit = state['unit'] + class PeriodDtype(ExtensionDtype, PandasExtensionDtype): """ diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index fa60c326a19ea..809dcbd054ea0 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -209,7 +209,7 @@ def _isna_ndarraylike(obj): vec = libmissing.isnaobj(values.ravel()) result[...] 
= vec.reshape(shape) - elif needs_i8_conversion(obj): + elif needs_i8_conversion(dtype): # this is the NaT pattern result = values.view('i8') == iNaT else: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1b67c20530eb0..828b0df73b341 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2669,11 +2669,10 @@ def _astype(self, dtype, **kwargs): these automatically copy, so copy=True has no effect raise on an except if raise == True """ + dtype = pandas_dtype(dtype) # if we are passed a datetime64[ns, tz] if is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype(dtype) - values = self.values if getattr(values, 'tz', None) is None: values = DatetimeIndex(values).tz_localize('UTC') diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a7390e0cffbbf..e176d273b916c 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -43,8 +43,8 @@ def test_numpy_string_dtype(self): 'datetime64[ns, Asia/Tokyo]', 'datetime64[ns, UTC]']) def test_datetimetz_dtype(self, dtype): - assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype) - assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype) + assert (com.pandas_dtype(dtype) == + DatetimeTZDtype.construct_from_string(dtype)) assert com.pandas_dtype(dtype) == dtype def test_categorical_dtype(self): diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 4048e98142a7f..81d08ac71bf6d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -17,7 +17,6 @@ is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, is_datetime64_any_dtype, is_string_dtype, - _coerce_to_dtype, is_bool_dtype, ) from pandas.core.sparse.api import SparseDtype @@ -147,6 +146,19 @@ class TestDatetimeTZDtype(Base): def create(self): return DatetimeTZDtype('ns', 'US/Eastern') + def test_alias_to_unit_raises(self): + # 23990 + with 
tm.assert_produces_warning(FutureWarning): + DatetimeTZDtype('datetime64[ns, US/Central]') + + def test_alias_to_unit_bad_alias_raises(self): + # 23990 + with pytest.raises(TypeError, match=''): + DatetimeTZDtype('this is a bad string') + + with pytest.raises(TypeError, match=''): + DatetimeTZDtype('datetime64[ns, US/NotATZ]') + def test_hash_vs_equality(self): # make sure that we satisfy is semantics dtype = self.dtype @@ -155,29 +167,24 @@ def test_hash_vs_equality(self): assert dtype == dtype2 assert dtype2 == dtype assert dtype3 == dtype - assert dtype is dtype2 - assert dtype2 is dtype - assert dtype3 is dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) + dtype4 = DatetimeTZDtype("ns", "US/Central") + assert dtype2 != dtype4 + assert hash(dtype2) != hash(dtype4) + def test_construction(self): pytest.raises(ValueError, lambda: DatetimeTZDtype('ms', 'US/Eastern')) def test_subclass(self): - a = DatetimeTZDtype('datetime64[ns, US/Eastern]') - b = DatetimeTZDtype('datetime64[ns, CET]') + a = DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]') + b = DatetimeTZDtype.construct_from_string('datetime64[ns, CET]') assert issubclass(type(a), type(a)) assert issubclass(type(a), type(b)) - def test_coerce_to_dtype(self): - assert (_coerce_to_dtype('datetime64[ns, US/Eastern]') == - DatetimeTZDtype('ns', 'US/Eastern')) - assert (_coerce_to_dtype('datetime64[ns, Asia/Tokyo]') == - DatetimeTZDtype('ns', 'Asia/Tokyo')) - def test_compat(self): assert is_datetime64tz_dtype(self.dtype) assert is_datetime64tz_dtype('datetime64[ns, US/Eastern]') @@ -189,14 +196,16 @@ def test_compat(self): assert not is_datetime64_dtype('datetime64[ns, US/Eastern]') def test_construction_from_string(self): - result = DatetimeTZDtype('datetime64[ns, US/Eastern]') - assert is_dtype_equal(self.dtype, result) result = DatetimeTZDtype.construct_from_string( 'datetime64[ns, US/Eastern]') assert is_dtype_equal(self.dtype, result) pytest.raises(TypeError, lambda: 
DatetimeTZDtype.construct_from_string('foo')) + def test_construct_from_string_raises(self): + with pytest.raises(TypeError, match="notatz"): + DatetimeTZDtype.construct_from_string('datetime64[ns, notatz]') + def test_is_dtype(self): assert not DatetimeTZDtype.is_dtype(None) assert DatetimeTZDtype.is_dtype(self.dtype) @@ -255,14 +264,13 @@ def test_dst(self): def test_parser(self, tz, constructor): # pr #11245 dtz_str = '{con}[ns, {tz}]'.format(con=constructor, tz=tz) - result = DatetimeTZDtype(dtz_str) + result = DatetimeTZDtype.construct_from_string(dtz_str) expected = DatetimeTZDtype('ns', tz) assert result == expected def test_empty(self): - dt = DatetimeTZDtype() - with pytest.raises(AttributeError): - str(dt) + with pytest.raises(TypeError, match="A 'tz' is required."): + DatetimeTZDtype() class TestPeriodDtype(Base): @@ -321,10 +329,6 @@ def test_identity(self): assert PeriodDtype('period[1S1U]') == PeriodDtype('period[1000001U]') assert PeriodDtype('period[1S1U]') is PeriodDtype('period[1000001U]') - def test_coerce_to_dtype(self): - assert _coerce_to_dtype('period[D]') == PeriodDtype('period[D]') - assert _coerce_to_dtype('period[3M]') == PeriodDtype('period[3M]') - def test_compat(self): assert not is_datetime64_ns_dtype(self.dtype) assert not is_datetime64_ns_dtype('period[D]') @@ -519,10 +523,6 @@ def test_is_dtype(self): assert not IntervalDtype.is_dtype(np.int64) assert not IntervalDtype.is_dtype(np.float64) - def test_coerce_to_dtype(self): - assert (_coerce_to_dtype('interval[int64]') == - IntervalDtype('interval[int64]')) - def test_equality(self): assert is_dtype_equal(self.dtype, 'interval[int64]') assert is_dtype_equal(self.dtype, IntervalDtype('int64')) @@ -795,34 +795,38 @@ def test_update_dtype_errors(self, bad_dtype): dtype.update_dtype(bad_dtype) -@pytest.mark.parametrize( - 'dtype', - [CategoricalDtype, IntervalDtype]) +@pytest.mark.parametrize('dtype', [ + CategoricalDtype, + IntervalDtype, +]) def test_registry(dtype): assert dtype in 
registry.dtypes -@pytest.mark.parametrize('dtype', [DatetimeTZDtype, PeriodDtype]) +@pytest.mark.parametrize('dtype', [ + PeriodDtype, + DatetimeTZDtype, +]) def test_pandas_registry(dtype): assert dtype not in registry.dtypes assert dtype in _pandas_registry.dtypes -@pytest.mark.parametrize( - 'dtype, expected', - [('int64', None), - ('interval', IntervalDtype()), - ('interval[int64]', IntervalDtype()), - ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), - ('category', CategoricalDtype())]) +@pytest.mark.parametrize('dtype, expected', [ + ('int64', None), + ('interval', IntervalDtype()), + ('interval[int64]', IntervalDtype()), + ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), + ('category', CategoricalDtype()), +]) def test_registry_find(dtype, expected): assert registry.find(dtype) == expected -@pytest.mark.parametrize( - 'dtype, expected', - [('period[D]', PeriodDtype('D')), - ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) +@pytest.mark.parametrize('dtype, expected', [ + ('period[D]', PeriodDtype('D')), + ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')), +]) def test_pandas_registry_find(dtype, expected): assert _pandas_registry.find(dtype) == expected diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 8f82db69a9213..cb3f5933c885f 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -322,7 +322,7 @@ def test_array_equivalent_str(): # Datetime-like (np.dtype("M8[ns]"), NaT), (np.dtype("m8[ns]"), NaT), - (DatetimeTZDtype('datetime64[ns, US/Eastern]'), NaT), + (DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]'), NaT), (PeriodDtype("M"), NaT), # Integer ('u1', 0), ('u2', 0), ('u4', 0), ('u8', 0), diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2ad6da084e451..2bfd3445f2a20 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py 
@@ -91,8 +91,8 @@ def test_datetime_with_tz_dtypes(self): tzframe.iloc[1, 2] = pd.NaT result = tzframe.dtypes.sort_index() expected = Series([np.dtype('datetime64[ns]'), - DatetimeTZDtype('datetime64[ns, US/Eastern]'), - DatetimeTZDtype('datetime64[ns, CET]')], + DatetimeTZDtype('ns', 'US/Eastern'), + DatetimeTZDtype('ns', 'CET')], ['A', 'B', 'C']) assert_series_equal(result, expected) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index bb154de14611c..4fda977706d8b 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -150,7 +150,7 @@ def test_as_json_table_type_bool_dtypes(self, bool_dtype): assert as_json_table_type(bool_dtype) == 'boolean' @pytest.mark.parametrize('date_dtype', [ - np.datetime64, np.dtype("