From 10668d2b1b60ad1a5cce62927b015a5dfccdd8b9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 28 Oct 2018 06:34:21 -0700 Subject: [PATCH] API: Disallow dtypes w/o frequency when casting (#23392) Previously deprecated for Series constructor and the `.astype` method. Now being enforced. xref gh-15987. --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/dtypes/cast.py | 24 ++++++------ pandas/tests/series/test_constructors.py | 38 ++++++++----------- pandas/tests/series/test_dtypes.py | 47 +++++++++--------------- 4 files changed, 46 insertions(+), 64 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 4b49bb3da1382..1e2e86928c74d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -942,6 +942,7 @@ Removal of prior version deprecations/changes - Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) - :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`) +- The ``Series`` constructor and ``.astype`` method will now raise a ``ValueError`` if timestamp dtypes are passed in without a frequency (e.g. 
``np.datetime64``) for the ``dtype`` parameter (:issue:`15987`) - Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) - Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`) - :func:`pandas.pnow`, :func:`pandas.match`, :func:`pandas.groupby`, :func:`pd.get_store`, ``pd.Expr``, and ``pd.Term`` have been removed (:issue:`15538`, :issue:`15940`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 46c8126f65fec..f8b7fb7d88ee0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -3,7 +3,6 @@ from datetime import datetime, timedelta import numpy as np -import warnings from pandas._libs import tslib, lib, tslibs from pandas._libs.tslibs import iNaT, OutOfBoundsDatetime, Period @@ -664,6 +663,11 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): e.g. the item sizes don't align. skipna: bool, default False Whether or not we should skip NaN when casting as a string-type. + + Raises + ------ + ValueError + The dtype was a datetime/timedelta dtype, but it had no frequency. """ # dispatch on extension dtype if needed @@ -745,12 +749,9 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy) if dtype.name in ("datetime64", "timedelta64"): - msg = ("Passing in '{dtype}' dtype with no frequency is " - "deprecated and will raise in a future version. " + msg = ("The '{dtype}' dtype has no frequency. " "Please pass in '{dtype}[ns]' instead.") - warnings.warn(msg.format(dtype=dtype.name), - FutureWarning, stacklevel=5) - dtype = np.dtype(dtype.name + "[ns]") + raise ValueError(msg.format(dtype=dtype.name)) if copy or is_object_dtype(arr) or is_object_dtype(dtype): # Explicit copy, or required since NumPy can't view from / to object. 
@@ -1019,16 +1020,14 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): if is_datetime64 or is_datetime64tz or is_timedelta64: - # force the dtype if needed - msg = ("Passing in '{dtype}' dtype with no frequency is " - "deprecated and will raise in a future version. " + # Force the dtype if needed. + msg = ("The '{dtype}' dtype has no frequency. " "Please pass in '{dtype}[ns]' instead.") if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): if dtype.name in ('datetime64', 'datetime64[ns]'): if dtype.name == 'datetime64': - warnings.warn(msg.format(dtype=dtype.name), - FutureWarning, stacklevel=5) + raise ValueError(msg.format(dtype=dtype.name)) dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " @@ -1044,8 +1043,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): if dtype.name in ('timedelta64', 'timedelta64[ns]'): if dtype.name == 'timedelta64': - warnings.warn(msg.format(dtype=dtype.name), - FutureWarning, stacklevel=5) + raise ValueError(msg.format(dtype=dtype.name)) dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to " diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 07b8eb930e8d0..bdd99dd485042 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1192,32 +1192,26 @@ def test_constructor_cast_object(self, index): exp = Series(index).astype(object) tm.assert_series_equal(s, exp) - def test_constructor_generic_timestamp_deprecated(self): - # see gh-15524 - - with tm.assert_produces_warning(FutureWarning): - dtype = np.timedelta64 - s = Series([], dtype=dtype) - - assert s.empty - assert s.dtype == 'm8[ns]' - - with tm.assert_produces_warning(FutureWarning): - dtype = np.datetime64 - s = Series([], dtype=dtype) + @pytest.mark.parametrize("dtype", [ + np.datetime64, + np.timedelta64, + ]) + def 
test_constructor_generic_timestamp_no_frequency(self, dtype): + # see gh-15524, gh-15987 + msg = "dtype has no frequency. Please pass in" - assert s.empty - assert s.dtype == 'M8[ns]' + with tm.assert_raises_regex(ValueError, msg): + Series([], dtype=dtype) - # These timestamps have the wrong frequencies, - # so an Exception should be raised now. - msg = "cannot convert timedeltalike" - with tm.assert_raises_regex(TypeError, msg): - Series([], dtype='m8[ps]') + @pytest.mark.parametrize("dtype,msg", [ + ("m8[ps]", "cannot convert timedeltalike"), + ("M8[ps]", "cannot convert datetimelike"), + ]) + def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): + # see gh-15524, gh-15987 - msg = "cannot convert datetimelike" with tm.assert_raises_regex(TypeError, msg): - Series([], dtype='M8[ps]') + Series([], dtype=dtype) @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) def test_constructor_range_dtype(self, dtype): diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index b862f1588a547..c62531241369d 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -3,7 +3,6 @@ import string import sys -import warnings from datetime import datetime, timedelta import numpy as np @@ -21,7 +20,7 @@ from pandas.compat import lrange, range, u -class TestSeriesDtypes(): +class TestSeriesDtypes(object): def test_dt64_series_astype_object(self): dt64ser = Series(date_range('20130101', periods=3)) @@ -396,40 +395,30 @@ def test_astype_categoricaldtype_with_args(self): with pytest.raises(TypeError): s.astype(type_, categories=['a', 'b'], ordered=False) - def test_astype_generic_timestamp_deprecated(self): - # see gh-15524 + @pytest.mark.parametrize("dtype", [ + np.datetime64, + np.timedelta64, + ]) + def test_astype_generic_timestamp_no_frequency(self, dtype): + # see gh-15524, gh-15987 data = [1] + s = Series(data) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - s 
= Series(data) - dtype = np.datetime64 - result = s.astype(dtype) - expected = Series(data, dtype=dtype) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - s = Series(data) - dtype = np.timedelta64 - result = s.astype(dtype) - expected = Series(data, dtype=dtype) - tm.assert_series_equal(result, expected) + msg = "dtype has no frequency. Please pass in" + with tm.assert_raises_regex(ValueError, msg): + s.astype(dtype) @pytest.mark.parametrize("dtype", np.typecodes['All']) def test_astype_empty_constructor_equality(self, dtype): # see gh-15524 - if dtype not in ('S', 'V'): # poor support (if any) currently - with warnings.catch_warnings(record=True): - if dtype in ('M', 'm'): - # Generic timestamp dtypes ('M' and 'm') are deprecated, - # but we test that already in series/test_constructors.py - warnings.simplefilter("ignore", FutureWarning) - - init_empty = Series([], dtype=dtype) - as_type_empty = Series([]).astype(dtype) - tm.assert_series_equal(init_empty, as_type_empty) + if dtype not in ( + "S", "V", # poor support (if any) currently + "M", "m" # Generic timestamps raise a ValueError. Already tested. + ): + init_empty = Series([], dtype=dtype) + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) def test_complex(self): # see gh-4819: complex access for ndarray compat