From 3e0f3690d1cb5d0635141459f4b2502eb859f55e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 28 Dec 2020 15:14:31 -0800 Subject: [PATCH 1/2] BUG: Series construction with mismatched dt64 data vs td64 dtype --- pandas/core/dtypes/cast.py | 23 ++++++++++---- pandas/tests/frame/test_constructors.py | 42 ++++++++++++++++++++----- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b7113669a1905..25259093f9fba 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -166,14 +166,23 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: elif isinstance(value, Timedelta): value = value.to_timedelta64() - if (isinstance(value, np.timedelta64) and dtype.kind == "M") or ( - isinstance(value, np.datetime64) and dtype.kind == "m" + _disallow_mismatched_datetimelike(value, dtype) + return value + + +def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): + """ + numpy allows np.array(dt64values, dtype="timedelta64[ns]") and + vice-versa, but we do not want to allow this, so we need to + check explicitly + """ + vdtype = getattr(value, "dtype", None) + if vdtype is None: + return + elif (vdtype.kind == "m" and dtype.kind == "M") or ( + vdtype.kind == "M" and dtype.kind == "m" ): - # numpy allows np.array(dt64values, dtype="timedelta64[ns]") and - # vice-versa, but we do not want to allow this, so we need to - # check explicitly raise TypeError(f"Cannot cast {repr(value)} to {dtype}") - return value def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): @@ -1715,6 +1724,8 @@ def construct_1d_ndarray_preserving_na( if dtype is not None and dtype.kind == "U": subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy) else: + if dtype is not None: + _disallow_mismatched_datetimelike(values, dtype) subarr = np.array(values, dtype=dtype, copy=copy) return subarr diff --git a/pandas/tests/frame/test_constructors.py 
b/pandas/tests/frame/test_constructors.py index 5ba38016ee552..94b2431650359 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2925,12 +2925,31 @@ def get1(obj): class TestFromScalar: - @pytest.fixture - def constructor(self, frame_or_series): - if frame_or_series is Series: - return functools.partial(Series, index=range(2)) + @pytest.fixture(params=[list, dict, None]) + def constructor(self, request, frame_or_series): + box = request.param + + extra = {"index": range(2)} + if frame_or_series is DataFrame: + extra["columns"] = ["A"] + + if box is None: + return functools.partial(frame_or_series, **extra) + + elif box is dict: + if frame_or_series is Series: + return lambda x, **kwargs: frame_or_series( + {0: x, 1: x}, **extra, **kwargs + ) + else: + return lambda x, **kwargs: frame_or_series({"A": x}, **extra, **kwargs) else: - return functools.partial(DataFrame, index=range(2), columns=range(2)) + if frame_or_series is Series: + return lambda x, **kwargs: frame_or_series([x, x], **extra, **kwargs) + else: + return lambda x, **kwargs: frame_or_series( + {"A": [x, x]}, **extra, **kwargs + ) @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) def test_from_nat_scalar(self, dtype, constructor): @@ -2951,7 +2970,8 @@ def test_from_timestamp_scalar_preserves_nanos(self, constructor): assert get1(obj) == ts def test_from_timedelta64_scalar_object(self, constructor, request): - if constructor.func is DataFrame and _np_version_under1p20: + if getattr(constructor, "func", None) is DataFrame and _np_version_under1p20: + # getattr check means we only xfail when box is None mark = pytest.mark.xfail( reason="np.array(td64, dtype=object) converts to int" ) @@ -2964,7 +2984,15 @@ def test_from_timedelta64_scalar_object(self, constructor, request): assert isinstance(get1(obj), np.timedelta64) @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) - def test_from_scalar_datetimelike_mismatched(self, constructor, 
cls): + def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request): + node = request.node + params = node.callspec.params + if params["frame_or_series"] is DataFrame and params["constructor"] is not None: + mark = pytest.mark.xfail( + reason="DataFrame incorrectly allows mismatched datetimelike" + ) + node.add_marker(mark) + scalar = cls("NaT", "ns") dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls] From 4aeff8c7e4ebc036efbfa0240dc7029c9e5e72a3 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Dec 2020 09:51:23 -0800 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 77bc080892e6c..0077f1061e588 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -189,7 +189,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) - Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) -- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`) +- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`) - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) Timedelta