Skip to content

Commit

Permalink
BUG: Series construction with mismatched dt64 data vs td64 dtype (#38764)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Dec 29, 2020
1 parent a37f1a4 commit 1d19629
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 14 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`)
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`)
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
-
Expand Down
23 changes: 17 additions & 6 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,23 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
elif isinstance(value, Timedelta):
value = value.to_timedelta64()

if (isinstance(value, np.timedelta64) and dtype.kind == "M") or (
isinstance(value, np.datetime64) and dtype.kind == "m"
_disallow_mismatched_datetimelike(value, dtype)
return value


def _disallow_mismatched_datetimelike(value, dtype: DtypeObj) -> None:
    """
    Raise if ``value`` and ``dtype`` mix datetime64 and timedelta64 kinds.

    numpy allows np.array(dt64values, dtype="timedelta64[ns]") and
    vice-versa, but we do not want to allow this, so we need to
    check explicitly.

    Parameters
    ----------
    value : object
        Scalar or array-like that is about to be cast.  Only its ``dtype``
        attribute is inspected; objects without one are ignored.
    dtype : np.dtype or ExtensionDtype
        Target dtype of the cast.

    Raises
    ------
    TypeError
        If ``value`` has timedelta64 kind ("m") and ``dtype`` has datetime64
        kind ("M"), or vice-versa.
    """
    vdtype = getattr(value, "dtype", None)
    if vdtype is None:
        # Nothing to compare against (e.g. a list or a plain Python scalar).
        return
    elif (vdtype.kind == "m" and dtype.kind == "M") or (
        vdtype.kind == "M" and dtype.kind == "m"
    ):
        raise TypeError(f"Cannot cast {repr(value)} to {dtype}")
return value


def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
Expand Down Expand Up @@ -1715,6 +1724,8 @@ def construct_1d_ndarray_preserving_na(
if dtype is not None and dtype.kind == "U":
subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
else:
if dtype is not None:
_disallow_mismatched_datetimelike(values, dtype)
subarr = np.array(values, dtype=dtype, copy=copy)

return subarr
Expand Down
42 changes: 35 additions & 7 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2925,12 +2925,31 @@ def get1(obj):


class TestFromScalar:
@pytest.fixture
def constructor(self, frame_or_series):
if frame_or_series is Series:
return functools.partial(Series, index=range(2))
@pytest.fixture(params=[list, dict, None])
def constructor(self, request, frame_or_series):
box = request.param

extra = {"index": range(2)}
if frame_or_series is DataFrame:
extra["columns"] = ["A"]

if box is None:
return functools.partial(frame_or_series, **extra)

elif box is dict:
if frame_or_series is Series:
return lambda x, **kwargs: frame_or_series(
{0: x, 1: x}, **extra, **kwargs
)
else:
return lambda x, **kwargs: frame_or_series({"A": x}, **extra, **kwargs)
else:
return functools.partial(DataFrame, index=range(2), columns=range(2))
if frame_or_series is Series:
return lambda x, **kwargs: frame_or_series([x, x], **extra, **kwargs)
else:
return lambda x, **kwargs: frame_or_series(
{"A": [x, x]}, **extra, **kwargs
)

@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_from_nat_scalar(self, dtype, constructor):
Expand All @@ -2951,7 +2970,8 @@ def test_from_timestamp_scalar_preserves_nanos(self, constructor):
assert get1(obj) == ts

def test_from_timedelta64_scalar_object(self, constructor, request):
if constructor.func is DataFrame and _np_version_under1p20:
if getattr(constructor, "func", None) is DataFrame and _np_version_under1p20:
# getattr check means we only xfail when box is None
mark = pytest.mark.xfail(
reason="np.array(td64, dtype=object) converts to int"
)
Expand All @@ -2964,7 +2984,15 @@ def test_from_timedelta64_scalar_object(self, constructor, request):
assert isinstance(get1(obj), np.timedelta64)

@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
node = request.node
params = node.callspec.params
if params["frame_or_series"] is DataFrame and params["constructor"] is not None:
mark = pytest.mark.xfail(
reason="DataFrame incorrectly allows mismatched datetimelike"
)
node.add_marker(mark)

scalar = cls("NaT", "ns")
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]

Expand Down

0 comments on commit 1d19629

Please sign in to comment.