Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Series construction with mismatched dt64 data vs td64 dtype #38764

Merged
merged 3 commits into from
Dec 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`)
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`)
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)

Timedelta
Expand Down
23 changes: 17 additions & 6 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,23 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
elif isinstance(value, Timedelta):
value = value.to_timedelta64()

if (isinstance(value, np.timedelta64) and dtype.kind == "M") or (
isinstance(value, np.datetime64) and dtype.kind == "m"
_disallow_mismatched_datetimelike(value, dtype)
return value


def _disallow_mismatched_datetimelike(value, dtype: DtypeObj) -> None:
    """
    Raise if datetime64 data is paired with a timedelta64 dtype, or vice-versa.

    numpy allows np.array(dt64values, dtype="timedelta64[ns]") and
    vice-versa, but we do not want to allow this, so we need to
    check explicitly.

    Parameters
    ----------
    value : object
        Scalar or array-like that is about to be cast.  Only objects that
        expose a ``dtype`` attribute are inspected; anything else is
        accepted without a check.
    dtype : DtypeObj
        Target dtype of the cast.

    Raises
    ------
    TypeError
        If ``value.dtype.kind`` is "m" while ``dtype.kind`` is "M", or
        the reverse.
    """
    vdtype = getattr(value, "dtype", None)
    if vdtype is None:
        # Nothing to compare against (e.g. a plain Python scalar or list).
        return

    # Only the two crossed pairings ("m" data with "M" target, or "M" data
    # with "m" target) are rejected; matching kinds and all other kinds pass.
    if {vdtype.kind, dtype.kind} == {"m", "M"}:
        raise TypeError(f"Cannot cast {repr(value)} to {dtype}")


def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
Expand Down Expand Up @@ -1715,6 +1724,8 @@ def construct_1d_ndarray_preserving_na(
if dtype is not None and dtype.kind == "U":
subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
else:
if dtype is not None:
_disallow_mismatched_datetimelike(values, dtype)
subarr = np.array(values, dtype=dtype, copy=copy)

return subarr
Expand Down
42 changes: 35 additions & 7 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2925,12 +2925,31 @@ def get1(obj):


class TestFromScalar:
@pytest.fixture
def constructor(self, frame_or_series):
if frame_or_series is Series:
return functools.partial(Series, index=range(2))
@pytest.fixture(params=[list, dict, None])
def constructor(self, request, frame_or_series):
box = request.param

extra = {"index": range(2)}
if frame_or_series is DataFrame:
extra["columns"] = ["A"]

if box is None:
return functools.partial(frame_or_series, **extra)

elif box is dict:
if frame_or_series is Series:
return lambda x, **kwargs: frame_or_series(
{0: x, 1: x}, **extra, **kwargs
)
else:
return lambda x, **kwargs: frame_or_series({"A": x}, **extra, **kwargs)
else:
return functools.partial(DataFrame, index=range(2), columns=range(2))
if frame_or_series is Series:
return lambda x, **kwargs: frame_or_series([x, x], **extra, **kwargs)
else:
return lambda x, **kwargs: frame_or_series(
{"A": [x, x]}, **extra, **kwargs
)

@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_from_nat_scalar(self, dtype, constructor):
Expand All @@ -2951,7 +2970,8 @@ def test_from_timestamp_scalar_preserves_nanos(self, constructor):
assert get1(obj) == ts

def test_from_timedelta64_scalar_object(self, constructor, request):
if constructor.func is DataFrame and _np_version_under1p20:
if getattr(constructor, "func", None) is DataFrame and _np_version_under1p20:
# getattr check means we only xfail when box is None
mark = pytest.mark.xfail(
reason="np.array(td64, dtype=object) converts to int"
)
Expand All @@ -2964,7 +2984,15 @@ def test_from_timedelta64_scalar_object(self, constructor, request):
assert isinstance(get1(obj), np.timedelta64)

@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
node = request.node
params = node.callspec.params
if params["frame_or_series"] is DataFrame and params["constructor"] is not None:
mark = pytest.mark.xfail(
reason="DataFrame incorrectly allows mismatched datetimelike"
)
node.add_marker(mark)

scalar = cls("NaT", "ns")
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]

Expand Down