Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Format mismatch doesn't coerce to NaT #24815

Merged
merged 1 commit into from
Jan 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1548,6 +1548,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).
- Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`)
- Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`)
- Bug in :func:`to_datetime` where input that does not match the specified ``format`` was not coerced to ``NaT`` even when ``errors='coerce'`` was passed (:issue:`24763`)

Timedelta
^^^^^^^^^
Expand Down
14 changes: 12 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,25 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
except tslibs.OutOfBoundsDatetime:
if errors == 'raise':
raise
result = arg
elif errors == 'coerce':
result = np.empty(arg.shape, dtype='M8[ns]')
iresult = result.view('i8')
iresult.fill(tslibs.iNaT)
else:
result = arg
except ValueError:
# if format was inferred, try falling back
# to array_to_datetime - terminate here
# for specified formats
if not infer_datetime_format:
if errors == 'raise':
raise
result = arg
elif errors == 'coerce':
result = np.empty(arg.shape, dtype='M8[ns]')
iresult = result.view('i8')
iresult.fill(tslibs.iNaT)
else:
result = arg
except ValueError as e:
# Fallback to try to convert datetime objects if timezone-aware
# datetime objects are found without passing `utc=True`
Expand Down
57 changes: 57 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,63 @@ def test_datetime_invalid_datatype(self):
with pytest.raises(TypeError):
pd.to_datetime(pd.to_datetime)

@pytest.mark.parametrize('value', ["a", "00:01:99"])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, '%H:%M:%S'])
def test_datetime_invalid_scalar(self, value, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on an unparseable scalar.

    Every parametrized ``value`` is a string that cannot be parsed as a
    datetime, either with no explicit format or with ``'%H:%M:%S'``
    ("a" is not a time; "00:01:99" has an out-of-range seconds field).

    Expected behaviour per mode:

    * ``errors='ignore'`` returns the input unchanged.
    * ``errors='coerce'`` returns ``NaT``.
    * ``errors='raise'`` raises ``ValueError``.
    """
    # GH24763
    # The format is a well-formed strptime pattern so that this exercises a
    # format *mismatch* rather than a malformed format string.
    res = pd.to_datetime(value, errors='ignore', format=format,
                         infer_datetime_format=infer)
    assert res == value

    res = pd.to_datetime(value, errors='coerce', format=format,
                         infer_datetime_format=infer)
    assert res is pd.NaT

    with pytest.raises(ValueError):
        pd.to_datetime(value, errors='raise', format=format,
                       infer_datetime_format=infer)

@pytest.mark.parametrize('value', ["3000/12/11 00:00:00"])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, '%H:%M:%S'])
def test_datetime_outofbounds_scalar(self, value, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on an out-of-bounds scalar.

    The value is a syntactically valid date in the year 3000.

    Expected behaviour per mode:

    * ``errors='ignore'`` returns the input unchanged.
    * ``errors='coerce'`` returns ``NaT``.
    * ``errors='raise'`` raises ``ValueError`` when an explicit format is
      given (the value does not match ``'%H:%M:%S'``), and
      ``OutOfBoundsDatetime`` when no format is given.
    """
    # GH24763
    # The format is a well-formed strptime pattern so that the format branch
    # exercises a format *mismatch* rather than a malformed format string.
    res = pd.to_datetime(value, errors='ignore', format=format,
                         infer_datetime_format=infer)
    assert res == value

    res = pd.to_datetime(value, errors='coerce', format=format,
                         infer_datetime_format=infer)
    assert res is pd.NaT

    if format is not None:
        with pytest.raises(ValueError):
            pd.to_datetime(value, errors='raise', format=format,
                           infer_datetime_format=infer)
    else:
        with pytest.raises(OutOfBoundsDatetime):
            pd.to_datetime(value, errors='raise', format=format,
                           infer_datetime_format=infer)

@pytest.mark.parametrize('values', [["a"], ["00:01:99"],
                                    ["a", "b", "99:00:00"]])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, '%H:%M:%S'])
def test_datetime_invalid_index(self, values, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on unparseable lists.

    Every element of every parametrized ``values`` list is a string that
    cannot be parsed as a datetime.

    Expected behaviour per mode:

    * ``errors='ignore'`` returns an ``Index`` equal to the input values.
    * ``errors='coerce'`` returns a ``DatetimeIndex`` of all ``NaT`` with
      the same length as the input.
    * ``errors='raise'`` raises ``ValueError``.
    """
    # GH24763
    # The format is a well-formed strptime pattern so that this exercises a
    # format *mismatch* rather than a malformed format string.
    res = pd.to_datetime(values, errors='ignore', format=format,
                         infer_datetime_format=infer)
    tm.assert_index_equal(res, pd.Index(values))

    res = pd.to_datetime(values, errors='coerce', format=format,
                         infer_datetime_format=infer)
    tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values)))

    with pytest.raises(ValueError):
        pd.to_datetime(values, errors='raise', format=format,
                       infer_datetime_format=infer)

@pytest.mark.parametrize("utc", [True, None])
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
@pytest.mark.parametrize("box", [True, False])
Expand Down