Skip to content

Commit

Permalink
BUG: Format mismatch doesn't coerce to NaT (#24815)
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks authored and jreback committed Jan 21, 2019
1 parent 8eaccd8 commit e4441df
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).
- Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`)
- Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`)
- Bug in :func:`to_datetime` where an invalid datetime format did not coerce the input to ``NaT`` even when ``errors='coerce'`` was passed (:issue:`24763`)

Timedelta
^^^^^^^^^
Expand Down
14 changes: 12 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,25 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
except tslibs.OutOfBoundsDatetime:
if errors == 'raise':
raise
result = arg
elif errors == 'coerce':
result = np.empty(arg.shape, dtype='M8[ns]')
iresult = result.view('i8')
iresult.fill(tslibs.iNaT)
else:
result = arg
except ValueError:
# if format was inferred, try falling back
# to array_to_datetime - terminate here
# for specified formats
if not infer_datetime_format:
if errors == 'raise':
raise
result = arg
elif errors == 'coerce':
result = np.empty(arg.shape, dtype='M8[ns]')
iresult = result.view('i8')
iresult.fill(tslibs.iNaT)
else:
result = arg
except ValueError as e:
# Fallback to try to convert datetime objects if timezone-aware
# datetime objects are found without passing `utc=True`
Expand Down
57 changes: 57 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,63 @@ def test_datetime_invalid_datatype(self):
with pytest.raises(TypeError):
pd.to_datetime(pd.to_datetime)

@pytest.mark.parametrize('value', ["a", "00:01:99"])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
def test_datetime_invalid_scalar(self, value, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on a bad scalar.

    Regression test for GH24763. ``value`` is a string that is expected
    not to convert, and the test is repeated with and without an
    explicit ``format`` and with ``infer_datetime_format`` on and off.
    """
    # NOTE(review): 'H%:M%:S%' puts each percent sign after its letter,
    # so it is not a well-formed strptime pattern; presumably this is
    # deliberate, to exercise an invalid format -- confirm.
    # Keyword arguments shared by all three calls below.
    common = dict(format=format, infer_datetime_format=infer)

    # errors='ignore': the input is handed back unchanged.
    assert pd.to_datetime(value, errors='ignore', **common) == value

    # errors='coerce': the result is the NaT singleton.
    assert pd.to_datetime(value, errors='coerce', **common) is pd.NaT

    # errors='raise': the failure surfaces as a ValueError.
    with pytest.raises(ValueError):
        pd.to_datetime(value, errors='raise', **common)

@pytest.mark.parametrize('value', ["3000/12/11 00:00:00"])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
def test_datetime_outofbounds_scalar(self, value, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on a year-3000 scalar.

    Regression test for GH24763. The exception expected under
    ``errors='raise'`` depends on whether a ``format`` was supplied.
    """
    # NOTE(review): 'H%:M%:S%' puts each percent sign after its letter,
    # so it is not a well-formed strptime pattern; presumably this is
    # deliberate, to exercise an invalid format -- confirm.
    # Keyword arguments shared by all three calls below.
    common = dict(format=format, infer_datetime_format=infer)

    # errors='ignore': the input is handed back unchanged.
    assert pd.to_datetime(value, errors='ignore', **common) == value

    # errors='coerce': the result is the NaT singleton.
    assert pd.to_datetime(value, errors='coerce', **common) is pd.NaT

    # errors='raise': ValueError when a format is given, otherwise
    # OutOfBoundsDatetime.
    expected_error = ValueError if format is not None \
        else OutOfBoundsDatetime
    with pytest.raises(expected_error):
        pd.to_datetime(value, errors='raise', **common)

@pytest.mark.parametrize('values', [["a"], ["00:01:99"],
                                    ["a", "b", "99:00:00"]])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
def test_datetime_invalid_index(self, values, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on a list of bad strings.

    Regression test for GH24763; the list-like counterpart of the
    scalar checks, comparing whole indexes instead of single values.
    """
    # NOTE(review): 'H%:M%:S%' puts each percent sign after its letter,
    # so it is not a well-formed strptime pattern; presumably this is
    # deliberate, to exercise an invalid format -- confirm.
    # Keyword arguments shared by all three calls below.
    common = dict(format=format, infer_datetime_format=infer)

    # errors='ignore': the result equals an Index built from the input.
    untouched = pd.Index(values)
    ignored = pd.to_datetime(values, errors='ignore', **common)
    tm.assert_index_equal(ignored, untouched)

    # errors='coerce': one NaT per input element, as a DatetimeIndex.
    all_nat = pd.DatetimeIndex([pd.NaT] * len(values))
    coerced = pd.to_datetime(values, errors='coerce', **common)
    tm.assert_index_equal(coerced, all_nat)

    # errors='raise': the failure surfaces as a ValueError.
    with pytest.raises(ValueError):
        pd.to_datetime(values, errors='raise', **common)

@pytest.mark.parametrize("utc", [True, None])
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
@pytest.mark.parametrize("box", [True, False])
Expand Down

0 comments on commit e4441df

Please sign in to comment.