Skip to content

Commit

Permalink
BUG: Don't error in pd.to_timedelta when errors=ignore
Browse files Browse the repository at this point in the history
Title is self-explanatory.  Closes pandas-dev#13613.

Author: gfyoung <gfyoung17@gmail.com>

Closes pandas-dev#13832 from gfyoung/to-timedelta-error-bug and squashes the following commits:

dc39205 [gfyoung] BUG: Don't error in pd.to_timedelta when errors=ignore
  • Loading branch information
gfyoung authored and jreback committed Aug 15, 2016
1 parent f93ad1c commit 8b50d8c
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 108 deletions.
17 changes: 16 additions & 1 deletion asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,19 @@ def setup(self):
self.arr = ['00:00:{0:02d}'.format(i) for i in self.arr]

def time_timedelta_convert_string_seconds(self):
to_timedelta(self.arr)
to_timedelta(self.arr)


class timedelta_convert_bad_parse(object):
goal_time = 0.2

def setup(self):
self.arr = np.random.randint(0, 1000, size=10000)
self.arr = ['{0} days'.format(i) for i in self.arr]
self.arr[-1] = 'apple'

def time_timedelta_convert_coerce(self):
to_timedelta(self.arr, errors='coerce')

def time_timedelta_convert_ignore(self):
to_timedelta(self.arr, errors='ignore')
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,7 @@ Bug Fixes
- Bug in ``groupby().cumsum()`` calculating ``cumprod`` when ``axis=1``. (:issue:`13994`)
- Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
- Bug in ``pd.read_csv()``, which caused BOM files to be incorrectly parsed by not ignoring the BOM (:issue:`4793`)
- Bug in ``pd.to_timedelta()`` in which the ``errors`` parameter was not being respected (:issue:`13613`)
- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
- Bug in area plot draws legend incorrectly if subplot is enabled or legend is moved after plot (matplotlib 1.5.0 is required to draw area plot legend properly) (issue:`9161`, :issue:`13544`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
break
elif is_timedelta(val):
if convert_timedelta:
itimedeltas[i] = convert_to_timedelta64(val, 'ns', False)
itimedeltas[i] = convert_to_timedelta64(val, 'ns')
seen_timedelta = 1
else:
seen_object = 1
Expand Down
23 changes: 23 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,11 @@ def testit(unit, transform):

def test_to_timedelta_invalid(self):

# bad value for errors parameter
msg = "errors must be one of"
tm.assertRaisesRegexp(ValueError, msg, to_timedelta,
['foo'], errors='never')

# these will error
self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))
Expand All @@ -862,6 +867,24 @@ def test_to_timedelta_invalid(self):
to_timedelta(['1 day', 'bar', '1 min'],
errors='coerce'))

# gh-13613: these should not error because errors='ignore'
invalid_data = 'apple'
self.assertEqual(invalid_data, to_timedelta(
invalid_data, errors='ignore'))

invalid_data = ['apple', '1 days']
tm.assert_numpy_array_equal(
np.array(invalid_data, dtype=object),
to_timedelta(invalid_data, errors='ignore'))

invalid_data = pd.Index(['apple', '1 days'])
tm.assert_index_equal(invalid_data, to_timedelta(
invalid_data, errors='ignore'))

invalid_data = Series(['apple', '1 days'])
tm.assert_series_equal(invalid_data, to_timedelta(
invalid_data, errors='ignore'))

def test_to_timedelta_via_apply(self):
# GH 5458
expected = Series([np.timedelta64(1, 's')])
Expand Down
89 changes: 59 additions & 30 deletions pandas/tseries/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
"""

import numpy as np
import pandas as pd
import pandas.tslib as tslib

from pandas.types.common import (_ensure_object,
is_integer_dtype,
is_timedelta64_dtype,
Expand Down Expand Up @@ -64,37 +66,22 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):
"""
unit = _validate_timedelta_unit(unit)

def _convert_listlike(arg, box, unit, name=None):

if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
arg = np.array(list(arg), dtype='O')

# these are shortcutable
if is_timedelta64_dtype(arg):
value = arg.astype('timedelta64[ns]')
elif is_integer_dtype(arg):
value = arg.astype('timedelta64[{0}]'.format(
unit)).astype('timedelta64[ns]', copy=False)
else:
value = tslib.array_to_timedelta64(_ensure_object(arg),
unit=unit, errors=errors)
value = value.astype('timedelta64[ns]', copy=False)

if box:
from pandas import TimedeltaIndex
value = TimedeltaIndex(value, unit='ns', name=name)
return value
if errors not in ('ignore', 'raise', 'coerce'):
raise ValueError("errors must be one of 'ignore', "
"'raise', or 'coerce'}")

if arg is None:
return arg
elif isinstance(arg, ABCSeries):
from pandas import Series
values = _convert_listlike(arg._values, box=False, unit=unit)
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
values = _convert_listlike(arg._values, unit=unit,
box=False, errors=errors)
return Series(values, index=arg.index, name=arg.name)
elif isinstance(arg, ABCIndexClass):
return _convert_listlike(arg, box=box, unit=unit, name=arg.name)
return _convert_listlike(arg, unit=unit, box=box,
errors=errors, name=arg.name)
elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
return _convert_listlike(arg, box=box, unit=unit)
return _convert_listlike(arg, unit=unit, box=box, errors=errors)
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a string, timedelta, list, tuple, '
'1-d array, or Series')
Expand Down Expand Up @@ -142,13 +129,55 @@ def _validate_timedelta_unit(arg):


def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
"""
convert strings to timedelta; coerce to Timedelta (if box), else
np.timedelta64
"""
"""Convert string 'r' to a timedelta object."""

try:
result = tslib.convert_to_timedelta64(r, unit)
except ValueError:
if errors == 'raise':
raise
elif errors == 'ignore':
return r

# coerce
result = pd.NaT

result = tslib.convert_to_timedelta(r, unit, errors)
if box:
result = tslib.Timedelta(result)

return result


def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
"""Convert a list of objects to a timedelta index object."""

if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
arg = np.array(list(arg), dtype='O')

# these are shortcut-able
if is_timedelta64_dtype(arg):
value = arg.astype('timedelta64[ns]')
elif is_integer_dtype(arg):
value = arg.astype('timedelta64[{0}]'.format(
unit)).astype('timedelta64[ns]', copy=False)
else:
try:
value = tslib.array_to_timedelta64(_ensure_object(arg),
unit=unit, errors=errors)
value = value.astype('timedelta64[ns]', copy=False)
except ValueError:
if errors == 'ignore':
return arg
else:
# This else-block accounts for the cases when errors='raise'
# and errors='coerce'. If errors == 'raise', these errors
# should be raised. If errors == 'coerce', we shouldn't
# expect any errors to be raised, since all parsing errors
# cause coercion to pd.NaT. However, if an error / bug is
# introduced that causes an Exception to be raised, we would
# like to surface it.
raise

if box:
from pandas import TimedeltaIndex
value = TimedeltaIndex(value, unit='ns', name=name)
return value
2 changes: 1 addition & 1 deletion pandas/tslib.pxd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from numpy cimport ndarray, int64_t

cdef convert_to_tsobject(object, object, object, bint, bint)
cdef convert_to_timedelta64(object, object, object)
cpdef convert_to_timedelta64(object, object)
cpdef object maybe_get_tz(object)
cdef bint _is_utc(object)
cdef bint _is_tzlocal(object)
Expand Down
Loading

0 comments on commit 8b50d8c

Please sign in to comment.