From 8b50d8c854196e3025c9a881cafeedc5f509aaef Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Mon, 15 Aug 2016 18:22:40 -0400
Subject: [PATCH] BUG: Don't error in pd.to_timedelta when errors=ignore

Title is self-explanatory.  Closes #13613.

Author: gfyoung <gfyoung17@gmail.com>

Closes #13832 from gfyoung/to-timedelta-error-bug and squashes the following commits:

dc39205 [gfyoung] BUG: Don't error in pd.to_timedelta when errors=ignore
---
 asv_bench/benchmarks/timedelta.py       |  17 +++-
 doc/source/whatsnew/v0.19.0.txt         |   1 +
 pandas/src/inference.pyx                |   2 +-
 pandas/tseries/tests/test_timedeltas.py |  23 +++++
 pandas/tseries/timedeltas.py            |  89 ++++++++++++------
 pandas/tslib.pxd                        |   2 +-
 pandas/tslib.pyx                        | 120 +++++++++---------------
 7 files changed, 146 insertions(+), 108 deletions(-)

diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py
index 2f252a4d3e1dc..9719fd87dfb2e 100644
--- a/asv_bench/benchmarks/timedelta.py
+++ b/asv_bench/benchmarks/timedelta.py
@@ -31,4 +31,19 @@ def setup(self):
         self.arr = ['00:00:{0:02d}'.format(i) for i in self.arr]
 
     def time_timedelta_convert_string_seconds(self):
-        to_timedelta(self.arr)
\ No newline at end of file
+        to_timedelta(self.arr)
+
+
+class timedelta_convert_bad_parse(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.arr = np.random.randint(0, 1000, size=10000)
+        self.arr = ['{0} days'.format(i) for i in self.arr]
+        self.arr[-1] = 'apple'
+
+    def time_timedelta_convert_coerce(self):
+        to_timedelta(self.arr, errors='coerce')
+
+    def time_timedelta_convert_ignore(self):
+        to_timedelta(self.arr, errors='ignore')
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index d436d4dd3703b..e3cdefd36b0be 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -858,6 +858,7 @@ Bug Fixes
 - Bug in ``groupby().cumsum()`` calculating ``cumprod`` when ``axis=1``. (:issue:`13994`)
 - Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
 - Bug in ``pd.read_csv()``, which caused BOM files to be incorrectly parsed by not ignoring the BOM (:issue:`4793`)
+- Bug in ``pd.to_timedelta()`` in which the ``errors`` parameter was not being respected (:issue:`13613`)
 - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
 - Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
 - Bug in area plot draws legend incorrectly if subplot is enabled or legend is moved after plot (matplotlib 1.5.0 is required to draw area plot legend properly) (issue:`9161`, :issue:`13544`)
diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx
index 039e0df4193b3..62555dc7f178c 100644
--- a/pandas/src/inference.pyx
+++ b/pandas/src/inference.pyx
@@ -780,7 +780,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
                 break
         elif is_timedelta(val):
             if convert_timedelta:
-                itimedeltas[i] = convert_to_timedelta64(val, 'ns', False)
+                itimedeltas[i] = convert_to_timedelta64(val, 'ns')
                 seen_timedelta = 1
             else:
                 seen_object = 1
diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
index 0bdf8590ec487..159d2b4f52f2a 100644
--- a/pandas/tseries/tests/test_timedeltas.py
+++ b/pandas/tseries/tests/test_timedeltas.py
@@ -845,6 +845,11 @@ def testit(unit, transform):
 
     def test_to_timedelta_invalid(self):
 
+        # bad value for errors parameter
+        msg = "errors must be one of"
+        tm.assertRaisesRegexp(ValueError, msg, to_timedelta,
+                              ['foo'], errors='never')
+
         # these will error
         self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
         self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))
@@ -862,6 +867,24 @@ def test_to_timedelta_invalid(self):
                               to_timedelta(['1 day', 'bar', '1 min'],
                                            errors='coerce'))
 
+        # gh-13613: these should not error because errors='ignore'
+        invalid_data = 'apple'
+        self.assertEqual(invalid_data, to_timedelta(
+            invalid_data, errors='ignore'))
+
+        invalid_data = ['apple', '1 days']
+        tm.assert_numpy_array_equal(
+            np.array(invalid_data, dtype=object),
+            to_timedelta(invalid_data, errors='ignore'))
+
+        invalid_data = pd.Index(['apple', '1 days'])
+        tm.assert_index_equal(invalid_data, to_timedelta(
+            invalid_data, errors='ignore'))
+
+        invalid_data = Series(['apple', '1 days'])
+        tm.assert_series_equal(invalid_data, to_timedelta(
+            invalid_data, errors='ignore'))
+
     def test_to_timedelta_via_apply(self):
         # GH 5458
         expected = Series([np.timedelta64(1, 's')])
diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py
index 7f28ec86ec40d..2ca3fcea8005b 100644
--- a/pandas/tseries/timedeltas.py
+++ b/pandas/tseries/timedeltas.py
@@ -3,7 +3,9 @@
 """
 
 import numpy as np
+import pandas as pd
 import pandas.tslib as tslib
+
 from pandas.types.common import (_ensure_object,
                                  is_integer_dtype,
                                  is_timedelta64_dtype,
@@ -64,37 +66,22 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):
     """
     unit = _validate_timedelta_unit(unit)
 
-    def _convert_listlike(arg, box, unit, name=None):
-
-        if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
-            arg = np.array(list(arg), dtype='O')
-
-        # these are shortcutable
-        if is_timedelta64_dtype(arg):
-            value = arg.astype('timedelta64[ns]')
-        elif is_integer_dtype(arg):
-            value = arg.astype('timedelta64[{0}]'.format(
-                unit)).astype('timedelta64[ns]', copy=False)
-        else:
-            value = tslib.array_to_timedelta64(_ensure_object(arg),
-                                               unit=unit, errors=errors)
-            value = value.astype('timedelta64[ns]', copy=False)
-
-        if box:
-            from pandas import TimedeltaIndex
-            value = TimedeltaIndex(value, unit='ns', name=name)
-        return value
+    if errors not in ('ignore', 'raise', 'coerce'):
+        raise ValueError("errors must be one of 'ignore', "
+                         "'raise', or 'coerce'}")
 
     if arg is None:
         return arg
     elif isinstance(arg, ABCSeries):
         from pandas import Series
-        values = _convert_listlike(arg._values, box=False, unit=unit)
-        return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
+        values = _convert_listlike(arg._values, unit=unit,
+                                   box=False, errors=errors)
+        return Series(values, index=arg.index, name=arg.name)
     elif isinstance(arg, ABCIndexClass):
-        return _convert_listlike(arg, box=box, unit=unit, name=arg.name)
+        return _convert_listlike(arg, unit=unit, box=box,
+                                 errors=errors, name=arg.name)
     elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
-        return _convert_listlike(arg, box=box, unit=unit)
+        return _convert_listlike(arg, unit=unit, box=box, errors=errors)
     elif getattr(arg, 'ndim', 1) > 1:
         raise TypeError('arg must be a string, timedelta, list, tuple, '
                         '1-d array, or Series')
@@ -142,13 +129,55 @@ def _validate_timedelta_unit(arg):
 
 
 def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
-    """
-    convert strings to timedelta; coerce to Timedelta (if box), else
-    np.timedelta64
-    """
+    """Convert string 'r' to a timedelta object."""
+
+    try:
+        result = tslib.convert_to_timedelta64(r, unit)
+    except ValueError:
+        if errors == 'raise':
+            raise
+        elif errors == 'ignore':
+            return r
+
+        # coerce
+        result = pd.NaT
 
-    result = tslib.convert_to_timedelta(r, unit, errors)
     if box:
         result = tslib.Timedelta(result)
-
     return result
+
+
+def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
+    """Convert a list of objects to a timedelta index object."""
+
+    if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
+        arg = np.array(list(arg), dtype='O')
+
+    # these are shortcut-able
+    if is_timedelta64_dtype(arg):
+        value = arg.astype('timedelta64[ns]')
+    elif is_integer_dtype(arg):
+        value = arg.astype('timedelta64[{0}]'.format(
+            unit)).astype('timedelta64[ns]', copy=False)
+    else:
+        try:
+            value = tslib.array_to_timedelta64(_ensure_object(arg),
+                                               unit=unit, errors=errors)
+            value = value.astype('timedelta64[ns]', copy=False)
+        except ValueError:
+            if errors == 'ignore':
+                return arg
+            else:
+                # This else-block accounts for the cases when errors='raise'
+                # and errors='coerce'. If errors == 'raise', these errors
+                # should be raised. If errors == 'coerce', we shouldn't
+                # expect any errors to be raised, since all parsing errors
+                # cause coercion to pd.NaT. However, if an error / bug is
+                # introduced that causes an Exception to be raised, we would
+                # like to surface it.
+                raise
+
+    if box:
+        from pandas import TimedeltaIndex
+        value = TimedeltaIndex(value, unit='ns', name=name)
+    return value
diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd
index d6c5810e1d713..aa8cbcb2cedc7 100644
--- a/pandas/tslib.pxd
+++ b/pandas/tslib.pxd
@@ -1,7 +1,7 @@
 from numpy cimport ndarray, int64_t
 
 cdef convert_to_tsobject(object, object, object, bint, bint)
-cdef convert_to_timedelta64(object, object, object)
+cpdef convert_to_timedelta64(object, object)
 cpdef object maybe_get_tz(object)
 cdef bint _is_utc(object)
 cdef bint _is_tzlocal(object)
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index 3c07cfd2446ed..53c77b2d8f9d7 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -2619,7 +2619,7 @@ class Timedelta(_Timedelta):
 
             try:
                 nano = kwargs.pop('nanoseconds',0)
-                value = convert_to_timedelta64(timedelta(**kwargs),'ns',False) + nano
+                value = convert_to_timedelta64(timedelta(**kwargs),'ns') + nano
             except TypeError as e:
                 raise ValueError("cannot construct a Timedelta from the passed arguments, allowed keywords are "
                                  "[weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds]")
@@ -2627,9 +2627,9 @@ class Timedelta(_Timedelta):
         if isinstance(value, Timedelta):
             value = value.value
         elif util.is_string_object(value):
-            value = np.timedelta64(parse_timedelta_string(value, False))
+            value = np.timedelta64(parse_timedelta_string(value))
         elif isinstance(value, timedelta):
-            value = convert_to_timedelta64(value,'ns',False)
+            value = convert_to_timedelta64(value,'ns')
         elif isinstance(value, np.timedelta64):
             if unit is not None:
                 value = value.astype('timedelta64[{0}]'.format(unit))
@@ -2638,7 +2638,7 @@ class Timedelta(_Timedelta):
             value = np.timedelta64(_delta_to_nanoseconds(value.delta),'ns')
         elif is_integer_object(value) or util.is_float_object(value):
             # unit=None is de-facto 'ns'
-            value = convert_to_timedelta64(value,unit,False)
+            value = convert_to_timedelta64(value,unit)
         elif _checknull_with_nat(value):
             return NaT
         else:
@@ -3001,37 +3001,41 @@ cdef PyTypeObject* td_type = <PyTypeObject*> Timedelta
 cdef inline bint is_timedelta(object o):
     return Py_TYPE(o) == td_type # isinstance(o, Timedelta)
 
-def array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'):
-    """ convert an ndarray to an array of ints that are timedeltas
-        force conversion if errors = 'coerce',
-        else will raise if cannot convert """
+cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'):
+    """
+    Convert an ndarray to an array of timedeltas. If errors == 'coerce',
+    coerce non-convertible objects to NaT. Otherwise, raise.
+    """
+
     cdef:
         Py_ssize_t i, n
         ndarray[int64_t] iresult
-        bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce'
 
-    assert is_raise or is_ignore or is_coerce
+    if errors not in ('ignore', 'raise', 'coerce'):
+        raise ValueError("errors must be one of 'ignore', "
+                         "'raise', or 'coerce'}")
 
     n = values.shape[0]
     result = np.empty(n, dtype='m8[ns]')
     iresult = result.view('i8')
 
-    # usually we have all strings
-    # if so then we hit the fast path
+    # Usually, we have all strings. If so, we hit the fast path.
+    # If this path fails, we try conversion a different way, and
+    # this is where all of the error handling will take place.
     try:
         for i in range(n):
-            result[i] = parse_timedelta_string(values[i], is_coerce)
+            result[i] = parse_timedelta_string(values[i])
     except:
         for i in range(n):
-            result[i] = convert_to_timedelta64(values[i], unit, is_coerce)
-    return iresult
-
-
-def convert_to_timedelta(object ts, object unit='ns', errors='raise'):
-    cdef bint is_raise=errors=='raise', is_ignore=errors=='ignore', is_coerce=errors=='coerce'
+            try:
+                result[i] = convert_to_timedelta64(values[i], unit)
+            except ValueError:
+                if errors == 'coerce':
+                    result[i] = NPY_NAT
+                else:
+                    raise
 
-    assert is_raise or is_ignore or is_coerce
-    return convert_to_timedelta64(ts, unit, is_coerce)
+    return iresult
 
 cdef dict timedelta_abbrevs = { 'D' : 'd',
                                 'd' : 'd',
@@ -3099,15 +3103,10 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):
     n = ''.join(number) + '.' + ''.join(frac)
     return cast_from_unit(float(n), unit)
 
-cdef inline parse_timedelta_string(object ts, coerce=False):
+cdef inline parse_timedelta_string(object ts):
     """
-    Parse an regular format timedelta string
-
-    Return an int64_t or raise a ValueError on an invalid parse
-
-    if coerce, set a non-valid value to NaT
-
-    Return a ns based int64
+    Parse a regular format timedelta string. Return an int64_t (in ns)
+    or raise a ValueError on an invalid parse.
     """
 
     cdef:
@@ -3163,13 +3162,7 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
                 number.append(c)
 
             else:
-
-                try:
-                    r = timedelta_from_spec(number, frac, unit)
-                except ValueError:
-                    if coerce:
-                        return NPY_NAT
-                    raise
+                r = timedelta_from_spec(number, frac, unit)
                 unit, number, frac = [], [c], []
 
                 result += timedelta_as_neg(r, neg)
@@ -3196,9 +3189,9 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
                 result += timedelta_as_neg(r, neg)
                 have_hhmmss = 1
             else:
-                if coerce:
-                    return NPY_NAT
-                raise ValueError("expecting hh:mm:ss format, received: {0}".format(ts))
+                raise ValueError("expecting hh:mm:ss format, "
+                                 "received: {0}".format(ts))
+
             unit, number = [], []
 
         # after the decimal point
@@ -3228,21 +3221,15 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
     # we had a dot, but we have a fractional
     # value since we have an unit
     if have_dot and len(unit):
-        try:
-            r = timedelta_from_spec(number, frac, unit)
-            result += timedelta_as_neg(r, neg)
-        except ValueError:
-            if coerce:
-                return NPY_NAT
-            raise
+        r = timedelta_from_spec(number, frac, unit)
+        result += timedelta_as_neg(r, neg)
 
     # we have a dot as part of a regular format
     # e.g. hh:mm:ss.fffffff
     elif have_dot:
 
-        if (len(number) or len(frac)) and not len(unit) and current_unit is None:
-            if coerce:
-                return NPY_NAT
+        if ((len(number) or len(frac)) and not len(unit)
+            and current_unit is None):
             raise ValueError("no units specified")
 
         if len(frac) > 0 and len(frac) <= 3:
@@ -3266,38 +3253,24 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
 
     # we have a last abbreviation
     elif len(unit):
-
         if len(number):
-            try:
-                r = timedelta_from_spec(number, frac, unit)
-                result += timedelta_as_neg(r, neg)
-            except ValueError:
-                if coerce:
-                    return NPY_NAT
-                raise
+            r = timedelta_from_spec(number, frac, unit)
+            result += timedelta_as_neg(r, neg)
         else:
-            if coerce:
-                return NPY_NAT
             raise ValueError("unit abbreviation w/o a number")
 
     # treat as nanoseconds
     # but only if we don't have anything else
     else:
-
         if have_value:
             raise ValueError("have leftover units")
         if len(number):
-            try:
-                r = timedelta_from_spec(number, frac, 'ns')
-                result += timedelta_as_neg(r, neg)
-            except ValueError:
-                if coerce:
-                    return NPY_NAT
-                raise
+            r = timedelta_from_spec(number, frac, 'ns')
+            result += timedelta_as_neg(r, neg)
 
     return result
 
-cdef inline convert_to_timedelta64(object ts, object unit, object coerce):
+cpdef convert_to_timedelta64(object ts, object unit):
     """
     Convert an incoming object to a timedelta64 if possible
 
@@ -3308,9 +3281,7 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce):
         - np.int64 (with unit providing a possible modifier)
         - None/NaT
 
-    if coerce, set a non-valid value to NaT
-
-    Return a ns based int64
+    Return an ns based int64
 
     # kludgy here until we have a timedelta scalar
     # handle the numpy < 1.7 case
@@ -3346,16 +3317,15 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce):
             ts = cast_from_unit(ts, unit)
             ts = np.timedelta64(ts)
     elif util.is_string_object(ts):
-        ts = np.timedelta64(parse_timedelta_string(ts, coerce))
+        ts = np.timedelta64(parse_timedelta_string(ts))
     elif hasattr(ts,'delta'):
         ts = np.timedelta64(_delta_to_nanoseconds(ts),'ns')
 
     if isinstance(ts, timedelta):
         ts = np.timedelta64(ts)
     elif not isinstance(ts, np.timedelta64):
-        if coerce:
-            return np.timedelta64(NPY_NAT)
-        raise ValueError("Invalid type for timedelta scalar: %s" % type(ts))
+        raise ValueError("Invalid type for timedelta "
+                         "scalar: %s" % type(ts))
     return ts.astype('timedelta64[ns]')
 
 def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'):