Skip to content

Commit

Permalink
DEPR: Deprecate box kwarg for to_timedelta and to_datetime
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris Bertinato committed Feb 26, 2019
1 parent aa08416 commit ab05d74
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 65 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Deprecations
~~~~~~~~~~~~

- Deprecated the `M (months)` and `Y (year)` `units` parameter of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Use :attr:`Series.values` and :meth:`Timestamp.to_datetime64`/:meth:`Timedelta.to_timedelta64` instead to get an ndarray of values or ``numpy.datetime64``/``numpy.timedelta64``, respectively (:issue:`24416`)

.. _whatsnew_0250.prior_deprecations:

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,10 +794,10 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
# Immediate return if coerce
if datetime:
from pandas import to_datetime
return to_datetime(values, errors='coerce', box=False)
return to_datetime(values, errors='coerce').to_numpy()
elif timedelta:
from pandas import to_timedelta
return to_timedelta(values, errors='coerce', box=False)
return to_timedelta(values, errors='coerce').to_numpy()
elif numeric:
from pandas import to_numeric
return to_numeric(values, errors='coerce')
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,8 @@ def asobject(self):
return self.astype(object)

def _convert_tolerance(self, tolerance, target):
tolerance = np.asarray(to_timedelta(tolerance, box=False))
tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

if target.size != tolerance.size and tolerance.size > 1:
raise ValueError('list-like tolerance size must match '
'target index size')
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from pandas import compat
from pandas.core import algorithms

from pandas.util._decorators import deprecate_kwarg


def _guess_datetime_format_for_array(arr, **kwargs):
# Try to guess the format based on the first non-NaN element
Expand Down Expand Up @@ -398,6 +400,7 @@ def _adjust_to_origin(arg, origin, unit):
return arg


@deprecate_kwarg(old_arg_name='box', new_arg_name=None)
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
utc=None, box=True, format=None, exact=True,
unit=None, infer_datetime_format=False, origin='unix',
Expand Down Expand Up @@ -444,6 +447,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
- If True returns a DatetimeIndex or Index-like object
- If False returns ndarray of values.
.. deprecated:: 0.25.0
Use :meth:`.to_numpy` or :meth:`Timestamp.to_datetime64`
instead to get an ndarray of values or numpy.datetime64,
respectively.
format : string, default None
strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
all the way up to nanoseconds.
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

from pandas.core.arrays.timedeltas import sequence_to_td64ns

from pandas.util._decorators import deprecate_kwarg


@deprecate_kwarg(old_arg_name='box', new_arg_name=None)
def to_timedelta(arg, unit='ns', box=True, errors='raise'):
"""
Convert argument to timedelta.
Expand All @@ -40,6 +43,12 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
- If True returns a Timedelta/TimedeltaIndex of the results.
- If False returns a numpy.timedelta64 or numpy.ndarray of
values of dtype timedelta64[ns].
.. deprecated:: 0.25.0
Use :meth:`.to_numpy` or :meth:`Timedelta.to_timedelta64`
instead to get an ndarray of values or numpy.timedelta64,
respectively.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaT.
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3164,11 +3164,11 @@ def converter(*date_cols):
return tools.to_datetime(
ensure_object(strs),
utc=None,
box=False,
dayfirst=dayfirst,
errors='ignore',
infer_datetime_format=infer_datetime_format
)
).to_numpy()

except ValueError:
return tools.to_datetime(
parsing.try_parse_dates(strs, dayfirst=dayfirst))
Expand Down
68 changes: 46 additions & 22 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,13 @@ def test_to_datetime_format_weeks(self, cache):
def test_to_datetime_parse_tzname_or_tzoffset(self, box, const,
fmt, dates, expected_dates):
# GH 13486
result = pd.to_datetime(dates, format=fmt, box=box)
expected = const(expected_dates)
tm.assert_equal(result, expected)
with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(dates, format=fmt, box=box)
expected = const(expected_dates)
tm.assert_equal(result, expected)

with pytest.raises(ValueError):
pd.to_datetime(dates, format=fmt, box=box, utc=True)
with pytest.raises(ValueError):
pd.to_datetime(dates, format=fmt, box=box, utc=True)

@pytest.mark.parametrize('offset', [
'+0', '-1foo', 'UTCbar', ':10', '+01:000:01', ''])
Expand Down Expand Up @@ -256,7 +257,7 @@ def test_to_datetime_dtarr(self, tz):
result = to_datetime(arr)
assert result is arr

result = to_datetime(arr, box=True)
result = to_datetime(arr)
assert result is arr

def test_to_datetime_pydatetime(self):
Expand Down Expand Up @@ -364,7 +365,7 @@ def test_to_datetime_array_of_dt64s(self, cache):
# Assuming all datetimes are in bounds, to_datetime() returns
# an array that is equal to Timestamp() parsing
tm.assert_numpy_array_equal(
pd.to_datetime(dts, box=False, cache=cache),
pd.to_datetime(dts, cache=cache).to_numpy(),
np.array([Timestamp(x).asm8 for x in dts])
)

Expand All @@ -376,8 +377,8 @@ def test_to_datetime_array_of_dt64s(self, cache):
pd.to_datetime(dts_with_oob, errors='raise')

tm.assert_numpy_array_equal(
pd.to_datetime(dts_with_oob, box=False, errors='coerce',
cache=cache),
pd.to_datetime(dts_with_oob, errors='coerce',
cache=cache).to_numpy(),
np.array(
[
Timestamp(dts_with_oob[0]).asm8,
Expand All @@ -392,8 +393,8 @@ def test_to_datetime_array_of_dt64s(self, cache):
# are converted to their .item(), which depending on the version of
# numpy is either a python datetime.datetime or datetime.date
tm.assert_numpy_array_equal(
pd.to_datetime(dts_with_oob, box=False, errors='ignore',
cache=cache),
pd.to_datetime(dts_with_oob, errors='ignore',
cache=cache).to_numpy(),
np.array(
[dt.item() for dt in dts_with_oob],
dtype='O'
Expand Down Expand Up @@ -628,10 +629,14 @@ def test_to_datetime_cache(self, utc, format, box, constructor):
date = '20130101 00:00:00'
test_dates = [date] * 10**5
data = constructor(test_dates)
result = pd.to_datetime(data, utc=utc, format=format, box=box,
cache=True)
expected = pd.to_datetime(data, utc=utc, format=format, box=box,
cache=False)

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(data, utc=utc, format=format, box=box,
cache=True)

with tm.assert_produces_warning(FutureWarning):
expected = pd.to_datetime(data, utc=utc, format=format, box=box,
cache=False)
if box:
tm.assert_index_equal(result, expected)
else:
Expand Down Expand Up @@ -684,7 +689,10 @@ def test_iso_8601_strings_with_same_offset(self):
def test_iso_8601_strings_same_offset_no_box(self):
# GH 22446
data = ['2018-01-04 09:01:00+09:00', '2018-01-04 09:02:00+09:00']
result = pd.to_datetime(data, box=False)

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(data, box=False)

expected = np.array([
datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)),
datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540))
Expand Down Expand Up @@ -753,6 +761,16 @@ def test_timestamp_utc_true(self, ts, expected):
result = to_datetime(ts, utc=True)
assert result == expected

def test_to_datetime_box_deprecated(self):
    """``box`` emits FutureWarning; ``to_datetime64`` is the replacement."""
    # Deprecated - see GH24416
    scalar = np.datetime64('2018-09-09')

    with tm.assert_produces_warning(FutureWarning):
        pd.to_datetime(scalar, box=False)

    # Without ``box``, converting back via ``to_datetime64`` must give
    # the numpy scalar we started from.
    assert pd.to_datetime(scalar).to_datetime64() == scalar


class TestToDatetimeUnit(object):
@pytest.mark.parametrize('cache', [True, False])
Expand Down Expand Up @@ -891,7 +909,7 @@ def test_unit_rounding(self, cache):
def test_unit_ignore_keeps_name(self, cache):
# GH 21697
expected = pd.Index([15e9] * 2, name='name')
result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
result = pd.to_datetime(expected, errors='ignore', unit='s',
cache=cache)
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -1052,7 +1070,10 @@ def test_dataframe_box_false(self):
df = pd.DataFrame({'year': [2015, 2016],
'month': [2, 3],
'day': [4, 5]})
result = pd.to_datetime(df, box=False)

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(df, box=False)

expected = np.array(['2015-02-04', '2016-03-05'],
dtype='datetime64[ns]')
tm.assert_numpy_array_equal(result, expected)
Expand All @@ -1069,8 +1090,7 @@ def test_dataframe_utc_true(self):

def test_to_datetime_errors_ignore_utc_true(self):
# GH 23758
result = pd.to_datetime([1], unit='s', box=True, utc=True,
errors='ignore')
result = pd.to_datetime([1], unit='s', utc=True, errors='ignore')
expected = DatetimeIndex(['1970-01-01 00:00:01'], tz='UTC')
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -1195,11 +1215,15 @@ def test_to_datetime_types(self, cache):
def test_to_datetime_unprocessable_input(self, cache, box, klass):
    """Check ``errors`` handling for input that cannot be parsed.

    With ``errors='ignore'`` the mixed ``[1, '1']`` input must come back
    as an object-dtype array wrapped in ``klass``; with
    ``errors='raise'`` a TypeError is expected. Every call passes the
    deprecated ``box`` keyword, so each one must also emit a
    FutureWarning.

    ``cache``, ``box`` and ``klass`` are presumably supplied by
    parametrization outside this view -- TODO confirm.
    """
    # GH 4928
    # GH 21864
    with tm.assert_produces_warning(FutureWarning):
        result = to_datetime([1, '1'], errors='ignore', cache=cache,
                             box=box)

    expected = klass(np.array([1, '1'], dtype='O'))
    tm.assert_equal(result, expected)

    msg = "invalid string coercion to datetime"
    # Nest the context managers instead of writing ``with (a, b):``.
    # Before parenthesized context managers were added to the grammar,
    # that form evaluates a tuple, and a tuple has no ``__enter__``, so
    # the test would fail with AttributeError rather than checking the
    # TypeError.
    with pytest.raises(TypeError, match=msg):
        with tm.assert_produces_warning(FutureWarning):
            to_datetime([1, '1'], errors='raise', cache=cache, box=box)

def test_to_datetime_other_datetime64_units(self):
Expand Down
89 changes: 55 additions & 34 deletions pandas/tests/indexes/timedeltas/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@ def conv(v):

d1 = np.timedelta64(1, 'D')

assert (to_timedelta('1 days 06:05:01.00003', box=False) ==
conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') +
np.timedelta64(30, 'us')))
assert (to_timedelta('15.5us', box=False) ==
conv(np.timedelta64(15500, 'ns')))
with tm.assert_produces_warning(FutureWarning):
assert (to_timedelta('1 days 06:05:01.00003', box=False) ==
conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') +
np.timedelta64(30, 'us')))

# empty string
result = to_timedelta('', box=False)
assert result.astype('int64') == iNaT
with tm.assert_produces_warning(FutureWarning):
assert (to_timedelta('15.5us', box=False) ==
conv(np.timedelta64(15500, 'ns')))

# empty string
result = to_timedelta('', box=False)
assert result.astype('int64') == iNaT

result = to_timedelta(['', ''])
assert isna(result).all()
Expand All @@ -37,10 +40,11 @@ def conv(v):
expected = pd.Index(np.array([np.timedelta64(1, 's')]))
tm.assert_index_equal(result, expected)

# ints
result = np.timedelta64(0, 'ns')
expected = to_timedelta(0, box=False)
assert result == expected
with tm.assert_produces_warning(FutureWarning):
# ints
result = np.timedelta64(0, 'ns')
expected = to_timedelta(0, box=False)
assert result == expected

# Series
expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
Expand All @@ -53,16 +57,18 @@ def conv(v):
expected = to_timedelta([0, 10], unit='s')
tm.assert_index_equal(result, expected)

# single element conversion
v = timedelta(seconds=1)
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected
with tm.assert_produces_warning(FutureWarning):
# single element conversion
v = timedelta(seconds=1)
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected

v = np.timedelta64(timedelta(seconds=1))
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected
with tm.assert_produces_warning(FutureWarning):
v = np.timedelta64(timedelta(seconds=1))
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected

# arrays of various dtypes
arr = np.array([1] * 5, dtype='int64')
Expand Down Expand Up @@ -90,22 +96,27 @@ def conv(v):
expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5)
tm.assert_index_equal(result, expected)

# Test with lists as input when box=false
expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]')
result = to_timedelta(range(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
# Test with lists as input when box=false
expected = np.array(np.arange(3) * 1000000000,
dtype='timedelta64[ns]')
result = to_timedelta(range(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

result = to_timedelta(np.arange(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
result = to_timedelta(np.arange(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

result = to_timedelta([0, 1, 2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
result = to_timedelta([0, 1, 2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

# Tests with fractional seconds as input:
expected = np.array(
[0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]')
result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
# Tests with fractional seconds as input:
expected = np.array(
[0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]')
result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

def test_to_timedelta_invalid(self):

Expand Down Expand Up @@ -173,3 +184,13 @@ def test_to_timedelta_on_missing_values(self):

actual = pd.to_timedelta(pd.NaT)
assert actual.value == timedelta_NaT.astype('int64')

def test_to_timedelta_box_deprecated(self):
    """``box`` emits FutureWarning; ``to_timedelta64`` is the replacement."""
    # Deprecated - see GH24416
    with tm.assert_produces_warning(FutureWarning):
        to_timedelta(0, box=False)

    # Without ``box``, ``to_timedelta64`` must give the same numpy scalar.
    zero_ns = np.timedelta64(0, 'ns')
    converted = to_timedelta(0).to_timedelta64()
    assert zero_ns == converted
8 changes: 4 additions & 4 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,12 +310,12 @@ def test_iso_conversion(self):
assert to_timedelta('P0DT0H0M1S') == expected

def test_nat_converters(self):
result = to_timedelta('nat', box=False)
assert result.dtype.kind == 'm'
result = to_timedelta('nat').to_numpy()
assert result.dtype.kind == 'M'
assert result.astype('int64') == iNaT

result = to_timedelta('nan', box=False)
assert result.dtype.kind == 'm'
result = to_timedelta('nan').to_numpy()
assert result.dtype.kind == 'M'
assert result.astype('int64') == iNaT

@pytest.mark.filterwarnings("ignore:M and Y units are deprecated")
Expand Down

0 comments on commit ab05d74

Please sign in to comment.