Skip to content

Commit

Permalink
Added ISO 8601 Duration string constructor for Timedelta
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd committed Jan 3, 2018
1 parent 6552718 commit 057daa3
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 1 deletion.
31 changes: 31 additions & 0 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,40 @@
import datetime

import numpy as np
import pandas as pd

from pandas import to_timedelta, Timestamp, Timedelta


class TimedeltaConstructor(object):
    """
    Benchmarks for constructing a scalar ``Timedelta``.

    Each ``time_*`` method builds one ``Timedelta`` from a different kind
    of input and discards the result, so only construction is exercised.
    """

    goal_time = 0.2

    def time_from_int(self):
        """Construct from a bare integer with no explicit unit."""
        Timedelta(123456789)

    def time_from_unit(self):
        """Construct from an integer together with a ``unit`` keyword."""
        Timedelta(1, unit='d')

    def time_from_components(self):
        """Construct from individual keyword components."""
        Timedelta(days=1, hours=2, minutes=3, seconds=4, milliseconds=5,
                  microseconds=6, nanoseconds=7)

    def time_from_datetime_timedelta(self):
        """Construct from a standard-library ``datetime.timedelta``."""
        Timedelta(datetime.timedelta(days=1, seconds=1))

    def time_from_np_timedelta(self):
        """Construct from a ``numpy.timedelta64`` scalar."""
        Timedelta(np.timedelta64(1, 'ms'))

    def time_from_string(self):
        """Construct from a regular-format timedelta string."""
        Timedelta('1 days')

    def time_from_iso_format(self):
        """Construct from an ISO 8601 Duration string."""
        Timedelta('P4DT12H30M5S')

    def time_from_missing(self):
        """Construct from the missing-value string ``'nat'``."""
        Timedelta('nat')


class ToTimedelta(object):
goal_time = 0.2

Expand Down
7 changes: 7 additions & 0 deletions doc/source/timedeltas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ You can construct a ``Timedelta`` scalar through various arguments:
pd.Timedelta('nan')
pd.Timedelta('nat')
# ISO 8601 Duration strings
pd.Timedelta('P0DT0H1M0S')
pd.Timedelta('P0DT0H0M0.000000123S')
.. versionadded:: 0.23.0
Added constructor for `ISO 8601 Duration`_ strings

:ref:`DateOffsets<timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction.

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ Other API Changes
- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`)
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions; these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`)
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)

.. _whatsnew_0230.deprecations:

Expand Down
53 changes: 52 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# cython: profile=False
import collections
import re

import sys
cdef bint PY3 = (sys.version_info[0] >= 3)
Expand Down Expand Up @@ -235,6 +236,25 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
return <int64_t> (base *m) + <int64_t> (frac *m)


# Compiled once at import time instead of on every call.  The trailing
# ``\Z`` anchors the pattern at the end of the input, so a string with extra
# characters after the final ``S`` no longer matches.
_ISO_DURATION_PATTERN = re.compile(r"""P
    (?P<days>-?[0-9]*)DT
    (?P<hours>[0-9]{1,2})H
    (?P<minutes>[0-9]{1,2})M
    (?P<seconds>[0-9]{0,2})
    (\.
    (?P<milliseconds>[0-9]{0,3})
    (?P<microseconds>[0-9]{0,3})
    (?P<nanoseconds>[0-9]{0,3})
    )?S\Z""", re.VERBOSE)


cpdef match_iso_format(object ts):
    """
    Match a string against an ISO 8601 Duration pattern of the form
    ``P<days>DT<hours>H<minutes>M<seconds>[.<fraction>]S``.

    Parameters
    ----------
    ts : str
        Candidate ISO 8601 Duration string, e.g. ``'P4DT12H30M5S'``.

    Returns
    -------
    match object or None
        On success, a match object with named groups ``days``, ``hours``,
        ``minutes``, ``seconds``, ``milliseconds``, ``microseconds`` and
        ``nanoseconds``.  The three fractional groups hold up to three
        digits each, taken in order from the digits after the decimal
        point, and are None when no fractional part is present.  Returns
        None when ``ts`` does not match the whole pattern, including when
        anything follows the final ``S``.
    """
    return _ISO_DURATION_PATTERN.match(ts)


cdef inline parse_timedelta_string(object ts):
"""
Parse a regular format timedelta string. Return an int64_t (in ns)
Expand Down Expand Up @@ -506,6 +526,33 @@ def _binary_op_method_timedeltalike(op, name):
# ----------------------------------------------------------------------
# Timedelta Construction

def _value_from_iso_match(match):
"""
Extracts and cleanses the appropriate values from a match object with
groups for each component of an ISO 8601 duration
Parameters
----------
match:
Regular expression with groups for each component of an ISO 8601
duration
Returns
-------
int
Precision in nanoseconds of matched ISO 8601 duration
"""
match_dict = {k: v for k, v in match.groupdict().items() if v}
for comp in ['milliseconds', 'microseconds', 'nanoseconds']:
if comp in match_dict:
match_dict[comp] ='{:0<3}'.format(match_dict[comp])

match_dict = {k: int(v) for k, v in match_dict.items()}
nano = match_dict.pop('nanoseconds', 0)

return nano + convert_to_timedelta64(timedelta(**match_dict), 'ns')


cdef _to_py_int_float(v):
# Note: This used to be defined inside Timedelta.__new__
# but cython will not allow `cdef` functions to be defined dynamically.
Expand Down Expand Up @@ -825,7 +872,11 @@ class Timedelta(_Timedelta):
if isinstance(value, Timedelta):
value = value.value
elif is_string_object(value):
value = np.timedelta64(parse_timedelta_string(value))
if value[0] == 'P': # hackish
match = match_iso_format(value)
value = _value_from_iso_match(match)
else:
value = np.timedelta64(parse_timedelta_string(value))
elif PyDelta_Check(value):
value = convert_to_timedelta64(value, 'ns')
elif is_timedelta64_object(value):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/scalar/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,3 +853,18 @@ def test_isoformat(self):
result = Timedelta(minutes=1).isoformat()
expected = 'P0DT0H1M0S'
assert result == expected

@pytest.mark.parametrize('fmt,exp', [
    ('P6DT0H50M3.010010012S',
     Timedelta(days=6, minutes=50, seconds=3, milliseconds=10,
               microseconds=10, nanoseconds=12)),
    ('P-6DT0H50M3.010010012S',
     Timedelta(days=-6, minutes=50, seconds=3, milliseconds=10,
               microseconds=10, nanoseconds=12)),
    ('P4DT12H30M5S',
     Timedelta(days=4, hours=12, minutes=30, seconds=5)),
    ('P0DT0H0M0.000000123S',
     Timedelta(nanoseconds=123)),
    ('P0DT0H0M0.00001S',
     Timedelta(microseconds=10)),
    ('P0DT0H0M0.001S',
     Timedelta(milliseconds=1)),
    ('P0DT0H1M0S',
     Timedelta(minutes=1)),
])
def test_iso_constructor(self, fmt, exp):
    """
    An ISO 8601 Duration string passed to ``Timedelta`` must compare equal
    to the same duration built from keyword components.
    """
    parsed = Timedelta(fmt)
    assert parsed == exp

0 comments on commit 057daa3

Please sign in to comment.