From f4e7012a274f555674d10fb278dd351faa8b5f8a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 1 Aug 2018 14:53:23 -0700 Subject: [PATCH] use memoryviews instead of ndarrays (#22147) --- pandas/_libs/tslibs/timedeltas.pyx | 130 +++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index cec07dbebdfa43..3cda8b90c62127 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -13,7 +13,7 @@ from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as cnp -from numpy cimport int64_t, ndarray +from numpy cimport int64_t cnp.import_array() from cpython.datetime cimport (datetime, timedelta, @@ -33,6 +33,7 @@ from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, from nattype import nat_strings, NaT from nattype cimport checknull_with_nat, NPY_NAT +from offsets cimport to_offset # ---------------------------------------------------------------------- # Constants @@ -78,6 +79,44 @@ cdef dict timedelta_abbrevs = { 'D': 'd', _no_input = object() + +# ---------------------------------------------------------------------- +# API + +def ints_to_pytimedelta(int64_t[:] arr, box=False): + """ + convert an i8 repr to an ndarray of timedelta or Timedelta (if box == + True) + + Parameters + ---------- + arr : ndarray[int64_t] + box : bool, default False + + Returns + ------- + result : ndarray[object] + array of Timedelta or timedeltas objects + """ + cdef: + Py_ssize_t i, n = len(arr) + int64_t value + object[:] result = np.empty(n, dtype=object) + + for i in range(n): + + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + if box: + result[i] = Timedelta(value) + else: + result[i] = timedelta(microseconds=int(value) / 1000) + + return result.base # .base to access underlying np.ndarray + + # ---------------------------------------------------------------------- cpdef int64_t delta_to_nanoseconds(delta) except? -1: @@ -144,7 +183,11 @@ cpdef convert_to_timedelta64(object ts, object unit): ts = cast_from_unit(ts, unit) ts = np.timedelta64(ts) elif is_string_object(ts): - ts = np.timedelta64(parse_timedelta_string(ts)) + if len(ts) > 0 and ts[0] == 'P': + ts = parse_iso_format_string(ts) + else: + ts = parse_timedelta_string(ts) + ts = np.timedelta64(ts) elif hasattr(ts, 'delta'): ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') @@ -156,7 +199,7 @@ cpdef convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): +cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -164,7 +207,7 @@ cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): cdef: Py_ssize_t i, n - ndarray[int64_t] iresult + int64_t[:] iresult if errors not in ('ignore', 'raise', 'coerce'): raise ValueError("errors must be one of 'ignore', " @@ -190,7 +233,7 @@ cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): else: raise - return iresult + return iresult.base # .base to access underlying np.ndarray cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: @@ -795,12 +838,81 @@ cdef class _Timedelta(timedelta): @property def asm8(self): - """ return a numpy timedelta64 array view of myself """ + """ + Return a numpy timedelta64 array scalar view. + + Provides access to the array scalar view (i.e. a combination of the + value and the units) associated with the numpy.timedelta64().view(), + including a 64-bit integer representation of the timedelta in + nanoseconds (Python int compatible). + + Returns + ------- + numpy timedelta64 array scalar view + Array scalar view of the timedelta in nanoseconds. + + Examples + -------- + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.asm8 + numpy.timedelta64(86520000003042,'ns') + + >>> td = pd.Timedelta('2 min 3 s') + >>> td.asm8 + numpy.timedelta64(123000000000,'ns') + + >>> td = pd.Timedelta('3 ms 5 us') + >>> td.asm8 + numpy.timedelta64(3005000,'ns') + + >>> td = pd.Timedelta(42, unit='ns') + >>> td.asm8 + numpy.timedelta64(42,'ns') + """ return np.int64(self.value).view('m8[ns]') @property def resolution(self): - """ return a string representing the lowest resolution that we have """ + """ + Return a string representing the lowest timedelta resolution. + + Each timedelta has a defined resolution that represents the lowest OR + most granular level of precision. Each level of resolution is + represented by a short string as defined below: + + Resolution: Return value + + * Days: 'D' + * Hours: 'H' + * Minutes: 'T' + * Seconds: 'S' + * Milliseconds: 'L' + * Microseconds: 'U' + * Nanoseconds: 'N' + + Returns + ------- + str + Timedelta resolution. + + Examples + -------- + >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') + >>> td.resolution + 'N' + + >>> td = pd.Timedelta('1 days 2 min 3 us') + >>> td.resolution + 'U' + + >>> td = pd.Timedelta('2 min 3 s') + >>> td.resolution + 'S' + + >>> td = pd.Timedelta(36, unit='us') + >>> td.resolution + 'U' + """ self._ensure_components() if self._ns: @@ -904,6 +1016,9 @@ cdef class _Timedelta(timedelta): def __str__(self): return self._repr_base(format='long') + def __bool__(self): + return self.value != 0 + def isoformat(self): """ Format Timedelta as ISO 8601 Duration like @@ -1063,7 +1178,6 @@ class Timedelta(_Timedelta): cdef: int64_t result, unit - from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos result = unit * rounder(self.value / float(unit)) return Timedelta(result, unit='ns')