Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/REF: TimedeltaIndex.__new__ #23539

Merged
merged 19 commits into from
Nov 11, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
afc2d30
simplify+fix+test TimedeltaIndex constructor
jbrockmendel Nov 7, 2018
e4b06ca
tests for datetime64 data being invalid, floats being valid iff non-l…
jbrockmendel Nov 7, 2018
231a5c1
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 7, 2018
1ff432b
comments and whitespace
jbrockmendel Nov 7, 2018
645e99c
GH references
jbrockmendel Nov 7, 2018
9c89746
deprecate instead of raising for datetime64 dtypes
jbrockmendel Nov 7, 2018
ef3f277
implement sequence_to_td64ns, deprecate datetime64 data, add and test…
jbrockmendel Nov 8, 2018
b20eda9
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 8, 2018
3f76c02
catch warnings
jbrockmendel Nov 8, 2018
9d79205
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 8, 2018
d73bee6
revert float changes, and tests
jbrockmendel Nov 8, 2018
f39b806
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 10, 2018
ccc7fcf
change ValueError-->TypeError
jbrockmendel Nov 10, 2018
6fda27e
double quotes
jbrockmendel Nov 10, 2018
c63796a
test that no copy is made with int64 data
jbrockmendel Nov 10, 2018
e9b5da6
update tests to TypeError
jbrockmendel Nov 10, 2018
b43e936
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 11, 2018
da6b286
dtype=object instead of 'O'
jbrockmendel Nov 11, 2018
898444f
use pytest.raises instead of tm.assert_raises_regex
jbrockmendel Nov 11, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):

result = cls._simple_new(values, freq=freq, tz=tz)
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
result.freq = to_offset(result.inferred_freq)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

# NB: Among other things not yet ported from the DatetimeIndex
# constructor, this does not call _deepcopy_if_needed
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,7 @@ def __new__(cls, values, freq=None):

result = cls._simple_new(values, freq=freq)
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
result.freq = to_offset(result.inferred_freq)

return result

Expand Down
31 changes: 16 additions & 15 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,21 @@ def __new__(cls, data=None,
dayfirst=False, yearfirst=False, dtype=None,
copy=False, name=None, verify_integrity=True):

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if is_scalar(data):
raise ValueError("{cls}() must be called with a "
"collection of some kind, {data} was passed"
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
.format(cls=cls.__name__, data=repr(data)))

# - Cases checked above all return/raise before reaching here - #

# This allows to later ensure that the 'copy' parameter is honored:
if isinstance(data, Index):
ref_to_data = data._data
Expand All @@ -253,20 +268,8 @@ def __new__(cls, data=None,
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if not isinstance(data, (np.ndarray, Index, ABCSeries,
DatetimeArrayMixin)):
if is_scalar(data):
raise ValueError('DatetimeIndex() must be called with a '
'collection of some kind, %s was passed'
% repr(data))
# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)
Expand Down Expand Up @@ -328,9 +331,7 @@ def __new__(cls, data=None,
cls._validate_frequency(subarr, freq, ambiguous=ambiguous)

if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.freq = to_offset(inferred)
subarr.freq = to_offset(subarr.inferred_freq)

return subarr._deepcopy_if_needed(ref_to_data, copy)

Expand Down
94 changes: 66 additions & 28 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,16 @@
is_float,
is_list_like,
is_scalar,
is_integer_dtype,
is_float_dtype,
is_object_dtype,
is_string_dtype,
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
pandas_dtype,
ensure_int64)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.arrays.timedeltas import (
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8)
Expand All @@ -35,7 +40,7 @@
from pandas.core.tools.timedeltas import (
to_timedelta, _coerce_scalar_to_timedelta_type)
from pandas._libs import (lib, index as libindex,
join as libjoin, Timedelta, NaT)
join as libjoin, Timedelta, NaT, iNaT)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64


Expand Down Expand Up @@ -139,12 +144,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
periods=None, closed=None, dtype=None, copy=False,
name=None, verify_integrity=True):

if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
return data.copy()
else:
return data._shallow_copy()

freq, freq_infer = dtl.maybe_infer_freq(freq)

if data is None:
Expand All @@ -154,32 +153,73 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
result.name = name
return result

if unit is not None:
data = to_timedelta(data, unit=unit, box=False)

if is_scalar(data):
raise ValueError('TimedeltaIndex() must be called with a '
raise ValueError('{cls}() must be called with a '
'collection of some kind, {data} was passed'
.format(data=repr(data)))
jreback marked this conversation as resolved.
Show resolved Hide resolved
.format(cls=cls.__name__, data=repr(data)))

# convert if not already
if getattr(data, 'dtype', None) != _TD_DTYPE:
if isinstance(data, TimedeltaIndex) and freq is None and name is None:
if copy:
return data.copy()
else:
return data._shallow_copy()

# - Cases checked above all return/raise before reaching here - #

if unit is not None:
data = to_timedelta(data, unit=unit, box=False)
elif copy:
data = np.array(data, copy=True)

data = np.array(data, copy=False)
if data.dtype == np.object_:
data = array_to_timedelta64(data)
if data.dtype != _TD_DTYPE:
if is_timedelta64_dtype(data):

# Unwrap whatever we have into a np.ndarray
if not hasattr(data, 'dtype'):
# e.g. list, tuple
if np.ndim(data) == 0:
# i.e.g generator
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
data = list(data)
data = np.array(data, copy=False)
elif isinstance(data, ABCSeries):
data = data._values
elif isinstance(data, (cls, TimedeltaArrayMixin)):
data = data._data

# Convert whatever we have into timedelta64[ns] dtype
if is_object_dtype(data) or is_string_dtype(data):
# no need to make a copy, need to convert if string-dtyped
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why would u check is_string_dtype?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because we have several tests that specifically pass e.g. np.array(['1 days', '4 days'], dtype='<S6'). (BTW we don't have similar tests for DatetimeIndex, which I intend to fix in the DTI analogue of this PR)

data = np.array(data, dtype=np.object_, copy=False)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
data = array_to_timedelta64(data).view(_TD_DTYPE)
copy = False
elif is_integer_dtype(data):
# treat as nanoseconds
# if something other than int64, convert
data = ensure_int64(data)
if copy:
# TODO: can we avoid branching here? `astype(data, copy=False)`
# appears to be making a copy
data = data.astype(_TD_DTYPE)
copy = False
else:
data = data.view(_TD_DTYPE)
elif is_float_dtype(data):
# We allow it if and only if it can be converted lossessly
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
mask = np.isnan(data)
casted = data.astype(np.int64)
if not (casted[~mask] == data[~mask]).all():
raise TypeError("floating-dtype data cannot be losslessly "
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
"converted to {cls}".format(cls=cls.__name__))
data = casted.view(_TD_DTYPE)
data[mask] = iNaT
copy = False
elif is_timedelta64_dtype(data):
if data.dtype != _TD_DTYPE:
# non-nano unit
# TODO: watch out for overflows
data = data.astype(_TD_DTYPE)
else:
data = ensure_int64(data).view(_TD_DTYPE)
copy = False
else:
raise TypeError("dtype {dtype} is invalid for constructing {cls}"
.format(dtype=data.dtype, cls=cls.__name__))

assert data.dtype == 'm8[ns]', data.dtype
data = np.array(data, copy=copy)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
assert data.dtype == 'm8[ns]', data

subarr = cls._simple_new(data, name=name, freq=freq)
# check that we are matching freqs
Expand All @@ -188,9 +228,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
cls._validate_frequency(subarr, freq)

if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.freq = to_offset(inferred)
subarr.freq = to_offset(subarr.inferred_freq)

return subarr

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,11 +1054,11 @@ def test_tdi_mul_float_series(self, box_df_fail):
idx = tm.box_expected(idx, box)

rng5f = np.arange(5, dtype='float64')
expected = TimedeltaIndex(rng5f * (rng5f + 0.1))
expected = TimedeltaIndex(rng5f * (rng5f + 1.0))
box2 = pd.Series if box is pd.Index else box
expected = tm.box_expected(expected, box2)

result = idx * Series(rng5f + 0.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel why did you change this? Left-over from initially changing the behaviour on floats?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

result = idx * Series(rng5f + 1.0)
tm.assert_equal(result, expected)

# TODO: Put Series/DataFrame in others?
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/indexes/timedeltas/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@

class TestTimedeltaIndex(object):

def test_dt64_data_invalid(self):
# GH#23539
dti = pd.date_range('2016-01-01', periods=3)
with pytest.raises(TypeError):
TimedeltaIndex(dti)

with pytest.raises(TypeError):
TimedeltaIndex(np.asarray(dti))
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

def test_float64_lossy_invalid(self):
# GH#23539 passing floats that would be truncated is unsupported
with pytest.raises(TypeError):
TimedeltaIndex([2.3, 9.0])

# but non-lossy floats are OK
tdi = TimedeltaIndex([2.0, 9.0])
expected = TimedeltaIndex([2, 9])
tm.assert_index_equal(tdi, expected)

# NaNs get converted to NaT
tdi = TimedeltaIndex([2.0, np.nan])
expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT])
tm.assert_index_equal(tdi, expected)

def test_construction_base_constructor(self):
arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')]
tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/indexes/timedeltas/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ def test_minmax(self):
assert pd.isna(getattr(obj, op)())

def test_numpy_minmax(self):
dr = pd.date_range(start='2016-01-15', end='2016-01-20')
td = TimedeltaIndex(np.asarray(dr))
td = timedelta_range('16815 days', '16820 days', freq='D')

assert np.min(td) == Timedelta('16815 days')
assert np.max(td) == Timedelta('16820 days')
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/scalar/timedelta/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ def test_td_rfloordiv_numeric_series(self):
# TODO: GH-19761. Change to TypeError.
ser // td

# ----------------------------------------------------------------
# Timedelta.__mod__, __rmod__

def test_mod_timedeltalike(self):
# GH#19365
td = Timedelta(hours=37)
Expand Down Expand Up @@ -545,9 +548,6 @@ def test_mod_offset(self):
assert isinstance(result, Timedelta)
assert result == Timedelta(hours=2)

# ----------------------------------------------------------------
# Timedelta.__mod__, __rmod__

def test_mod_numeric(self):
# GH#19365
td = Timedelta(hours=37)
Expand Down