Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Deprecate passing range-like arguments to DatetimeIndex, TimedeltaIndex #23919

Merged
merged 21 commits into from
Nov 28, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f8bed85
Deprecate passing range-like arguments to DatetimeIndex, TimedeltaIndex
jbrockmendel Nov 26, 2018
5d8a107
GH reference
jbrockmendel Nov 26, 2018
28e2184
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 26, 2018
0f75b9d
Flake8 fixups, whatsnew typo
jbrockmendel Nov 26, 2018
2c40c3a
update usage in resample
jbrockmendel Nov 26, 2018
d67f87c
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 26, 2018
afdab5b
targeted tests
jbrockmendel Nov 26, 2018
8f435ed
backticks
jbrockmendel Nov 26, 2018
4d7c9e2
test the right thing
jbrockmendel Nov 26, 2018
2e587e3
avoid verify_integrity warnings
jbrockmendel Nov 26, 2018
0469b74
avoid verify_integrity
jbrockmendel Nov 26, 2018
43a52fc
isort
jbrockmendel Nov 26, 2018
f4e281e
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 27, 2018
15e6c30
change default to None
jbrockmendel Nov 27, 2018
5dc66f3
set verify_integrity conditionally
jbrockmendel Nov 27, 2018
b931878
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 27, 2018
07bfc45
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 27, 2018
e209a81
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 28, 2018
d6df7a3
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 28, 2018
eb5d9c5
Merge branch 'master' of https://github.com/pandas-dev/pandas into ge…
jbrockmendel Nov 28, 2018
cc40717
restore edit that got lost in rebase
jbrockmendel Nov 28, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,7 @@ Deprecations
- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of
:meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`).
- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`)
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments ``start``, ``end``, and ``periods`` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
33 changes: 23 additions & 10 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,16 @@ def __new__(cls, data=None,
dayfirst=False, yearfirst=False, dtype=None,
copy=False, name=None, verify_integrity=True):

if verify_integrity is not True:
warnings.warn("The 'verify_integrity' argument is deprecated, "
"will be removed in a future version.",
FutureWarning, stacklevel=2)

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
warnings.warn("Creating a DatetimeIndex by passing range "
"endpoints is deprecated. Use "
"`pandas.date_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
Expand Down Expand Up @@ -1514,9 +1522,13 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
if freq is None and com._any_none(periods, start, end):
freq = 'D'

return DatetimeIndex(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
result = DatetimeIndex._generate_range(
start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, **kwargs)

result.name = name
return result


def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
Expand Down Expand Up @@ -1602,9 +1614,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
'weekmask are passed, got frequency {freq}').format(freq=freq)
raise ValueError(msg)

return DatetimeIndex(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
return date_range(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)


def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
Expand Down Expand Up @@ -1661,9 +1673,10 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
holidays = kwargs.pop('holidays', [])
weekmask = kwargs.pop('weekmask', 'Mon Tue Wed Thu Fri')
freq = CDay(holidays=holidays, weekmask=weekmask)
return DatetimeIndex(start=start, end=end, periods=periods, freq=freq,
tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)

return date_range(start=start, end=end, periods=periods, freq=freq,
tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)


def _time_to_micros(time):
Expand Down
18 changes: 15 additions & 3 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" implement the TimedeltaIndex """
from datetime import datetime
import warnings

import numpy as np

Expand Down Expand Up @@ -131,10 +132,18 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
periods=None, closed=None, dtype=None, copy=False,
name=None, verify_integrity=True):

if verify_integrity is not True:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why isn't the default of verify_integrity None?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because by default we do verify the integrity of a passed frequency. Best guess as to initial motivation is that verify_integrity=False is kind of like fastpath=True and was never really intended to be user-facing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right but aren't you deprecating it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. In the future it will just be set to verify_integrity=True at the top of __new__ (and when the time comes, in the TDA/DTA constructors it will always be True).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need the variable to exist because there are cases in which we can determine it is not necessary, in which case we can skip a potentially-expensive check.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't recall that (and don't see how it would work), but yeah, not passing freq would make verify_integrity unnecessary.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need the variable to exist because there are cases in which we can determine it is not necessary, in which case we can skip a potentially-expensive check.

@jbrockmendel Isn't the end goal to actually remove the verify_integrity keyword from the Index constructors? (not just keep it with a fixed True value). Because otherwise, why are you deprecating verify_integrity=False if we actually want to use it ourselves?
In cases that we determine the check is not necessary, we can use _simple_new ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually verify_integrity will not be in the signature. The first line of the method will just define it to be True.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, but then as @jreback said, we should put it at None so we can also deprecate the case for somebody setting it to True explicitly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

warnings.warn("The 'verify_integrity' argument is deprecated, "
"will be removed in a future version.",
FutureWarning, stacklevel=2)

freq, freq_infer = dtl.maybe_infer_freq(freq)

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
warnings.warn("Creating a TimedeltaIndex by passing range "
"endpoints is deprecated. Use "
"`pandas.timedelta_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
Expand Down Expand Up @@ -727,5 +736,8 @@ def timedelta_range(start=None, end=None, periods=None, freq=None,
if freq is None and com._any_none(periods, start, end):
freq = 'D'

return TimedeltaIndex(start=start, end=end, periods=periods,
freq=freq, name=name, closed=closed)
freq, freq_infer = dtl.maybe_infer_freq(freq)
result = TimedeltaIndex._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
return result
10 changes: 5 additions & 5 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1375,11 +1375,11 @@ def _get_time_bins(self, ax):
# because replace() will swallow the nanosecond part
# thus last bin maybe slightly before the end if the end contains
# nanosecond part and lead to `Values falls after last bin` error
binner = labels = DatetimeIndex(freq=self.freq,
start=first,
end=last,
tz=tz,
name=ax.name)
binner = labels = date_range(freq=self.freq,
start=first,
end=last,
tz=tz,
name=ax.name)

# GH 15549
# In edge case of tz-aware resampling binner last index can be
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ def test_setitem_boolean_column(self):

def test_frame_setitem_timestamp(self):
# GH#2155
columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=BDay())
columns = date_range(start='1/1/2012', end='2/1/2012', freq=BDay())
index = lrange(10)
data = DataFrame(columns=columns, index=index)
t = datetime(2012, 11, 1)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pandas import (date_range, Timestamp,
Index, MultiIndex, DataFrame, Series,
Panel, DatetimeIndex, read_csv)
Panel, read_csv)
from pandas.errors import PerformanceWarning
from pandas.util.testing import (assert_frame_equal,
assert_series_equal, assert_almost_equal)
Expand Down Expand Up @@ -1438,7 +1438,7 @@ def test_groupby_sort_multiindex_series():
def test_groupby_reindex_inside_function():

periods = 1000
ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods)
ind = date_range(start='2012/1/1', freq='5min', periods=periods)
df = DataFrame({'high': np.arange(
periods), 'low': np.arange(periods)}, index=ind)

Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import pandas as pd
from pandas import (DataFrame, date_range, Index,
Series, MultiIndex, Timestamp, DatetimeIndex)
Series, MultiIndex, Timestamp)
from pandas.core.groupby.ops import BinGrouper
from pandas.compat import StringIO
from pandas.util import testing as tm
Expand Down Expand Up @@ -374,9 +374,9 @@ def sumfunc_value(x):
expected.reset_index(drop=True))

def test_groupby_groups_datetimeindex(self):
# #1430
# GH#1430
periods = 1000
ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods)
ind = pd.date_range(start='2012/1/1', freq='5min', periods=periods)
df = DataFrame({'high': np.arange(periods),
'low': np.arange(periods)}, index=ind)
grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))
Expand All @@ -385,7 +385,7 @@ def test_groupby_groups_datetimeindex(self):
groups = grouped.groups
assert isinstance(list(groups.keys())[0], datetime)

# GH 11442
# GH#11442
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not important, but was just wondering: why are you adding the '#' everywhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly for internal consistency (not a big deal, but for grepping purposes). A little bit because I was curious how long it would take before someone asked about it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also have the gh-xxxx style; slight preference for that.

index = pd.date_range('2015/01/01', periods=5, name='date')
df = pd.DataFrame({'A': [5, 6, 7, 8, 9],
'B': [1, 2, 3, 4, 5]}, index=index)
Expand Down
49 changes: 25 additions & 24 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,10 +314,11 @@ def test_constructor_coverage(self):

msg = 'periods must be a number, got foo'
with pytest.raises(TypeError, match=msg):
DatetimeIndex(start='1/1/2000', periods='foo', freq='D')
date_range(start='1/1/2000', periods='foo', freq='D')

pytest.raises(ValueError, DatetimeIndex, start='1/1/2000',
end='1/10/2000')
with pytest.raises(ValueError):
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(start='1/1/2000', end='1/10/2000')

with pytest.raises(TypeError):
DatetimeIndex('1/1/2000')
Expand Down Expand Up @@ -351,11 +352,11 @@ def test_constructor_coverage(self):
pytest.raises(ValueError, DatetimeIndex,
['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')

pytest.raises(ValueError, DatetimeIndex, start='2011-01-01',
pytest.raises(ValueError, date_range, start='2011-01-01',
freq='b')
pytest.raises(ValueError, DatetimeIndex, end='2011-01-01',
pytest.raises(ValueError, date_range, end='2011-01-01',
freq='B')
pytest.raises(ValueError, DatetimeIndex, periods=10, freq='D')
pytest.raises(ValueError, date_range, periods=10, freq='D')

@pytest.mark.parametrize('freq', ['AS', 'W-SUN'])
def test_constructor_datetime64_tzformat(self, freq):
Expand Down Expand Up @@ -436,8 +437,8 @@ def test_constructor_dtype(self):
tm.assert_index_equal(idx, result)

def test_constructor_name(self):
idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A',
name='TEST')
idx = date_range(start='2000-01-01', periods=1, freq='A',
name='TEST')
assert idx.name == 'TEST'

def test_000constructor_resolution(self):
Expand All @@ -460,7 +461,7 @@ def test_constructor_start_end_with_tz(self, tz):
# GH 18595
start = Timestamp('2013-01-01 06:00:00', tz='America/Los_Angeles')
end = Timestamp('2013-01-02 06:00:00', tz='America/Los_Angeles')
result = DatetimeIndex(freq='D', start=start, end=end, tz=tz)
result = date_range(freq='D', start=start, end=end, tz=tz)
expected = DatetimeIndex(['2013-01-01 06:00:00',
'2013-01-02 06:00:00'],
tz='America/Los_Angeles')
Expand Down Expand Up @@ -576,7 +577,7 @@ def test_ctor_str_intraday(self):
assert rng[0].second == 1

def test_is_(self):
dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
dti = date_range(start='1/1/2005', end='12/1/2005', freq='M')
assert dti.is_(dti)
assert dti.is_(dti.view())
assert not dti.is_(dti.copy())
Expand Down Expand Up @@ -604,12 +605,12 @@ def test_constructor_int64_nocopy(self):
@pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', 'BH',
'T', 'S', 'L', 'U', 'H', 'N', 'C'])
def test_from_freq_recreate_from_data(self, freq):
org = DatetimeIndex(start='2001/02/01 09:00', freq=freq, periods=1)
org = date_range(start='2001/02/01 09:00', freq=freq, periods=1)
idx = DatetimeIndex(org, freq=freq)
tm.assert_index_equal(idx, org)

org = DatetimeIndex(start='2001/02/01 09:00', freq=freq,
tz='US/Pacific', periods=1)
org = date_range(start='2001/02/01 09:00', freq=freq,
tz='US/Pacific', periods=1)
idx = DatetimeIndex(org, freq=freq, tz='US/Pacific')
tm.assert_index_equal(idx, org)

Expand Down Expand Up @@ -648,30 +649,30 @@ def test_datetimeindex_constructor_misc(self):

sdate = datetime(1999, 12, 25)
edate = datetime(2000, 1, 1)
idx = DatetimeIndex(start=sdate, freq='1B', periods=20)
idx = date_range(start=sdate, freq='1B', periods=20)
assert len(idx) == 20
assert idx[0] == sdate + 0 * offsets.BDay()
assert idx.freq == 'B'

idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20)
idx = date_range(end=edate, freq=('D', 5), periods=20)
assert len(idx) == 20
assert idx[-1] == edate
assert idx.freq == '5D'

idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN')
idx2 = DatetimeIndex(start=sdate, end=edate,
freq=offsets.Week(weekday=6))
idx1 = date_range(start=sdate, end=edate, freq='W-SUN')
idx2 = date_range(start=sdate, end=edate,
freq=offsets.Week(weekday=6))
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq

idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS')
idx2 = DatetimeIndex(start=sdate, end=edate,
freq=offsets.QuarterBegin(startingMonth=1))
idx1 = date_range(start=sdate, end=edate, freq='QS')
idx2 = date_range(start=sdate, end=edate,
freq=offsets.QuarterBegin(startingMonth=1))
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq

idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ')
idx2 = DatetimeIndex(start=sdate, end=edate,
freq=offsets.BQuarterEnd(startingMonth=12))
idx1 = date_range(start=sdate, end=edate, freq='BQ')
idx2 = date_range(start=sdate, end=edate,
freq=offsets.BQuarterEnd(startingMonth=12))
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq
8 changes: 4 additions & 4 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,10 +561,10 @@ def test_3(self):
assert rng == expected

def test_precision_finer_than_offset(self):
# GH 9907
result1 = DatetimeIndex(start='2015-04-15 00:00:03',
# GH#9907
result1 = pd.date_range(start='2015-04-15 00:00:03',
end='2016-04-22 00:00:00', freq='Q')
result2 = DatetimeIndex(start='2015-04-15 00:00:03',
result2 = pd.date_range(start='2015-04-15 00:00:03',
end='2015-06-22 00:00:04', freq='W')
expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03',
'2015-12-31 00:00:03', '2016-03-31 00:00:03']
Expand Down Expand Up @@ -594,7 +594,7 @@ def test_mismatching_tz_raises_err(self, start, end):
with pytest.raises(TypeError):
pd.date_range(start, end)
with pytest.raises(TypeError):
pd.DatetimeIndex(start, end, freq=BDay())
pd.date_range(start, end, freq=BDay())

def test_CalendarDay_range_with_dst_crossing(self):
# GH 20596
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/indexes/datetimes/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ def test_hash_error(self):
hash(index)

def test_stringified_slice_with_tz(self):
# GH2658
# GH#2658
import datetime
start = datetime.datetime.now()
idx = DatetimeIndex(start=start, freq="1d", periods=10)
idx = date_range(start=start, freq="1d", periods=10)
df = DataFrame(lrange(10), index=idx)
df["2013-01-14 23:44:34.437768-05:00":] # no exception here

Expand Down Expand Up @@ -293,8 +293,8 @@ def test_ns_index(self):
index = pd.DatetimeIndex(dt, freq=freq, name='time')
self.assert_index_parameters(index)

new_index = pd.DatetimeIndex(start=index[0], end=index[-1],
freq=index.freq)
new_index = pd.date_range(start=index[0], end=index[-1],
freq=index.freq)
self.assert_index_parameters(new_index)

def test_join_with_period_index(self, join_type):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def test_to_native_types():
index = DatetimeIndex(freq='1D', periods=3, start='2017-01-01')
index = pd.date_range(freq='1D', periods=3, start='2017-01-01')

# First, with no arguments.
expected = np.array(['2017-01-01', '2017-01-02',
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def test_take2(self, tz):
dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15),
datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)]

idx = DatetimeIndex(start='2010-01-01 09:00',
idx = pd.date_range(start='2010-01-01 09:00',
end='2010-02-01 09:00', freq='H', tz=tz,
name='idx')
expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz)
Expand Down
Loading