Skip to content

Commit

Permalink
BUG: Fix IntervalIndex constructor inconsistencies (#18424)
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel authored and jreback committed Nov 24, 2017
1 parent 154c416 commit de4b384
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 57 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ Other API Changes
- `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`)
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
- :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)


.. _whatsnew_0220.deprecations:
Expand Down Expand Up @@ -137,6 +138,7 @@ Indexing
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
- Bug in :func:`MultiIndex.remove_unused_levels`` which would fill nan values (:issue:`18417`)
- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`)
-

I/O
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ cpdef intervals_to_interval_bounds(ndarray intervals):
int64_t n = len(intervals)
ndarray left, right

left = np.empty(n, dtype=object)
right = np.empty(n, dtype=object)
left = np.empty(n, dtype=intervals.dtype)
right = np.empty(n, dtype=intervals.dtype)

for i in range(len(intervals)):
interval = intervals[i]
Expand Down
57 changes: 39 additions & 18 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas.core.dtypes.missing import notna, isna
from pandas.core.dtypes.generic import ABCPeriodIndex
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas.core.dtypes.cast import maybe_convert_platform
from pandas.core.dtypes.common import (
_ensure_platform_int,
is_list_like,
Expand All @@ -31,7 +32,9 @@
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.indexes.multi import MultiIndex
from pandas.compat.numpy import function as nv
from pandas.core import common as com
from pandas.core.common import (
_all_not_none, _any_none, _asarray_tuplesafe, _count_not_none,
is_bool_indexer, _maybe_box_datetimelike, _not_none)
from pandas.util._decorators import cache_readonly, Appender
from pandas.core.config import get_option
from pandas.tseries.frequencies import to_offset
Expand Down Expand Up @@ -176,7 +179,7 @@ class IntervalIndex(IntervalMixin, Index):

_mask = None

def __new__(cls, data, closed='right',
def __new__(cls, data, closed=None,
name=None, copy=False, dtype=None,
fastpath=False, verify_integrity=True):

Expand All @@ -197,8 +200,17 @@ def __new__(cls, data, closed='right',
if is_scalar(data):
cls._scalar_data_error(data)

data = IntervalIndex.from_intervals(data, name=name)
left, right, closed = data.left, data.right, data.closed
data = maybe_convert_platform(data)
left, right, infer_closed = intervals_to_interval_bounds(data)

if _all_not_none(closed, infer_closed) and closed != infer_closed:
# GH 18421
msg = ("conflicting values for closed: constructor got "
"'{closed}', inferred from data '{infer_closed}'"
.format(closed=closed, infer_closed=infer_closed))
raise ValueError(msg)

closed = closed or infer_closed

return cls._simple_new(left, right, closed, name,
copy=copy, verify_integrity=verify_integrity)
Expand Down Expand Up @@ -376,7 +388,8 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False):
IntervalIndex.from_tuples : Construct an IntervalIndex from a
list/array of tuples
"""
breaks = np.asarray(breaks)
breaks = maybe_convert_platform(breaks)

return cls.from_arrays(breaks[:-1], breaks[1:], closed,
name=name, copy=copy)

Expand Down Expand Up @@ -416,8 +429,9 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False):
IntervalIndex.from_tuples : Construct an IntervalIndex from a
list/array of tuples
"""
left = np.asarray(left)
right = np.asarray(right)
left = maybe_convert_platform(left)
right = maybe_convert_platform(right)

return cls._simple_new(left, right, closed, name=name,
copy=copy, verify_integrity=True)

Expand Down Expand Up @@ -460,8 +474,12 @@ def from_intervals(cls, data, name=None, copy=False):
IntervalIndex.from_tuples : Construct an IntervalIndex from a
list/array of tuples
"""
data = np.asarray(data)
left, right, closed = intervals_to_interval_bounds(data)
if isinstance(data, IntervalIndex):
left, right, closed = data.left, data.right, data.closed
name = name or data.name
else:
data = maybe_convert_platform(data)
left, right, closed = intervals_to_interval_bounds(data)
return cls.from_arrays(left, right, closed, name=name, copy=False)

@classmethod
Expand Down Expand Up @@ -497,8 +515,11 @@ def from_tuples(cls, data, closed='right', name=None, copy=False):
IntervalIndex.from_intervals : Construct an IntervalIndex from an array
of Interval objects
"""
left = []
right = []
if len(data):
left, right = [], []
else:
left = right = data

for d in data:

if isna(d):
Expand All @@ -517,7 +538,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False):
return cls.from_arrays(left, right, closed, name=name, copy=False)

def to_tuples(self):
return Index(com._asarray_tuplesafe(zip(self.left, self.right)))
return Index(_asarray_tuplesafe(zip(self.left, self.right)))

@cache_readonly
def _multiindex(self):
Expand Down Expand Up @@ -838,7 +859,7 @@ def get_loc(self, key, method=None):
return self._engine.get_loc(key)

def get_value(self, series, key):
if com.is_bool_indexer(key):
if is_bool_indexer(key):
loc = key
elif is_list_like(key):
loc = self.get_indexer(key)
Expand Down Expand Up @@ -1166,7 +1187,7 @@ def _is_type_compatible(a, b):
return ((is_number(a) and is_number(b)) or
(is_ts_compat(a) and is_ts_compat(b)) or
(is_td_compat(a) and is_td_compat(b)) or
com._any_none(a, b))
_any_none(a, b))


def interval_range(start=None, end=None, periods=None, freq=None,
Expand Down Expand Up @@ -1244,13 +1265,13 @@ def interval_range(start=None, end=None, periods=None, freq=None,
--------
IntervalIndex : an Index of intervals that are all closed on the same side.
"""
if com._count_not_none(start, end, periods) != 2:
if _count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and periods, '
'exactly two must be specified')

start = com._maybe_box_datetimelike(start)
end = com._maybe_box_datetimelike(end)
endpoint = next(com._not_none(start, end))
start = _maybe_box_datetimelike(start)
end = _maybe_box_datetimelike(end)
endpoint = next(_not_none(start, end))

if not _is_valid_endpoint(start):
msg = 'start must be numeric or datetime-like, got {start}'
Expand Down
126 changes: 90 additions & 36 deletions pandas/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pandas import (Interval, IntervalIndex, Index, isna,
interval_range, Timestamp, Timedelta,
compat, date_range, timedelta_range, DateOffset)
from pandas.compat import zip
from pandas.compat import lzip
from pandas.tseries.offsets import Day
from pandas._libs.interval import IntervalTree
from pandas.tests.indexes.common import Base
Expand Down Expand Up @@ -38,7 +38,7 @@ def create_index_with_nan(self, closed='right'):
@pytest.mark.parametrize('name', [None, 'foo'])
def test_constructors(self, closed, name):
left, right = Index([0, 1, 2, 3]), Index([1, 2, 3, 4])
ivs = [Interval(l, r, closed=closed) for l, r in zip(left, right)]
ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)]
expected = IntervalIndex._simple_new(
left=left, right=right, closed=closed, name=name)

Expand All @@ -57,7 +57,7 @@ def test_constructors(self, closed, name):
tm.assert_index_equal(result, expected)

result = IntervalIndex.from_tuples(
zip(left, right), closed=closed, name=name)
lzip(left, right), closed=closed, name=name)
tm.assert_index_equal(result, expected)

result = Index(ivs, name=name)
Expand All @@ -68,6 +68,9 @@ def test_constructors(self, closed, name):
tm.assert_index_equal(Index(expected), expected)
tm.assert_index_equal(IntervalIndex(expected), expected)

result = IntervalIndex.from_intervals(expected)
tm.assert_index_equal(result, expected)

result = IntervalIndex.from_intervals(
expected.values, name=expected.name)
tm.assert_index_equal(result, expected)
Expand All @@ -86,63 +89,118 @@ def test_constructors(self, closed, name):
breaks, closed=expected.closed, name=expected.name)
tm.assert_index_equal(result, expected)

def test_constructors_other(self):

# all-nan
result = IntervalIndex.from_intervals([np.nan])
expected = np.array([np.nan], dtype=object)
tm.assert_numpy_array_equal(result.values, expected)

# empty
result = IntervalIndex.from_intervals([])
expected = np.array([], dtype=object)
tm.assert_numpy_array_equal(result.values, expected)
@pytest.mark.parametrize('data', [[np.nan], [np.nan] * 2, [np.nan] * 50])
def test_constructors_nan(self, closed, data):
# GH 18421
expected_values = np.array(data, dtype=object)
expected_idx = IntervalIndex(data, closed=closed)

# validate the expected index
assert expected_idx.closed == closed
tm.assert_numpy_array_equal(expected_idx.values, expected_values)

result = IntervalIndex.from_tuples(data, closed=closed)
tm.assert_index_equal(result, expected_idx)
tm.assert_numpy_array_equal(result.values, expected_values)

result = IntervalIndex.from_breaks([np.nan] + data, closed=closed)
tm.assert_index_equal(result, expected_idx)
tm.assert_numpy_array_equal(result.values, expected_values)

result = IntervalIndex.from_arrays(data, data, closed=closed)
tm.assert_index_equal(result, expected_idx)
tm.assert_numpy_array_equal(result.values, expected_values)

if closed == 'right':
# Can't specify closed for IntervalIndex.from_intervals
result = IntervalIndex.from_intervals(data)
tm.assert_index_equal(result, expected_idx)
tm.assert_numpy_array_equal(result.values, expected_values)

@pytest.mark.parametrize('data', [
[],
np.array([], dtype='int64'),
np.array([], dtype='float64'),
np.array([], dtype=object)])
def test_constructors_empty(self, data, closed):
# GH 18421
expected_dtype = data.dtype if isinstance(data, np.ndarray) else object
expected_values = np.array([], dtype=object)
expected_index = IntervalIndex(data, closed=closed)

# validate the expected index
assert expected_index.empty
assert expected_index.closed == closed
assert expected_index.dtype.subtype == expected_dtype
tm.assert_numpy_array_equal(expected_index.values, expected_values)

result = IntervalIndex.from_tuples(data, closed=closed)
tm.assert_index_equal(result, expected_index)
tm.assert_numpy_array_equal(result.values, expected_values)

result = IntervalIndex.from_breaks(data, closed=closed)
tm.assert_index_equal(result, expected_index)
tm.assert_numpy_array_equal(result.values, expected_values)

result = IntervalIndex.from_arrays(data, data, closed=closed)
tm.assert_index_equal(result, expected_index)
tm.assert_numpy_array_equal(result.values, expected_values)

if closed == 'right':
# Can't specify closed for IntervalIndex.from_intervals
result = IntervalIndex.from_intervals(data)
tm.assert_index_equal(result, expected_index)
tm.assert_numpy_array_equal(result.values, expected_values)

def test_constructors_errors(self):

# scalar
msg = ('IntervalIndex(...) must be called with a collection of '
msg = ('IntervalIndex\(...\) must be called with a collection of '
'some kind, 5 was passed')
with pytest.raises(TypeError, message=msg):
with tm.assert_raises_regex(TypeError, msg):
IntervalIndex(5)

# not an interval
msg = "type <class 'numpy.int32'> with value 0 is not an interval"
with pytest.raises(TypeError, message=msg):
msg = ("type <(class|type) 'numpy.int64'> with value 0 "
"is not an interval")
with tm.assert_raises_regex(TypeError, msg):
IntervalIndex([0, 1])

with pytest.raises(TypeError, message=msg):
with tm.assert_raises_regex(TypeError, msg):
IntervalIndex.from_intervals([0, 1])

# invalid closed
msg = "invalid options for 'closed': invalid"
with pytest.raises(ValueError, message=msg):
with tm.assert_raises_regex(ValueError, msg):
IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid')

# mismatched closed
# mismatched closed within intervals
msg = 'intervals must all be closed on the same side'
with pytest.raises(ValueError, message=msg):
with tm.assert_raises_regex(ValueError, msg):
IntervalIndex.from_intervals([Interval(0, 1),
Interval(1, 2, closed='left')])

with pytest.raises(ValueError, message=msg):
IntervalIndex.from_arrays([0, 10], [3, 5])

with pytest.raises(ValueError, message=msg):
with tm.assert_raises_regex(ValueError, msg):
Index([Interval(0, 1), Interval(2, 3, closed='left')])

# mismatched closed inferred from intervals vs constructor.
msg = 'conflicting values for closed'
with tm.assert_raises_regex(ValueError, msg):
iv = [Interval(0, 1, closed='both'), Interval(1, 2, closed='both')]
IntervalIndex(iv, closed='neither')

# no point in nesting periods in an IntervalIndex
msg = 'Period dtypes are not supported, use a PeriodIndex instead'
with pytest.raises(ValueError, message=msg):
with tm.assert_raises_regex(ValueError, msg):
IntervalIndex.from_breaks(
pd.period_range('2000-01-01', periods=3))

# decreasing breaks/arrays
msg = 'left side of interval must be <= right side'
with pytest.raises(ValueError, message=msg):
with tm.assert_raises_regex(ValueError, msg):
IntervalIndex.from_breaks(range(10, -1, -1))

with pytest.raises(ValueError, message=msg):
with tm.assert_raises_regex(ValueError, msg):
IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))

def test_constructors_datetimelike(self, closed):
Expand Down Expand Up @@ -865,23 +923,23 @@ def test_is_non_overlapping_monotonic(self, closed):
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is True

idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is True

# Should be False in all cases (overlapping)
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is False

idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is False

# Should be False in all cases (non-monotonic)
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is False

idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is False

# Should be False for closed='both', overwise True (GH16560)
Expand Down Expand Up @@ -1054,10 +1112,6 @@ def test_constructor_coverage(self):
end=end.to_pydatetime())
tm.assert_index_equal(result, expected)

result = pd.interval_range(start=start.tz_localize('UTC'),
end=end.tz_localize('UTC'))
tm.assert_index_equal(result, expected)

result = pd.interval_range(start=start.asm8, end=end.asm8)
tm.assert_index_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_getitem_with_scalar(self):
def test_nonoverlapping_monotonic(self, direction, closed):
tpls = [(0, 1), (2, 3), (4, 5)]
if direction == 'decreasing':
tpls = reversed(tpls)
tpls = tpls[::-1]

idx = IntervalIndex.from_tuples(tpls, closed=closed)
s = Series(list('abc'), idx)
Expand Down

0 comments on commit de4b384

Please sign in to comment.