Skip to content

Commit

Permalink
Generalize NA Compat
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel committed Nov 17, 2017
1 parent 4644c53 commit 8986439
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 40 deletions.
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.21.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ Bug Fixes
- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` where all-equal values caused a floating-point issue (:issue:`18044`)
- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
- Bug in ``IntervalIndex.insert`` when attempting to insert ``NaN`` (:issue:`18295`)

Conversion
^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Other API Changes
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)

- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)

.. _whatsnew_0220.deprecations:

Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3728,6 +3728,10 @@ def insert(self, loc, item):
-------
new_index : Index
"""
if lib.checknull(item):
# GH 18295
item = self._na_value

_self = np.asarray(self)
item = self._coerce_scalar_to_index(item)._values
idx = np.concatenate((_self[:loc], item, _self[loc:]))
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np
from pandas._libs import index as libindex
from pandas._libs import index as libindex, lib

from pandas import compat
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -688,7 +688,7 @@ def insert(self, loc, item):
"""
code = self.categories.get_indexer([item])
if (code == -1):
if (code == -1) and not lib.checknull(item):
raise TypeError("cannot insert an item into a CategoricalIndex "
"that is not already an existing category")

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1751,6 +1751,9 @@ def insert(self, loc, item):
-------
new_index : Index
"""
if lib.checknull(item):
# GH 18295
item = self._na_value

freq = None

Expand All @@ -1767,14 +1770,14 @@ def insert(self, loc, item):
elif (loc == len(self)) and item - self.freq == self[-1]:
freq = self.freq
item = _to_m8(item, tz=self.tz)

try:
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
if self.tz is not None:
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq,
tz=self.tz)

except (AttributeError, TypeError):

# fall back to object index
Expand Down
8 changes: 2 additions & 6 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Index, _ensure_index,
default_pprint, _index_shared_docs)

from pandas._libs import Timestamp, Timedelta
from pandas._libs import lib, Timestamp, Timedelta
from pandas._libs.interval import (
Interval, IntervalMixin, IntervalTree,
intervals_to_interval_bounds)
Expand Down Expand Up @@ -985,12 +985,8 @@ def insert(self, loc, item):
'side as the index')
left_insert = item.left
right_insert = item.right
elif is_scalar(item) and isna(item):
elif lib.checknull(item):
# GH 18295
if item is not self.left._na_value:
raise TypeError('cannot insert with incompatible NA value: '
'got {item}, expected {na}'
.format(item=item, na=self.left._na_value))
left_insert = right_insert = item
else:
raise ValueError('can only insert Interval objects and NA into '
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,16 +849,18 @@ def insert(self, loc, item):
-------
new_index : Index
"""

# try to convert if possible
if _is_convertible_to_td(item):
try:
item = Timedelta(item)
except Exception:
pass
elif lib.checknull(item):
# GH 18295
item = self._na_value

freq = None
if isinstance(item, Timedelta) or item is NaT:
if isinstance(item, Timedelta) or (item is self._na_value):

# check freq can be preserved on edge cases
if self.freq is not None:
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ def test_insert(self):
assert result.tz == expected.tz
assert result.freq is None

# GH 18295 (test missing)
expected = DatetimeIndex(
['20170101', pd.NaT, '20170102', '20170103', '20170104'])
for na in (np.nan, pd.NaT, None):
result = date_range('20170101', periods=4).insert(1, na)
tm.assert_index_equal(result, expected)

def test_delete(self):
idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,3 +706,11 @@ def test_join_self(self, how):
index = period_range('1/1/2000', periods=10)
joined = index.join(index, how=how)
assert index is joined

def test_insert(self):
    # GH 18295 (test missing)
    # Whichever NA flavour is passed to ``insert``, the resulting
    # PeriodIndex is expected to hold ``NaT`` at the insertion point.
    missing_values = (np.nan, pd.NaT, None)
    expected = PeriodIndex(
        ['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
    for na in missing_values:
        quarters = period_range('2017Q1', periods=4, freq='Q')
        tm.assert_index_equal(quarters.insert(1, na), expected)
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,12 @@ def test_insert(self):
null_index = Index([])
tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))

# GH 18295 (test missing)
expected = Index(['a', np.nan, 'b', 'c'])
for na in (np.nan, pd.NaT, None):
result = Index(list('abc')).insert(1, na)
tm.assert_index_equal(result, expected)

def test_delete(self):
idx = Index(['a', 'b', 'c', 'd'], name='idx')

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ def test_insert(self):
# invalid
pytest.raises(TypeError, lambda: ci.insert(0, 'd'))

# GH 18295 (test missing)
expected = CategoricalIndex(['a', np.nan, 'a', 'b', 'c', 'b'])
for na in (np.nan, pd.NaT, None):
result = CategoricalIndex(list('aabcb')).insert(1, na)
tm.assert_index_equal(result, expected)

def test_delete(self):

ci = self.create_index()
Expand Down
66 changes: 39 additions & 27 deletions pandas/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,38 +246,50 @@ def test_delete(self):
actual = self.index.delete(0)
assert expected.equals(actual)

def test_insert(self):
expected = IntervalIndex.from_breaks(range(4))
actual = self.index.insert(2, Interval(2, 3))
assert expected.equals(actual)

pytest.raises(ValueError, self.index.insert, 0, 1)
pytest.raises(ValueError, self.index.insert, 0,
Interval(2, 3, closed='left'))

@pytest.mark.parametrize('data', [
interval_range(0, periods=10),
interval_range(1.7, periods=8, freq=2.5),
interval_range(Timestamp('20170101'), periods=12),
interval_range(Timedelta('1 day'), periods=6),
interval_range(0, periods=10, closed='neither'),
interval_range(1.7, periods=8, freq=2.5, closed='both'),
interval_range(Timestamp('20170101'), periods=12, closed='left'),
interval_range(Timedelta('1 day'), periods=6, closed='right'),
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
def test_insert_na(self, data):
# GH 18295
valid_na, invalid_na = np.nan, pd.NaT
if data.left._na_value is pd.NaT:
valid_na, invalid_na = invalid_na, valid_na

# valid insertion
expected = IntervalIndex([data[0], np.nan]).append(data[1:])
result = data.insert(1, valid_na)
def test_insert(self, data):
item = data[0]
idx_item = IntervalIndex([item], closed=data.closed)

# start
expected = idx_item.append(data)
result = data.insert(0, item)
tm.assert_index_equal(result, expected)

# invalid insertion
msg = ('cannot insert with incompatible NA value: got {invalid}, '
'expected {valid}').format(invalid=invalid_na, valid=valid_na)
with tm.assert_raises_regex(TypeError, msg):
data.insert(1, invalid_na)
# end
expected = data.append(idx_item)
result = data.insert(len(data), item)
tm.assert_index_equal(result, expected)

# mid
expected = data[:3].append(idx_item).append(data[3:])
result = data.insert(3, item)
tm.assert_index_equal(result, expected)

# invalid type
msg = 'can only insert Interval objects and NA into an IntervalIndex'
with tm.assert_raises_regex(ValueError, msg):
data.insert(1, 'foo')

# invalid closed
msg = 'inserted item must be closed on the same side as the index'
for closed in {'left', 'right', 'both', 'neither'} - {item.closed}:
with tm.assert_raises_regex(ValueError, msg):
bad_item = Interval(item.left, item.right, closed=closed)
data.insert(1, bad_item)

# GH 18295 (test missing)
na_idx = IntervalIndex([np.nan], closed=data.closed)
for na in (np.nan, pd.NaT, None):
expected = data[:1].append(na_idx).append(data[1:])
result = data.insert(1, na)
tm.assert_index_equal(result, expected)

def test_take(self):
actual = self.index.take([0, 1])
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,13 @@ def test_modulo(self):
expected = Index(index.values % 2)
tm.assert_index_equal(index % 2, expected)

def test_insert(self):
    # GH 18295 (test missing)
    # np.nan, pd.NaT and None are each inserted at position 1; every
    # one of them must come back as NaN inside a Float64Index.
    missing_values = (np.nan, pd.NaT, None)
    expected = Float64Index([0, np.nan, 1, 2, 3, 4])
    for na in missing_values:
        index = self.create_index()
        tm.assert_index_equal(index.insert(1, na), expected)


class TestFloat64Index(Numeric):
_holder = Float64Index
Expand Down Expand Up @@ -1206,3 +1213,10 @@ def test_join_outer(self):
tm.assert_index_equal(res, eres)
tm.assert_numpy_array_equal(lidx, elidx)
tm.assert_numpy_array_equal(ridx, eridx)

def test_insert(self):
    # GH 18295 (test missing)
    # NOTE(review): the expected index has 0 at the insertion point, so
    # this test pins an inserted NA being stored as 0 in a UInt64Index
    # -- confirm that this is the intended behaviour.
    missing_values = (np.nan, pd.NaT, None)
    expected = UInt64Index([0, 0, 1, 2, 3, 4])
    for na in missing_values:
        index = self.create_index()
        tm.assert_index_equal(index.insert(1, na), expected)
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ def test_insert(self):
# test 0th element
tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))

# GH 18295 (test missing)
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
for na in (np.nan, pd.NaT, None):
result = RangeIndex(5).insert(1, na)
tm.assert_index_equal(result, expected)

def test_delete(self):

idx = RangeIndex(5, name='Foo')
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/timedeltas/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ def test_insert(self):
assert result.name == expected.name
assert result.freq == expected.freq

# GH 18295 (test missing)
expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
for na in (np.nan, pd.NaT, None):
result = timedelta_range('1day', '3day').insert(1, na)
tm.assert_index_equal(result, expected)

def test_delete(self):
idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')

Expand Down

0 comments on commit 8986439

Please sign in to comment.