Skip to content

Commit

Permalink
API: Index doesn't result in PeriodIndex if Period contains NaT
Browse files Browse the repository at this point in the history
Author: sinhrks <sinhrks@gmail.com>

Closes #13664 from sinhrks/period_infer2 and squashes the following commits:

b208a9e [sinhrks] API: Index doesn't result in PeriodIndex if Period contains NaT
  • Loading branch information
sinhrks authored and jreback committed Jul 19, 2016
1 parent fafef5d commit 506520b
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 50 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ API changes
- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`)
- Passing ``Period`` with multiple frequencies to normal ``Index`` now returns ``Index`` with ``object`` dtype (:issue:`13664`)
- ``PeriodIndex.fillna`` with a ``Period`` that has a different freq now coerces to ``object`` dtype (:issue:`13664`)


.. _whatsnew_0190.api.tolist:
Expand Down Expand Up @@ -645,7 +647,6 @@ Bug Fixes
- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
- Clean some compile time warnings in datetime parsing (:issue:`13607`)


- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
is_list_like,
_ensure_object)
from pandas.types.cast import _maybe_upcast_putmask
from pandas.types.generic import ABCSeries, ABCIndex
from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex

# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
Expand Down Expand Up @@ -773,6 +773,15 @@ def wrapper(self, other, axis=None):
if (not lib.isscalar(lib.item_from_zerodim(other)) and
len(self) != len(other)):
raise ValueError('Lengths must match to compare')

if isinstance(other, ABCPeriodIndex):
# temp workaround until fixing GH 13637
# tested in test_nat_comparisons
# (pandas.tests.series.test_operators.TestSeriesOperators)
return self._constructor(na_op(self.values,
other.asobject.values),
index=self.index)

return self._constructor(na_op(self.values, np.asarray(other)),
index=self.index).__finalize__(self)
elif isinstance(other, pd.Categorical):
Expand Down
31 changes: 17 additions & 14 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
pass

# maybe coerce to a sub-class
from pandas.tseries.period import PeriodIndex
from pandas.tseries.period import (PeriodIndex,
IncompatibleFrequency)
if isinstance(data, PeriodIndex):
return PeriodIndex(data, copy=copy, name=name, **kwargs)
if issubclass(data.dtype.type, np.integer):
Expand Down Expand Up @@ -265,13 +266,15 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
return DatetimeIndex(subarr, copy=copy, name=name,
**kwargs)

elif (inferred.startswith('timedelta') or
lib.is_timedelta_array(subarr)):
elif inferred.startswith('timedelta'):
from pandas.tseries.tdi import TimedeltaIndex
return TimedeltaIndex(subarr, copy=copy, name=name,
**kwargs)
elif inferred == 'period':
return PeriodIndex(subarr, name=name, **kwargs)
try:
return PeriodIndex(subarr, name=name, **kwargs)
except IncompatibleFrequency:
pass
return cls._simple_new(subarr, name)

elif hasattr(data, '__array__'):
Expand Down Expand Up @@ -866,6 +869,16 @@ def _convert_can_do_setop(self, other):
result_name = self.name if self.name == other.name else None
return other, result_name

def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
return value

def _assert_can_do_op(self, value):
    """
    Check that ``value`` is valid for a scalar op.

    Raises
    ------
    TypeError
        If ``lib.isscalar(value)`` is false; the message names the type
        of the rejected value.
    """
    # Guard clause: scalars pass straight through.
    if lib.isscalar(value):
        return

    template = "'value' must be a scalar, passed: {0}"
    type_name = type(value).__name__
    raise TypeError(template.format(type_name))

@property
def nlevels(self):
return 1
Expand Down Expand Up @@ -1508,16 +1521,6 @@ def hasnans(self):
else:
return False

def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
return value

def _assert_can_do_op(self, value):
    """
    Check that ``value`` is valid for a scalar op.

    Raises
    ------
    TypeError
        If ``is_scalar(value)`` is false; the message names the type of
        the rejected value.
    """
    # Guard clause: scalars pass straight through.
    if is_scalar(value):
        return

    passed_type = type(value).__name__
    raise TypeError("'value' must be a scalar, passed: {0}".format(passed_type))

def putmask(self, mask, value):
"""
return a new Index of the values set with the mask
Expand Down
34 changes: 27 additions & 7 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ cdef inline bint is_null_datetimelike(v):


cdef inline bint is_null_datetime64(v):
# determine if we have a null for a datetime (or integer versions)x,
# determine if we have a null for a datetime (or integer versions),
# excluding np.timedelta64('nat')
if util._checknull(v):
return True
Expand All @@ -282,7 +282,7 @@ cdef inline bint is_null_datetime64(v):


cdef inline bint is_null_timedelta64(v):
# determine if we have a null for a timedelta (or integer versions)x,
# determine if we have a null for a timedelta (or integer versions),
# excluding np.datetime64('nat')
if util._checknull(v):
return True
Expand All @@ -293,6 +293,16 @@ cdef inline bint is_null_timedelta64(v):
return False


cdef inline bint is_null_period(v):
    # Null check used for Period data (or integer versions): true when
    # util._checknull accepts ``v`` or when ``v`` is the NaT singleton.
    # Intended to exclude np.datetime64('nat') and np.timedelta64('nat').
    return util._checknull(v) or v is NaT


cdef inline bint is_datetime(object o):
return PyDateTime_Check(o)

Expand Down Expand Up @@ -531,6 +541,7 @@ def is_timedelta_array(ndarray values):
return False
return null_count != n


def is_timedelta64_array(ndarray values):
cdef Py_ssize_t i, null_count = 0, n = len(values)
cdef object v
Expand All @@ -546,6 +557,7 @@ def is_timedelta64_array(ndarray values):
return False
return null_count != n


def is_timedelta_or_timedelta64_array(ndarray values):
""" infer with timedeltas and/or nat/none """
cdef Py_ssize_t i, null_count = 0, n = len(values)
Expand All @@ -562,6 +574,7 @@ def is_timedelta_or_timedelta64_array(ndarray values):
return False
return null_count != n


def is_date_array(ndarray[object] values):
cdef Py_ssize_t i, n = len(values)
if n == 0:
Expand All @@ -571,6 +584,7 @@ def is_date_array(ndarray[object] values):
return False
return True


def is_time_array(ndarray[object] values):
cdef Py_ssize_t i, n = len(values)
if n == 0:
Expand All @@ -582,15 +596,21 @@ def is_time_array(ndarray[object] values):


def is_period_array(ndarray[object] values):
cdef Py_ssize_t i, n = len(values)
from pandas.tseries.period import Period

cdef Py_ssize_t i, null_count = 0, n = len(values)
cdef object v
if n == 0:
return False

# return False for all nulls
for i in range(n):
if not isinstance(values[i], Period):
v = values[i]
if is_null_period(v):
# we are a regular null
if util._checknull(v):
null_count += 1
elif not is_period(v):
return False
return True
return null_count != n


cdef extern from "parse_helper.h":
Expand Down
84 changes: 62 additions & 22 deletions pandas/tests/indexes/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,10 @@ def test_pickle_compat_construction(self):
def test_construction_index_with_mixed_timezones(self):
# GH 11488
# no tz results in DatetimeIndex
result = Index(
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex(
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
result = Index([Timestamp('2011-01-01'),
Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01'),
Timestamp('2011-01-02')], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNone(result.tz)
Expand Down Expand Up @@ -295,9 +295,9 @@ def test_construction_dti_with_mixed_timezones(self):
Timestamp('2011-01-02 10:00',
tz='Asia/Tokyo')],
name='idx')
exp = DatetimeIndex(
[Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')
], tz='Asia/Tokyo', name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00')],
tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

Expand Down Expand Up @@ -338,6 +338,17 @@ def test_construction_dti_with_mixed_timezones(self):
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='US/Eastern', name='idx')

def test_construction_base_constructor(self):
    # The base Index constructor should give the same result as
    # DatetimeIndex for Timestamp data containing NaT / NaN, whether the
    # input is a list or an ndarray.
    cases = [
        [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')],
        [np.nan, pd.NaT, pd.Timestamp('2011-01-03')],
    ]
    for arr in cases:
        tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)),
                              pd.DatetimeIndex(np.array(arr)))

def test_astype(self):
# GH 13149, GH 13209
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
Expand Down Expand Up @@ -699,12 +710,11 @@ def test_fillna_datetime64(self):
pd.Timestamp('2011-01-01 11:00')], dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)

idx = pd.DatetimeIndex(
['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz)
idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT,
'2011-01-01 11:00'], tz=tz)

exp = pd.DatetimeIndex(
['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
], tz=tz)
exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
'2011-01-01 11:00'], tz=tz)
self.assert_index_equal(
idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp)

Expand Down Expand Up @@ -734,6 +744,26 @@ def setUp(self):
def create_index(self):
return period_range('20130101', periods=5, freq='D')

def test_construction_base_constructor(self):
    # GH 13664
    # Single-freq Period data, with NaT / NaN mixed in, should give the
    # same result through Index as through PeriodIndex, for both list and
    # ndarray input.
    same_freq_cases = [
        [pd.Period('2011-01', freq='M'), pd.NaT,
         pd.Period('2011-03', freq='M')],
        [np.nan, pd.NaT, pd.Period('2011-03', freq='M')],
    ]
    for arr in same_freq_cases:
        tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)),
                              pd.PeriodIndex(np.array(arr)))

    # Periods with differing freq are expected to match an explicit
    # object-dtype Index instead.
    mixed_freq = [pd.Period('2011-01', freq='M'), pd.NaT,
                  pd.Period('2011-03', freq='D')]
    tm.assert_index_equal(pd.Index(mixed_freq),
                          pd.Index(mixed_freq, dtype=object))
    tm.assert_index_equal(pd.Index(np.array(mixed_freq)),
                          pd.Index(np.array(mixed_freq), dtype=object))

def test_astype(self):
# GH 13149, GH 13209
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
Expand Down Expand Up @@ -874,7 +904,6 @@ def test_repeat(self):
self.assertEqual(res.freqstr, 'D')

def test_period_index_indexer(self):

# GH4125
idx = pd.period_range('2002-01', '2003-12', freq='M')
df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx)
Expand All @@ -886,23 +915,23 @@ def test_period_index_indexer(self):

def test_fillna_period(self):
# GH 11343
idx = pd.PeriodIndex(
['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H')
idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT,
'2011-01-01 11:00'], freq='H')

exp = pd.PeriodIndex(
['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
], freq='H')
exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00',
'2011-01-01 11:00'], freq='H')
self.assert_index_equal(
idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp)

exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x',
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)

with tm.assertRaisesRegexp(
ValueError,
'Input has different freq=D from PeriodIndex\\(freq=H\\)'):
idx.fillna(pd.Period('2011-01-01', freq='D'))
exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'),
pd.Period('2011-01-01', freq='D'),
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')),
exp)

def test_no_millisecond_field(self):
with self.assertRaises(AttributeError):
Expand All @@ -923,6 +952,17 @@ def setUp(self):
def create_index(self):
return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)

def test_construction_base_constructor(self):
    # The base Index constructor should give the same result as
    # TimedeltaIndex for Timedelta data containing NaT / NaN, whether the
    # input is a list or an ndarray.
    cases = [
        [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')],
        [np.nan, pd.NaT, pd.Timedelta('1 days')],
    ]
    for arr in cases:
        tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)),
                              pd.TimedeltaIndex(np.array(arr)))

def test_shift(self):
# test shift for TimedeltaIndex
# err8083
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/types/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,33 @@ def test_infer_dtype_timedelta(self):
dtype=object)
self.assertEqual(lib.infer_dtype(arr), 'mixed')

def test_infer_dtype_period(self):
    # GH 13664
    # Two Periods are inferred as 'period' whether they share a freq
    # ('D', 'D') or not ('D', 'M').
    for second_freq in ['D', 'M']:
        arr = np.array([pd.Period('2011-01', freq='D'),
                        pd.Period('2011-02', freq=second_freq)])
        self.assertEqual(pd.lib.infer_dtype(arr), 'period')

    # starts with nan
    for null in [pd.NaT, np.nan]:
        leading = np.array([null, pd.Period('2011-01', freq='D')])
        self.assertEqual(pd.lib.infer_dtype(leading), 'period')

        surrounded = np.array([null, pd.Period('2011-01', freq='D'), null])
        self.assertEqual(pd.lib.infer_dtype(surrounded), 'period')

    # different type of nat: a datetime64 NaT next to a Period is 'mixed',
    # regardless of which element comes first
    orderings = [
        [np.datetime64('nat'), pd.Period('2011-01', freq='M')],
        [pd.Period('2011-01', freq='M'), np.datetime64('nat')],
    ]
    for values in orderings:
        arr = np.array(values, dtype=object)
        self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')

def test_infer_dtype_all_nan_nat_like(self):
arr = np.array([np.nan, np.nan])
self.assertEqual(lib.infer_dtype(arr), 'floating')
Expand Down
7 changes: 5 additions & 2 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,12 +800,15 @@ def _ensure_datetimelike_to_i8(other):
if lib.isscalar(other) and isnull(other):
other = tslib.iNaT
elif isinstance(other, ABCIndexClass):

# convert tz if needed
if getattr(other, 'tz', None) is not None:
other = other.tz_localize(None).asi8
else:
other = other.asi8
else:
other = np.array(other, copy=False).view('i8')
try:
other = np.array(other, copy=False).view('i8')
except TypeError:
# period array cannot be coerced to int
other = Index(other).asi8
return other
Loading

0 comments on commit 506520b

Please sign in to comment.