Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Index doesn't results in PeriodIndex if Period contains NaT #13664

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ API changes
- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`)
- Passing ``Period`` objects with multiple frequencies to a normal ``Index`` now returns an ``Index`` with ``object`` dtype (:issue:`13664`)
- ``PeriodIndex.fillna`` with a ``Period`` that has a different freq now coerces to ``object`` dtype (:issue:`13664`)


.. _whatsnew_0190.api.tolist:
Expand Down Expand Up @@ -601,7 +603,6 @@ Bug Fixes
- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
- Clean some compile time warnings in datetime parsing (:issue:`13607`)


- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
is_list_like,
_ensure_object)
from pandas.types.cast import _maybe_upcast_putmask
from pandas.types.generic import ABCSeries, ABCIndex
from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex

# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
Expand Down Expand Up @@ -773,6 +773,15 @@ def wrapper(self, other, axis=None):
if (not lib.isscalar(lib.item_from_zerodim(other)) and
len(self) != len(other)):
raise ValueError('Lengths must match to compare')

if isinstance(other, ABCPeriodIndex):
# temp workaround until fixing GH 13637
# tested in test_nat_comparisons
# (pandas.tests.series.test_operators.TestSeriesOperators)
return self._constructor(na_op(self.values,
other.asobject.values),
index=self.index)

return self._constructor(na_op(self.values, np.asarray(other)),
index=self.index).__finalize__(self)
elif isinstance(other, pd.Categorical):
Expand Down
31 changes: 17 additions & 14 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
pass

# maybe coerce to a sub-class
from pandas.tseries.period import PeriodIndex
from pandas.tseries.period import (PeriodIndex,
IncompatibleFrequency)
if isinstance(data, PeriodIndex):
return PeriodIndex(data, copy=copy, name=name, **kwargs)
if issubclass(data.dtype.type, np.integer):
Expand Down Expand Up @@ -265,13 +266,15 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
return DatetimeIndex(subarr, copy=copy, name=name,
**kwargs)

elif (inferred.startswith('timedelta') or
lib.is_timedelta_array(subarr)):
elif inferred.startswith('timedelta'):
from pandas.tseries.tdi import TimedeltaIndex
return TimedeltaIndex(subarr, copy=copy, name=name,
**kwargs)
elif inferred == 'period':
return PeriodIndex(subarr, name=name, **kwargs)
try:
return PeriodIndex(subarr, name=name, **kwargs)
except IncompatibleFrequency:
pass
return cls._simple_new(subarr, name)

elif hasattr(data, '__array__'):
Expand Down Expand Up @@ -866,6 +869,16 @@ def _convert_can_do_setop(self, other):
result_name = self.name if self.name == other.name else None
return other, result_name

def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
return value

def _assert_can_do_op(self, value):
    """
    Validate that ``value`` is a scalar.

    Raises
    ------
    TypeError
        When ``lib.isscalar(value)`` is false; the message names the
        type of the rejected value.
    """
    if lib.isscalar(value):
        return
    type_name = type(value).__name__
    raise TypeError("'value' must be a scalar, passed: {0}".format(type_name))

@property
def nlevels(self):
return 1
Expand Down Expand Up @@ -1508,16 +1521,6 @@ def hasnans(self):
else:
return False

def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
return value

def _assert_can_do_op(self, value):
    """
    Validate that ``value`` is a scalar.

    Raises
    ------
    TypeError
        When ``is_scalar(value)`` is false; the message names the
        type of the rejected value.
    """
    if is_scalar(value):
        return
    type_name = type(value).__name__
    raise TypeError("'value' must be a scalar, passed: {0}".format(type_name))

def putmask(self, mask, value):
"""
return a new Index of the values set with the mask
Expand Down
34 changes: 27 additions & 7 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ cdef inline bint is_null_datetimelike(v):


cdef inline bint is_null_datetime64(v):
# determine if we have a null for a datetime (or integer versions)x,
# determine if we have a null for a datetime (or integer versions),
# excluding np.timedelta64('nat')
if util._checknull(v):
return True
Expand All @@ -282,7 +282,7 @@ cdef inline bint is_null_datetime64(v):


cdef inline bint is_null_timedelta64(v):
# determine if we have a null for a timedelta (or integer versions)x,
# determine if we have a null for a timedelta (or integer versions),
# excluding np.datetime64('nat')
if util._checknull(v):
return True
Expand All @@ -293,6 +293,16 @@ cdef inline bint is_null_timedelta64(v):
return False


cdef inline bint is_null_period(v):
    # Null check for values destined for a Period array (or the integer
    # versions): true for anything ``util._checknull`` accepts and for
    # the ``NaT`` singleton itself. np.datetime64('nat') and
    # np.timedelta64('nat') are deliberately not treated as Period nulls.
    return util._checknull(v) or v is NaT


cdef inline bint is_datetime(object o):
return PyDateTime_Check(o)

Expand Down Expand Up @@ -531,6 +541,7 @@ def is_timedelta_array(ndarray values):
return False
return null_count != n


def is_timedelta64_array(ndarray values):
cdef Py_ssize_t i, null_count = 0, n = len(values)
cdef object v
Expand All @@ -546,6 +557,7 @@ def is_timedelta64_array(ndarray values):
return False
return null_count != n


def is_timedelta_or_timedelta64_array(ndarray values):
""" infer with timedeltas and/or nat/none """
cdef Py_ssize_t i, null_count = 0, n = len(values)
Expand All @@ -562,6 +574,7 @@ def is_timedelta_or_timedelta64_array(ndarray values):
return False
return null_count != n


def is_date_array(ndarray[object] values):
cdef Py_ssize_t i, n = len(values)
if n == 0:
Expand All @@ -571,6 +584,7 @@ def is_date_array(ndarray[object] values):
return False
return True


def is_time_array(ndarray[object] values):
cdef Py_ssize_t i, n = len(values)
if n == 0:
Expand All @@ -582,15 +596,21 @@ def is_time_array(ndarray[object] values):


def is_period_array(ndarray[object] values):
cdef Py_ssize_t i, n = len(values)
from pandas.tseries.period import Period

cdef Py_ssize_t i, null_count = 0, n = len(values)
cdef object v
if n == 0:
return False

# return False for all nulls
for i in range(n):
if not isinstance(values[i], Period):
v = values[i]
if is_null_period(v):
# we are a regular null
if util._checknull(v):
null_count += 1
elif not is_period(v):
return False
return True
return null_count != n


cdef extern from "parse_helper.h":
Expand Down
84 changes: 62 additions & 22 deletions pandas/tests/indexes/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,10 @@ def test_pickle_compat_construction(self):
def test_construction_index_with_mixed_timezones(self):
# GH 11488
# no tz results in DatetimeIndex
result = Index(
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex(
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
result = Index([Timestamp('2011-01-01'),
Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01'),
Timestamp('2011-01-02')], name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))
self.assertIsNone(result.tz)
Expand Down Expand Up @@ -295,9 +295,9 @@ def test_construction_dti_with_mixed_timezones(self):
Timestamp('2011-01-02 10:00',
tz='Asia/Tokyo')],
name='idx')
exp = DatetimeIndex(
[Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')
], tz='Asia/Tokyo', name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00')],
tz='Asia/Tokyo', name='idx')
self.assert_index_equal(result, exp, exact=True)
self.assertTrue(isinstance(result, DatetimeIndex))

Expand Down Expand Up @@ -338,6 +338,17 @@ def test_construction_dti_with_mixed_timezones(self):
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='US/Eastern', name='idx')

def test_construction_base_constructor(self):
    # The base Index constructor should give the same result as
    # DatetimeIndex for Timestamps mixed with NaT / NaN, whether the
    # input is a list or an ndarray built from it.
    cases = [
        [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')],
        [np.nan, pd.NaT, pd.Timestamp('2011-01-03')],
    ]
    for values in cases:
        tm.assert_index_equal(pd.Index(values), pd.DatetimeIndex(values))
        tm.assert_index_equal(pd.Index(np.array(values)),
                              pd.DatetimeIndex(np.array(values)))

def test_astype(self):
# GH 13149, GH 13209
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
Expand Down Expand Up @@ -699,12 +710,11 @@ def test_fillna_datetime64(self):
pd.Timestamp('2011-01-01 11:00')], dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)

idx = pd.DatetimeIndex(
['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz)
idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT,
'2011-01-01 11:00'], tz=tz)

exp = pd.DatetimeIndex(
['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
], tz=tz)
exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
'2011-01-01 11:00'], tz=tz)
self.assert_index_equal(
idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp)

Expand Down Expand Up @@ -734,6 +744,26 @@ def setUp(self):
def create_index(self):
return period_range('20130101', periods=5, freq='D')

def test_construction_base_constructor(self):
    # GH 13664
    # Periods sharing one freq (optionally mixed with NaT / NaN) should
    # give the same result from Index as from PeriodIndex, for list and
    # ndarray input alike.
    same_freq_cases = [
        [pd.Period('2011-01', freq='M'), pd.NaT,
         pd.Period('2011-03', freq='M')],
        [np.nan, pd.NaT, pd.Period('2011-03', freq='M')],
    ]
    for values in same_freq_cases:
        tm.assert_index_equal(pd.Index(values), pd.PeriodIndex(values))
        tm.assert_index_equal(pd.Index(np.array(values)),
                              pd.PeriodIndex(np.array(values)))

    # Periods with differing freqs: the result is compared against an
    # explicitly object-dtype Index instead.
    mixed_freq = [pd.Period('2011-01', freq='M'), pd.NaT,
                  pd.Period('2011-03', freq='D')]
    tm.assert_index_equal(pd.Index(mixed_freq),
                          pd.Index(mixed_freq, dtype=object))
    tm.assert_index_equal(pd.Index(np.array(mixed_freq)),
                          pd.Index(np.array(mixed_freq), dtype=object))

def test_astype(self):
# GH 13149, GH 13209
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
Expand Down Expand Up @@ -874,7 +904,6 @@ def test_repeat(self):
self.assertEqual(res.freqstr, 'D')

def test_period_index_indexer(self):

# GH4125
idx = pd.period_range('2002-01', '2003-12', freq='M')
df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx)
Expand All @@ -886,23 +915,23 @@ def test_period_index_indexer(self):

def test_fillna_period(self):
# GH 11343
idx = pd.PeriodIndex(
['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H')
idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT,
'2011-01-01 11:00'], freq='H')

exp = pd.PeriodIndex(
['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
], freq='H')
exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00',
'2011-01-01 11:00'], freq='H')
self.assert_index_equal(
idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp)

exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x',
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)

with tm.assertRaisesRegexp(
ValueError,
'Input has different freq=D from PeriodIndex\\(freq=H\\)'):
idx.fillna(pd.Period('2011-01-01', freq='D'))
exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'),
pd.Period('2011-01-01', freq='D'),
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')),
exp)

def test_no_millisecond_field(self):
with self.assertRaises(AttributeError):
Expand All @@ -923,6 +952,17 @@ def setUp(self):
def create_index(self):
return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)

def test_construction_base_constructor(self):
    # The base Index constructor should give the same result as
    # TimedeltaIndex for Timedeltas mixed with NaT / NaN, whether the
    # input is a list or an ndarray built from it.
    cases = [
        [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')],
        [np.nan, pd.NaT, pd.Timedelta('1 days')],
    ]
    for values in cases:
        tm.assert_index_equal(pd.Index(values), pd.TimedeltaIndex(values))
        tm.assert_index_equal(pd.Index(np.array(values)),
                              pd.TimedeltaIndex(np.array(values)))

def test_shift(self):
# test shift for TimedeltaIndex
# err8083
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/types/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,33 @@ def test_infer_dtype_timedelta(self):
dtype=object)
self.assertEqual(lib.infer_dtype(arr), 'mixed')

def test_infer_dtype_period(self):
    # GH 13664
    # Arrays holding only Periods are expected to infer as 'period',
    # whether or not the freqs agree.
    for second in [pd.Period('2011-02', freq='D'),
                   pd.Period('2011-02', freq='M')]:
        arr = np.array([pd.Period('2011-01', freq='D'), second])
        self.assertEqual(pd.lib.infer_dtype(arr), 'period')

    # A leading (and optionally trailing) NaT / NaN is still 'period'.
    for null in [pd.NaT, np.nan]:
        for values in ([null, pd.Period('2011-01', freq='D')],
                       [null, pd.Period('2011-01', freq='D'), null]):
            arr = np.array(values)
            self.assertEqual(pd.lib.infer_dtype(arr), 'period')

    # np.datetime64('nat') next to a Period, in either position, is
    # expected to infer as 'mixed'.
    for values in ([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
                   [pd.Period('2011-01', freq='M'), np.datetime64('nat')]):
        arr = np.array(values, dtype=object)
        self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')

def test_infer_dtype_all_nan_nat_like(self):
arr = np.array([np.nan, np.nan])
self.assertEqual(lib.infer_dtype(arr), 'floating')
Expand Down
7 changes: 5 additions & 2 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,12 +800,15 @@ def _ensure_datetimelike_to_i8(other):
if lib.isscalar(other) and isnull(other):
other = tslib.iNaT
elif isinstance(other, ABCIndexClass):

# convert tz if needed
if getattr(other, 'tz', None) is not None:
other = other.tz_localize(None).asi8
else:
other = other.asi8
else:
other = np.array(other, copy=False).view('i8')
try:
other = np.array(other, copy=False).view('i8')
except TypeError:
# period array cannot be coerced to int
other = Index(other).asi8
return other
Loading