Skip to content

Commit

Permalink
Fix IntervalDtype Bugs and Inconsistencies
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel committed Jan 8, 2018
1 parent d5194e5 commit 6b00114
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 53 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ Other API Changes
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)

.. _whatsnew_0230.deprecations:

Expand Down Expand Up @@ -377,7 +378,7 @@ Conversion
- Fixed bug where comparing :class:`DatetimeIndex` failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`)
- Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`)
- Bug where dividing a scalar timedelta-like object by a :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`)
-
- Bug in ``IntervalDtype`` when constructing two instances with subtype ``CategoricalDtype`` where the second instance used cached attributes from the first (:issue:`18980`)

Indexing
^^^^^^^^
Expand Down
37 changes: 20 additions & 17 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ class IntervalDtype(ExtensionDtype):
THIS IS NOT A REAL NUMPY DTYPE
"""
name = 'interval'
type = IntervalDtypeType
kind = None
str = '|O08'
Expand Down Expand Up @@ -653,8 +654,8 @@ def __new__(cls, subtype=None):
u.subtype = None
return u
elif (isinstance(subtype, compat.string_types) and
subtype == 'interval'):
subtype = ''
subtype in ('interval', 'interval[]')):
subtype = None
else:
if isinstance(subtype, compat.string_types):
m = cls._match.search(subtype)
Expand All @@ -678,11 +679,15 @@ def __new__(cls, subtype=None):
raise TypeError(msg)

try:
return cls._cache[str(subtype)]
# GH 18980: need to combine since str and hash individually may not
# be unique, e.g. str(CategoricalDtype) always returns 'category',
# and hash(np.dtype('<m8')) == hash(np.dtype('<m8[ns]'))
key = ''.join([str(subtype), str(hash(subtype))])
return cls._cache[key]
except KeyError:
u = object.__new__(cls)
u.subtype = subtype
cls._cache[str(subtype)] = u
cls._cache[key] = u
return u

@classmethod
Expand All @@ -692,31 +697,29 @@ def construct_from_string(cls, string):
if it's not possible
"""
if isinstance(string, compat.string_types):
try:
return cls(string)
except ValueError:
pass
raise TypeError("could not construct IntervalDtype")
return cls(string)
msg = "a string needs to be passed, got type {typ}"
raise TypeError(msg.format(typ=type(string)))

def __unicode__(self):
if self.subtype is None:
return "interval"
return "interval[{subtype}]".format(subtype=self.subtype)

@property
def name(self):
return str(self)

def __hash__(self):
# make myself hashable
return hash(str(self))

def __eq__(self, other):
if isinstance(other, compat.string_types):
return other == self.name or other == self.name.title()

return (isinstance(other, IntervalDtype) and
self.subtype == other.subtype)
return other.title() in (self.name.title(), str(self).title())
elif not isinstance(other, IntervalDtype):
return False
elif self.subtype is None or other.subtype is None:
# None should match any subtype
return True
else:
return self.subtype == other.subtype

@classmethod
def is_dtype(cls, dtype):
Expand Down
103 changes: 68 additions & 35 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def test_hash_vs_equality(self):
assert dtype2 == dtype
assert dtype3 == dtype
assert dtype is dtype2
assert dtype2 is dtype
assert dtype2 is dtype3
assert dtype3 is dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)
Expand All @@ -451,14 +451,19 @@ def test_hash_vs_equality(self):
assert hash(dtype2) == hash(dtype2)
assert hash(dtype2) == hash(dtype3)

def test_construction(self):
with pytest.raises(ValueError):
IntervalDtype('xx')
@pytest.mark.parametrize('subtype', [
'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')])
def test_construction(self, subtype):
i = IntervalDtype(subtype)
assert i.subtype == np.dtype('int64')
assert is_interval_dtype(i)

for s in ['interval[int64]', 'Interval[int64]', 'int64']:
i = IntervalDtype(s)
assert i.subtype == np.dtype('int64')
assert is_interval_dtype(i)
@pytest.mark.parametrize('subtype', [None, 'interval', 'interval[]'])
def test_construction_generic(self, subtype):
# generic
i = IntervalDtype(subtype)
assert i.subtype is None
assert is_interval_dtype(i)

@pytest.mark.parametrize('subtype', [
CategoricalDtype(list('abc'), False),
Expand All @@ -471,17 +476,27 @@ def test_construction_not_supported(self, subtype):
with tm.assert_raises_regex(TypeError, msg):
IntervalDtype(subtype)

def test_construction_generic(self):
# generic
i = IntervalDtype('interval')
assert i.subtype == ''
assert is_interval_dtype(i)
assert str(i) == 'interval[]'
def test_construction_errors(self):
msg = 'could not construct IntervalDtype'
with tm.assert_raises_regex(ValueError, msg):
IntervalDtype('xx')

i = IntervalDtype()
assert i.subtype is None
assert is_interval_dtype(i)
assert str(i) == 'interval'
def test_construction_from_string(self):
result = IntervalDtype('interval[int64]')
assert is_dtype_equal(self.dtype, result)
result = IntervalDtype.construct_from_string('interval[int64]')
assert is_dtype_equal(self.dtype, result)

@pytest.mark.parametrize('string', [
'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None])
def test_construction_from_string_errors(self, string):
if isinstance(string, string_types):
error, msg = ValueError, 'could not construct IntervalDtype'
else:
error, msg = TypeError, 'a string needs to be passed, got type'

with tm.assert_raises_regex(error, msg):
IntervalDtype.construct_from_string(string)

def test_subclass(self):
a = IntervalDtype('interval[int64]')
Expand All @@ -506,36 +521,45 @@ def test_is_dtype(self):
assert not IntervalDtype.is_dtype(np.int64)
assert not IntervalDtype.is_dtype(np.float64)

def test_identity(self):
assert (IntervalDtype('interval[int64]') ==
IntervalDtype('interval[int64]'))

def test_coerce_to_dtype(self):
assert (_coerce_to_dtype('interval[int64]') ==
IntervalDtype('interval[int64]'))

def test_construction_from_string(self):
result = IntervalDtype('interval[int64]')
assert is_dtype_equal(self.dtype, result)
result = IntervalDtype.construct_from_string('interval[int64]')
assert is_dtype_equal(self.dtype, result)
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('foo')
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('interval[foo]')
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('foo[int64]')

def test_equality(self):
assert is_dtype_equal(self.dtype, 'interval[int64]')
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))

assert not is_dtype_equal(self.dtype, 'int64')
assert not is_dtype_equal(IntervalDtype('int64'),
IntervalDtype('float64'))

@pytest.mark.parametrize('subtype', [
None, 'interval', 'interval[]', 'int64', 'uint64', 'float64', object,
CategoricalDtype(), 'datetime64', 'timedelta64', PeriodDtype('Q')])
def test_equality_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
assert is_dtype_equal(dtype, 'interval')
assert is_dtype_equal(dtype, IntervalDtype())

@pytest.mark.parametrize('subtype', [
'int64', 'uint64', 'float64', 'complex128', np.dtype('O'),
CategoricalDtype(), 'datetime64', 'timedelta64', PeriodDtype('Q')])
def test_name_repr(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
expected = 'interval[{subtype}]'.format(subtype=subtype)
assert str(dtype) == expected
assert dtype.name == 'interval'

@pytest.mark.parametrize('subtype', [None, 'interval', 'interval[]'])
def test_name_repr_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
assert str(dtype) == 'interval'
assert dtype.name == 'interval'

def test_basic(self):
assert is_interval_dtype(self.dtype)

Expand Down Expand Up @@ -576,6 +600,15 @@ def test_caching(self):
tm.round_trip_pickle(dtype)
assert len(IntervalDtype._cache) == 0

def test_caching_categoricaldtype(self):
# GH 18980
cdt1 = CategoricalDtype(list('abc'), True)
cdt2 = CategoricalDtype(list('wxyz'), False)
idt1 = IntervalDtype(cdt1)
idt2 = IntervalDtype(cdt2)
assert idt1.subtype is cdt1
assert idt2.subtype is cdt2


class TestCategoricalDtypeParametrized(object):

Expand Down

0 comments on commit 6b00114

Please sign in to comment.