Skip to content

Commit

Permalink
Fix IntervalDtype Bugs and Inconsistencies (pandas-dev#18997)
Browse files Browse the repository at this point in the history
* Fix IntervalDtype Bugs and Inconsistencies

* remove code for unsupported dtypes and remove 'interval[]'
  • Loading branch information
jschendel authored and TomAugspurger committed Jan 10, 2018
1 parent 055bfa6 commit 982e112
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 55 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ Other API Changes
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)

.. _whatsnew_0230.deprecations:

Expand Down
34 changes: 14 additions & 20 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ class IntervalDtype(ExtensionDtype):
THIS IS NOT A REAL NUMPY DTYPE
"""
name = 'interval'
type = IntervalDtypeType
kind = None
str = '|O08'
Expand Down Expand Up @@ -653,8 +654,8 @@ def __new__(cls, subtype=None):
u.subtype = None
return u
elif (isinstance(subtype, compat.string_types) and
subtype == 'interval'):
subtype = ''
subtype.lower() == 'interval'):
subtype = None
else:
if isinstance(subtype, compat.string_types):
m = cls._match.search(subtype)
Expand All @@ -666,11 +667,6 @@ def __new__(cls, subtype=None):
except TypeError:
raise ValueError("could not construct IntervalDtype")

if subtype is None:
u = object.__new__(cls)
u.subtype = None
return u

if is_categorical_dtype(subtype) or is_string_dtype(subtype):
# GH 19016
msg = ('category, object, and string subtypes are not supported '
Expand All @@ -692,31 +688,29 @@ def construct_from_string(cls, string):
if it's not possible
"""
if isinstance(string, compat.string_types):
try:
return cls(string)
except ValueError:
pass
raise TypeError("could not construct IntervalDtype")
return cls(string)
msg = "a string needs to be passed, got type {typ}"
raise TypeError(msg.format(typ=type(string)))

def __unicode__(self):
if self.subtype is None:
return "interval"
return "interval[{subtype}]".format(subtype=self.subtype)

@property
def name(self):
return str(self)

def __hash__(self):
# make myself hashable
return hash(str(self))

def __eq__(self, other):
if isinstance(other, compat.string_types):
return other == self.name or other == self.name.title()

return (isinstance(other, IntervalDtype) and
self.subtype == other.subtype)
return other.lower() in (self.name.lower(), str(self).lower())
elif not isinstance(other, IntervalDtype):
return False
elif self.subtype is None or other.subtype is None:
# None should match any subtype
return True
else:
return self.subtype == other.subtype

@classmethod
def is_dtype(cls, dtype):
Expand Down
94 changes: 59 additions & 35 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def test_hash_vs_equality(self):
assert dtype2 == dtype
assert dtype3 == dtype
assert dtype is dtype2
assert dtype2 is dtype
assert dtype2 is dtype3
assert dtype3 is dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)
Expand All @@ -451,14 +451,19 @@ def test_hash_vs_equality(self):
assert hash(dtype2) == hash(dtype2)
assert hash(dtype2) == hash(dtype3)

def test_construction(self):
with pytest.raises(ValueError):
IntervalDtype('xx')
@pytest.mark.parametrize('subtype', [
'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')])
def test_construction(self, subtype):
i = IntervalDtype(subtype)
assert i.subtype == np.dtype('int64')
assert is_interval_dtype(i)

for s in ['interval[int64]', 'Interval[int64]', 'int64']:
i = IntervalDtype(s)
assert i.subtype == np.dtype('int64')
assert is_interval_dtype(i)
@pytest.mark.parametrize('subtype', [None, 'interval', 'Interval'])
def test_construction_generic(self, subtype):
# generic
i = IntervalDtype(subtype)
assert i.subtype is None
assert is_interval_dtype(i)

@pytest.mark.parametrize('subtype', [
CategoricalDtype(list('abc'), False),
Expand All @@ -471,17 +476,27 @@ def test_construction_not_supported(self, subtype):
with tm.assert_raises_regex(TypeError, msg):
IntervalDtype(subtype)

def test_construction_generic(self):
# generic
i = IntervalDtype('interval')
assert i.subtype == ''
assert is_interval_dtype(i)
assert str(i) == 'interval[]'
def test_construction_errors(self):
msg = 'could not construct IntervalDtype'
with tm.assert_raises_regex(ValueError, msg):
IntervalDtype('xx')

i = IntervalDtype()
assert i.subtype is None
assert is_interval_dtype(i)
assert str(i) == 'interval'
def test_construction_from_string(self):
result = IntervalDtype('interval[int64]')
assert is_dtype_equal(self.dtype, result)
result = IntervalDtype.construct_from_string('interval[int64]')
assert is_dtype_equal(self.dtype, result)

@pytest.mark.parametrize('string', [
'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None])
def test_construction_from_string_errors(self, string):
if isinstance(string, string_types):
error, msg = ValueError, 'could not construct IntervalDtype'
else:
error, msg = TypeError, 'a string needs to be passed, got type'

with tm.assert_raises_regex(error, msg):
IntervalDtype.construct_from_string(string)

def test_subclass(self):
a = IntervalDtype('interval[int64]')
Expand All @@ -506,36 +521,45 @@ def test_is_dtype(self):
assert not IntervalDtype.is_dtype(np.int64)
assert not IntervalDtype.is_dtype(np.float64)

def test_identity(self):
assert (IntervalDtype('interval[int64]') ==
IntervalDtype('interval[int64]'))

def test_coerce_to_dtype(self):
assert (_coerce_to_dtype('interval[int64]') ==
IntervalDtype('interval[int64]'))

def test_construction_from_string(self):
result = IntervalDtype('interval[int64]')
assert is_dtype_equal(self.dtype, result)
result = IntervalDtype.construct_from_string('interval[int64]')
assert is_dtype_equal(self.dtype, result)
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('foo')
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('interval[foo]')
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('foo[int64]')

def test_equality(self):
assert is_dtype_equal(self.dtype, 'interval[int64]')
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))

assert not is_dtype_equal(self.dtype, 'int64')
assert not is_dtype_equal(IntervalDtype('int64'),
IntervalDtype('float64'))

@pytest.mark.parametrize('subtype', [
None, 'interval', 'Interval', 'int64', 'uint64', 'float64',
'complex128', 'datetime64', 'timedelta64', PeriodDtype('Q')])
def test_equality_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
assert is_dtype_equal(dtype, 'interval')
assert is_dtype_equal(dtype, IntervalDtype())

@pytest.mark.parametrize('subtype', [
'int64', 'uint64', 'float64', 'complex128', 'datetime64',
'timedelta64', PeriodDtype('Q')])
def test_name_repr(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
expected = 'interval[{subtype}]'.format(subtype=subtype)
assert str(dtype) == expected
assert dtype.name == 'interval'

@pytest.mark.parametrize('subtype', [None, 'interval', 'Interval'])
def test_name_repr_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
assert str(dtype) == 'interval'
assert dtype.name == 'interval'

def test_basic(self):
assert is_interval_dtype(self.dtype)

Expand Down

0 comments on commit 982e112

Please sign in to comment.