diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 79bdad82af5a37..1c60c32a7fb20c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -30,6 +30,7 @@
                                _possibly_downcast_to_dtype,
                                _invalidate_string_dtypes,
                                _coerce_to_dtypes,
+                               _coerce_extension_to_embed,
                                _maybe_upcast_putmask,
                                _find_common_type)
 from pandas.types.common import (is_categorical_dtype,
@@ -2647,7 +2648,7 @@ def reindexer(value):
 
         # return internal types directly
         if is_extension_type(value):
-            return value
+            return _coerce_extension_to_embed(value)
 
         # broadcast across multiple columns if necessary
         if broadcast and key in self.columns and value.ndim == 1:
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 5237a076833673..e5940fa1f015b9 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -8,7 +8,7 @@
 
 from pandas.compat import lrange
 from pandas import (DataFrame, Series, Index, MultiIndex,
-                    RangeIndex, date_range)
+                    RangeIndex, date_range, IntervalIndex)
 import pandas as pd
 
 from pandas.util.testing import (assert_series_equal,
@@ -718,11 +718,33 @@ def test_set_index_preserve_categorical_dtype(self):
 
 class TestIntervalIndex(tm.TestCase):
 
-    def test_set_reset(self):
+    def test_setitem(self):
+
+        df = DataFrame({'A': range(10)})
+        s = pd.cut(df.A, 5)
+        self.assertIsInstance(s.cat.categories, IntervalIndex)
+
+        # these should end up the same, namely
+        # an object array of Intervals
+        df['B'] = s
+        df['C'] = np.array(s)
+        df['D'] = s.values
+        df['E'] = np.array(s.values)
+
+        self.assertTrue(df['B'].dtype == 'object')
+        self.assertTrue(df['C'].dtype == 'object')
+        self.assertTrue(df['D'].dtype == 'object')
+        self.assertTrue(df['E'].dtype == 'object')
+
+        tm.assert_series_equal(df['B'], df['C'], check_names=False)
+        tm.assert_series_equal(df['B'], df['D'], check_names=False)
+        tm.assert_series_equal(df['B'], df['E'], check_names=False)
+
+    def test_set_reset_index(self):
+
         df = DataFrame({'A': range(10)})
-        df['B'] = pd.cut(df.A, 5)
+        s = pd.cut(df.A, 5)
+        df['B'] = s
         df = df.set_index('B')
-        # TODO: this should actually be converted prior
-        self.assertTrue(isinstance(df.index, pd.CategoricalIndex))
 
         df = df.reset_index()
diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py
index 979aa92d0b2744..168b8953d6164c 100644
--- a/pandas/tests/indexing/test_interval.py
+++ b/pandas/tests/indexing/test_interval.py
@@ -33,10 +33,10 @@ def test_loc_getitem_series(self):
     def test_loc_getitem_frame(self):
 
         df = DataFrame({'A': range(10)})
-        df['B'] = pd.cut(df.A, 5)
+        s = pd.cut(df.A, 5)
+        df['B'] = s
         df = df.set_index('B')
 
-        # TODO: fixme
-        # result = df.loc[4]
-        # expected = df.iloc[3]
-        # tm.assert_series_equal(result, expected)
+        result = df.loc[4]
+        expected = df.iloc[4:6]
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/types/cast.py b/pandas/types/cast.py
index 6b1c3f9c003516..ba0314e2b5cb48 100644
--- a/pandas/types/cast.py
+++ b/pandas/types/cast.py
@@ -7,7 +7,7 @@
 from pandas.compat import string_types, text_type, PY3
 from .common import (_ensure_object, is_bool, is_integer, is_float,
                      is_complex, is_datetimetz, is_categorical_dtype,
-                     is_datetimelike,
+                     is_datetimelike, is_interval_dtype,
                      is_extension_type, is_object_dtype,
                      is_datetime64tz_dtype, is_datetime64_dtype,
                      is_timedelta64_dtype, is_dtype_equal,
@@ -484,6 +484,23 @@ def conv(r, dtype):
     return [conv(r, dtype) for r, dtype in zip(result, dtypes)]
 
 
+def _coerce_extension_to_embed(value):
+    """
+    Coerce an extension-typed value to a form suitable for embedding
+    in a Series/DataFrame.
+
+    A categorical whose categories are interval-typed is converted with
+    ``np.array(value)``; any other value is returned unchanged.
+    """
+
+    # TODO: maybe we should have a method on Categorical
+    # to actually do this instead
+    if is_categorical_dtype(value) and is_interval_dtype(value.categories):
+        return np.array(value)
+
+    return value
+
+
 def _astype_nansafe(arr, dtype, copy=True):
     """ return a view if copy is False, but
         need to be very careful as the result shape could change! """