API: DataFrame.__getitem__ returns Series for sparse column (pandas-d…

…ev#23561) closes pandas-dev#23559
JustinZhengBC · Nov 14, 2018 · 9977a08 · 9977a08
1 parent ff8130b
commit 9977a08
Show file tree

Hide file tree

Showing 5 changed files with 23 additions and 34 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -562,6 +562,7 @@ changes were made:
 - The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
 - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
 - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
+- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
 
 Some new warnings are issued for operations that require or are likely to materialize a large dense array:
 

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs):
                                                           ABCSparseDataFrame))
 
 
-def _get_sliced_frame_result_type(data, obj):
-    """
-    return appropriate class of Series. When data is sparse
-    it will return a SparseSeries, otherwise it will return
-    the Series.
-
-    Parameters
-    ----------
-    data : array-like
-    obj : DataFrame
-
-    Returns
-    -------
-    Series or SparseSeries
-    """
-    if is_sparse(data):
-        from pandas.core.sparse.api import SparseSeries
-        return SparseSeries
-    return obj._constructor_sliced
-
-
 def _concat_compat(to_concat, axis=0):
     """
     provide concatenation of an array of arrays each of which is a single

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -72,7 +72,6 @@
     is_iterator,
     is_sequence,
     is_named_tuple)
-from pandas.core.dtypes.concat import _get_sliced_frame_result_type
 from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
 from pandas.core.dtypes.missing import isna, notna
 
@@ -3241,7 +3240,7 @@ def _box_item_values(self, key, values):
 
     def _box_col_values(self, values, items):
         """ provide boxed values for a column """
-        klass = _get_sliced_frame_result_type(values, self)
+        klass = self._constructor_sliced
         return klass(values, index=self.index, name=items, fastpath=True)
 
     def __setitem__(self, key, value):

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -2277,19 +2277,34 @@ def test_getitem_ix_float_duplicates(self):
         expect = df.iloc[[1, -1], 0]
         assert_series_equal(df.loc[0.2, 'a'], expect)
 
+    def test_getitem_sparse_column(self):
+        # https://github.com/pandas-dev/pandas/issues/23559
+        data = pd.SparseArray([0, 1])
+        df = pd.DataFrame({"A": data})
+        expected = pd.Series(data, name="A")
+        result = df['A']
+        tm.assert_series_equal(result, expected)
+
+        result = df.iloc[:, 0]
+        tm.assert_series_equal(result, expected)
+
+        result = df.loc[:, 'A']
+        tm.assert_series_equal(result, expected)
+
     def test_setitem_with_sparse_value(self):
         # GH8131
         df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
-        sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0)
-        df['new_column'] = sp_series
-        assert_series_equal(df['new_column'], sp_series, check_names=False)
+        sp_array = pd.SparseArray([0, 0, 1])
+        df['new_column'] = sp_array
+        assert_series_equal(df['new_column'],
+                            pd.Series(sp_array, name='new_column'),
+                            check_names=False)
 
     def test_setitem_with_unaligned_sparse_value(self):
         df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
-        sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0])
-                     .to_sparse(fill_value=0))
+        sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0])
         df['new_column'] = sp_series
-        exp = pd.SparseSeries([1, 0, 0], name='new_column')
+        exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column')
         assert_series_equal(df['new_column'], exp)
 
     def test_setitem_with_unaligned_tz_aware_datetime_column(self):

diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
@@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self):
         df.dtypes
         str(df)
 
-        tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)
-
-        result = df.iloc[:, 0]
-        tm.assert_sp_series_equal(result, self.bseries, check_names=False)
-
         # blocking
         expected = Series({'col': 'float64:sparse'})
         result = df.ftypes