From 9977a087bbbbc25cd3af019c1a139ad1565126b7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sun, 11 Nov 2018 09:22:06 -0600
Subject: [PATCH] API: DataFrame.__getitem__ returns Series for sparse column (#23561)

closes https://github.com/pandas-dev/pandas/issues/23559
---
 doc/source/whatsnew/v0.24.0.txt           |  1 +
 pandas/core/dtypes/concat.py              | 21 ------------------
 pandas/core/frame.py                      |  3 +--
 pandas/tests/frame/test_indexing.py       | 27 ++++++++++++++++++-----
 pandas/tests/sparse/series/test_series.py |  5 -----
 5 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 3dcaef302d564..be795b024c329 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -562,6 +562,7 @@ changes were made:
 - The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
 - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
 - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
+- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
 
 Some new warnings are issued for operations that require or are likely to materialize a large dense array:
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index b2999c112e8ab..bb4ab823069ee 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs):
                                                           ABCSparseDataFrame))
 
 
-def _get_sliced_frame_result_type(data, obj):
-    """
-    return appropriate class of Series. When data is sparse
-    it will return a SparseSeries, otherwise it will return
-    the Series.
-
-    Parameters
-    ----------
-    data : array-like
-    obj : DataFrame
-
-    Returns
-    -------
-    Series or SparseSeries
-    """
-    if is_sparse(data):
-        from pandas.core.sparse.api import SparseSeries
-        return SparseSeries
-    return obj._constructor_sliced
-
-
 def _concat_compat(to_concat, axis=0):
     """
     provide concatenation of an array of arrays each of which is a single
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6b29725ba2bea..7153f5c2e7007 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -72,7 +72,6 @@
     is_iterator,
     is_sequence,
     is_named_tuple)
-from pandas.core.dtypes.concat import _get_sliced_frame_result_type
 from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
 from pandas.core.dtypes.missing import isna, notna
 
@@ -3241,7 +3240,7 @@ def _box_item_values(self, key, values):
 
     def _box_col_values(self, values, items):
         """ provide boxed values for a column """
-        klass = _get_sliced_frame_result_type(values, self)
+        klass = self._constructor_sliced
         return klass(values, index=self.index, name=items, fastpath=True)
 
     def __setitem__(self, key, value):
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index b0e7fe2e25a6c..78aa853f68459 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -2277,19 +2277,34 @@ def test_getitem_ix_float_duplicates(self):
         expect = df.iloc[[1, -1], 0]
         assert_series_equal(df.loc[0.2, 'a'], expect)
 
+    def test_getitem_sparse_column(self):
+        # https://github.com/pandas-dev/pandas/issues/23559
+        data = pd.SparseArray([0, 1])
+        df = pd.DataFrame({"A": data})
+        expected = pd.Series(data, name="A")
+        result = df['A']
+        tm.assert_series_equal(result, expected)
+
+        result = df.iloc[:, 0]
+        tm.assert_series_equal(result, expected)
+
+        result = df.loc[:, 'A']
+        tm.assert_series_equal(result, expected)
+
     def test_setitem_with_sparse_value(self):
         # GH8131
         df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
-        sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0)
-        df['new_column'] = sp_series
-        assert_series_equal(df['new_column'], sp_series, check_names=False)
+        sp_array = pd.SparseArray([0, 0, 1])
+        df['new_column'] = sp_array
+        assert_series_equal(df['new_column'],
+                            pd.Series(sp_array, name='new_column'),
+                            check_names=False)
 
     def test_setitem_with_unaligned_sparse_value(self):
         df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
-        sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0])
-                     .to_sparse(fill_value=0))
+        sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0])
         df['new_column'] = sp_series
-        exp = pd.SparseSeries([1, 0, 0], name='new_column')
+        exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column')
         assert_series_equal(df['new_column'], exp)
 
     def test_setitem_with_unaligned_tz_aware_datetime_column(self):
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 9c7dbd85edcbb..fd5dbcd932993 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self):
         df.dtypes
         str(df)
 
-        tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)
-
-        result = df.iloc[:, 0]
-        tm.assert_sp_series_equal(result, self.bseries, check_names=False)
-
         # blocking
         expected = Series({'col': 'float64:sparse'})
         result = df.ftypes