Skip to content

Commit

Permalink
API: DataFrame.__getitem__ returns Series for sparse column (#23561)
Browse files Browse the repository at this point in the history
closes #23559
  • Loading branch information
TomAugspurger authored and jreback committed Nov 11, 2018
1 parent 4c63f3e commit 43a558f
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 34 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ changes were made:
- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer support combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).

Some new warnings are issued for operations that require or are likely to materialize a large dense array:

Expand Down
21 changes: 0 additions & 21 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs):
ABCSparseDataFrame))


def _get_sliced_frame_result_type(data, obj):
    """
    Choose the Series class used to box data sliced out of ``obj``.

    Sparse ``data`` is boxed as a ``SparseSeries``; anything else uses
    the sliced constructor of ``obj``.

    Parameters
    ----------
    data : array-like
    obj : DataFrame

    Returns
    -------
    Series or SparseSeries
    """
    if not is_sparse(data):
        return obj._constructor_sliced

    # Imported only on the sparse path
    # (presumably to avoid a circular import -- TODO confirm).
    from pandas.core.sparse.api import SparseSeries
    return SparseSeries


def _concat_compat(to_concat, axis=0):
"""
provide concatenation of an array of arrays each of which is a single
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
is_iterator,
is_sequence,
is_named_tuple)
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
from pandas.core.dtypes.missing import isna, notna

Expand Down Expand Up @@ -3241,7 +3240,7 @@ def _box_item_values(self, key, values):

def _box_col_values(self, values, items):
    """
    Provide boxed values for a column.

    Parameters
    ----------
    values : array-like
        Data of the column being boxed.
    items : object
        Passed through as the ``name`` of the boxed result.

    Returns
    -------
    object
        An instance of ``self._constructor_sliced`` built from ``values``
        with this object's index.
    """
    # Always box with this object's own sliced constructor. Sparse
    # values no longer get a special-cased class, so the former
    # `_get_sliced_frame_result_type` lookup (which was immediately
    # overwritten anyway) is gone.
    klass = self._constructor_sliced
    return klass(values, index=self.index, name=items, fastpath=True)

def __setitem__(self, key, value):
Expand Down
27 changes: 21 additions & 6 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2277,19 +2277,34 @@ def test_getitem_ix_float_duplicates(self):
expect = df.iloc[[1, -1], 0]
assert_series_equal(df.loc[0.2, 'a'], expect)

def test_getitem_sparse_column(self):
    """
    Selecting a single sparse column must give the same Series through
    ``[]``, ``.iloc`` and ``.loc``.
    """
    # https://github.com/pandas-dev/pandas/issues/23559
    sparse_values = pd.SparseArray([0, 1])
    frame = pd.DataFrame({"A": sparse_values})
    expected = pd.Series(sparse_values, name="A")

    # plain column lookup
    tm.assert_series_equal(frame['A'], expected)

    # positional column selection
    tm.assert_series_equal(frame.iloc[:, 0], expected)

    # label-based column selection
    tm.assert_series_equal(frame.loc[:, 'A'], expected)

def test_setitem_with_sparse_value(self):
    """
    Assigning a SparseArray as a new column and reading it back must
    give a Series wrapping that same SparseArray.
    """
    # GH8131
    df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
    sp_array = pd.SparseArray([0, 0, 1])
    df['new_column'] = sp_array
    # The stale ``.to_sparse`` check was dropped: it compared the column
    # with the old sparse-series result and was then repeated with the
    # SparseArray version below.
    assert_series_equal(df['new_column'],
                        pd.Series(sp_array, name='new_column'),
                        check_names=False)

def test_setitem_with_unaligned_sparse_value(self):
    """
    Assigning a sparse-valued Series whose index is ``[2, 1, 0]`` must
    store the values reordered as ``[1, 0, 0]`` in the new column.
    """
    df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
    # Each value is built exactly once; the earlier ``.to_sparse`` /
    # ``SparseSeries`` assignments were overwritten before use.
    sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0])
    df['new_column'] = sp_series
    exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column')
    assert_series_equal(df['new_column'], exp)

def test_setitem_with_unaligned_tz_aware_datetime_column(self):
Expand Down
5 changes: 0 additions & 5 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self):
df.dtypes
str(df)

tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False)

result = df.iloc[:, 0]
tm.assert_sp_series_equal(result, self.bseries, check_names=False)

# blocking
expected = Series({'col': 'float64:sparse'})
result = df.ftypes
Expand Down

0 comments on commit 43a558f

Please sign in to comment.