pandas-dev · jreback · May 29, 2019 · Mar 15, 2019 · Mar 12, 2019 · May 14, 2019
diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -299,14 +299,39 @@ Other API Changes
 Deprecations
 ~~~~~~~~~~~~
 
+Sparse Subclasses
+^^^^^^^^^^^^^^^^^
+
+The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Their functionality is better provided
+by a ``Series`` or ``DataFrame`` with sparse values.
+
+**Previous Way**
+
+.. ipython:: python
+   :okwarning:
+
+   df = pd.SparseDataFrame({"A": [0, 0, 1, 2]})
+   df.dtypes
+
+**New Way**
+
+.. ipython:: python
+
+   df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
+   df.dtypes
+
+The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+
 - The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
 - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
 - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
   the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
 - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
 - The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).
 
-
 .. _whatsnew_0250.prior_deprecations:
 
 Removal of prior version deprecations/changes

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -2014,9 +2014,9 @@ def from_coo(cls, A, dense_index=False):
         from pandas.core.sparse.scipy_sparse import _coo_to_sparse_series
         from pandas import Series
 
-        result = _coo_to_sparse_series(A, dense_index=dense_index)
-        # SparseSeries -> Series[sparse]
-        result = Series(result.values, index=result.index, copy=False)
+        result = _coo_to_sparse_series(A, dense_index=dense_index,
+                                       sparse_series=False)
+        result = Series(result.array, index=result.index, copy=False)
 
         return result
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1930,13 +1930,13 @@ def to_sparse(self, fill_value=None, kind='block'):
         >>> type(df)
         <class 'pandas.core.frame.DataFrame'>
 
-        >>> sdf = df.to_sparse()
-        >>> sdf
+        >>> sdf = df.to_sparse()  # doctest: +SKIP
+        >>> sdf  # doctest: +SKIP
              0    1
         0  NaN  NaN
         1  1.0  NaN
         2  NaN  1.0
-        >>> type(sdf)
+        >>> type(sdf)  # doctest: +SKIP
         <class 'pandas.core.sparse.frame.SparseDataFrame'>
         """
         from pandas.core.sparse.api import SparseDataFrame

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -5589,7 +5589,7 @@ def ftypes(self):
         3    float64:dense
         dtype: object
 
-        >>> pd.SparseDataFrame(arr).ftypes
+        >>> pd.SparseDataFrame(arr).ftypes  # doctest: +SKIP
         0    float64:sparse
         1    float64:sparse
         2    float64:sparse

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1586,7 +1586,6 @@ def to_sparse(self, kind='block', fill_value=None):
         SparseSeries
             Sparse representation of the Series.
         """
-        # TODO: deprecate
         from pandas.core.sparse.series import SparseSeries
 
         values = SparseArray(self, kind=kind, fill_value=fill_value)

diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
@@ -28,13 +28,24 @@
 from pandas.core.sparse.series import SparseSeries
 
 _shared_doc_kwargs = dict(klass='SparseDataFrame')
+depr_msg = """\
+SparseDataFrame is deprecated and will be removed in a future version.
+Use a regular DataFrame whose columns are SparseArrays instead.
+
+See http://pandas.pydata.org/pandas-docs/stable/\
+user_guide/sparse.html#migrating for more.
+"""
 
 
 class SparseDataFrame(DataFrame):
     """
     DataFrame containing sparse floating point data in the form of SparseSeries
     objects
 
+    .. deprecated:: 0.25.0
+
+       Use a DataFrame with sparse values instead.
+
     Parameters
     ----------
     data : same types as can be passed to DataFrame or scipy.sparse.spmatrix
@@ -56,6 +67,7 @@ class SparseDataFrame(DataFrame):
     def __init__(self, data=None, index=None, columns=None, default_kind=None,
                  default_fill_value=None, dtype=None, copy=False):
 
+        warnings.warn(depr_msg, FutureWarning, stacklevel=2)
         # pick up the defaults from the Sparse structures
         if isinstance(data, SparseDataFrame):
             if index is None:

diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py
@@ -116,14 +116,32 @@ def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
     return sparse_matrix, rows, columns
 
 
-def _coo_to_sparse_series(A, dense_index=False):
+def _coo_to_sparse_series(A, dense_index: bool = False,
+                          sparse_series: bool = True):
     """
     Convert a scipy.sparse.coo_matrix to a SparseSeries.
-    Use the defaults given in the SparseSeries constructor.
+
+    Parameters
+    ----------
+    A : scipy.sparse.coo.coo_matrix
+    dense_index : bool, default False
+    sparse_series : bool, default True
+
+    Returns
+    -------
+    Series or SparseSeries
     """
+    from pandas import SparseDtype
+
     s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
     s = s.sort_index()
-    s = s.to_sparse()  # TODO: specify kind?
+    if sparse_series:
+        # TODO(SparseSeries): remove this and the sparse_series keyword.
+        # The keyword is just here to avoid the SparseSeries FutureWarning
+        # when _coo_to_sparse_series is called via Series.sparse.from_coo
+        s = s.to_sparse()  # TODO: specify kind?
+    else:
+        s = s.astype(SparseDtype(s.dtype))
     if dense_index:
         # is there a better constructor method to use here?
         i = range(A.shape[0])

diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
@@ -32,9 +32,24 @@
                           optional_labels='', optional_axis='')
 
 
+depr_msg = """\
+SparseSeries is deprecated and will be removed in a future version.
+Use a Series with sparse values instead.
+
+    >>> series = pd.Series(pd.SparseArray(...))
+
+See http://pandas.pydata.org/pandas-docs/stable/\
+user_guide/sparse.html#migrating for more.
+"""
+
+
 class SparseSeries(Series):
     """Data structure for labeled, sparse floating point data
 
+    .. deprecated:: 0.25.0
+
+       Use a Series with sparse values instead.
+
     Parameters
     ----------
     data : {array-like, Series, SparseSeries, dict}
@@ -60,6 +75,7 @@ class SparseSeries(Series):
     def __init__(self, data=None, index=None, sparse_index=None, kind='block',
                  fill_value=None, name=None, dtype=None, copy=False,
                  fastpath=False):
+        warnings.warn(depr_msg, FutureWarning, stacklevel=2)
         # TODO: Most of this should be refactored and shared with Series
         # 1. BlockManager -> array
         # 2. Series.index, Series.name, index, name reconciliation

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
@@ -101,3 +101,21 @@ def test_density(self):
         res = df.sparse.density
         expected = 0.75
         assert res == expected
+
+    @pytest.mark.parametrize("dtype", ['int64', 'float64'])
+    @pytest.mark.parametrize("dense_index", [True, False])
+    @td.skip_if_no_scipy
+    def test_series_from_coo(self, dtype, dense_index):
+        import scipy.sparse
+
+        A = scipy.sparse.eye(3, format='coo', dtype=dtype)
+        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
+        index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
+        expected = pd.Series(pd.SparseArray(np.array([1, 1, 1], dtype=dtype)),
+                             index=index)
+        if dense_index:
+            expected = expected.reindex(
+                pd.MultiIndex.from_product(index.levels)
+            )
+
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py
@@ -8,6 +8,7 @@
 import pandas.util.testing as tm
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestSparseArrayArithmetics:
 
     _base = np.array

diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -215,6 +215,7 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype):
         assert exp.dtype == dtype
 
     @pytest.mark.parametrize("fill", [1, np.nan, 0])
+    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
     def test_sparse_series_round_trip(self, kind, fill):
         # see gh-13999
         arr = SparseArray([np.nan, 1, np.nan, 2, 3],
@@ -231,6 +232,7 @@ def test_sparse_series_round_trip(self, kind, fill):
         tm.assert_sp_array_equal(arr, res)
 
     @pytest.mark.parametrize("fill", [True, False, np.nan])
+    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
     def test_sparse_series_round_trip2(self, kind, fill):
         # see gh-13999
         arr = SparseArray([True, False, True, True], dtype=np.bool,
@@ -1098,6 +1100,7 @@ def test_npoints(self):
         assert arr.npoints == 1
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestAccessor:
 
     @pytest.mark.parametrize('attr', [

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -15,6 +15,10 @@
 from pandas.core.sparse.api import SparseDtype
 import pandas.util.testing as tm
 
+ignore_sparse_warning = pytest.mark.filterwarnings(
+    "ignore:Sparse:FutureWarning"
+)
+
 
 # EA & Actual Dtypes
 def to_ea_dtypes(dtypes):
@@ -146,6 +150,7 @@ def test_is_object():
 @pytest.mark.parametrize("check_scipy", [
     False, pytest.param(True, marks=td.skip_if_no_scipy)
 ])
+@ignore_sparse_warning
 def test_is_sparse(check_scipy):
     assert com.is_sparse(pd.SparseArray([1, 2, 3]))
     assert com.is_sparse(pd.SparseSeries([1, 2, 3]))
@@ -158,6 +163,7 @@ def test_is_sparse(check_scipy):
 
 
 @td.skip_if_no_scipy
+@ignore_sparse_warning
 def test_is_scipy_sparse():
     from scipy.sparse import bsr_matrix
     assert com.is_scipy_sparse(bsr_matrix([1, 2, 3]))
@@ -529,6 +535,7 @@ def test_is_bool_dtype():
 @pytest.mark.parametrize("check_scipy", [
     False, pytest.param(True, marks=td.skip_if_no_scipy)
 ])
+@ignore_sparse_warning
 def test_is_extension_type(check_scipy):
     assert not com.is_extension_type([1, 2, 3])
     assert not com.is_extension_type(np.array([1, 2, 3]))
@@ -595,8 +602,6 @@ def test_is_offsetlike():
     (pd.DatetimeIndex([1, 2]).dtype, np.dtype('=M8[ns]')),
     ('<M8[ns]', np.dtype('<M8[ns]')),
     ('datetime64[ns, Europe/London]', DatetimeTZDtype('ns', 'Europe/London')),
-    (pd.SparseSeries([1, 2], dtype='int32'), SparseDtype('int32')),
-    (pd.SparseSeries([1, 2], dtype='int32').dtype, SparseDtype('int32')),
     (PeriodDtype(freq='D'), PeriodDtype(freq='D')),
     ('period[D]', PeriodDtype(freq='D')),
     (IntervalDtype(), IntervalDtype()),
@@ -605,6 +610,14 @@ def test__get_dtype(input_param, result):
     assert com._get_dtype(input_param) == result
 
 
+@ignore_sparse_warning
+def test__get_dtype_sparse():
+    ser = pd.SparseSeries([1, 2], dtype='int32')
+    expected = SparseDtype('int32')
+    assert com._get_dtype(ser) == expected
+    assert com._get_dtype(ser.dtype) == expected
+
+
 @pytest.mark.parametrize('input_param,expected_error_message', [
     (None, "Cannot deduce dtype from null object"),
     (1, "data type not understood"),
@@ -640,8 +653,7 @@ def test__get_dtype_fails(input_param, expected_error_message):
     (pd.DatetimeIndex(['2000'], tz='Europe/London').dtype,
      pd.Timestamp),
     ('datetime64[ns, Europe/London]', pd.Timestamp),
-    (pd.SparseSeries([1, 2], dtype='int32'), np.int32),
-    (pd.SparseSeries([1, 2], dtype='int32').dtype, np.int32),
+
     (PeriodDtype(freq='D'), pd.Period),
     ('period[D]', pd.Period),
     (IntervalDtype(), pd.Interval),
@@ -652,3 +664,11 @@ def test__get_dtype_fails(input_param, expected_error_message):
 ])
 def test__is_dtype_type(input_param, result):
     assert com._is_dtype_type(input_param, lambda tipo: tipo == result)
+
+
+@ignore_sparse_warning
+def test__is_dtype_type_sparse():
+    ser = pd.SparseSeries([1, 2], dtype='int32')
+    result = np.dtype('int32')
+    assert com._is_dtype_type(ser, lambda tipo: tipo == result)
+    assert com._is_dtype_type(ser.dtype, lambda tipo: tipo == result)
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -870,7 +870,6 @@ def test_registry_find(dtype, expected):
     (pd.Series([1, 2]), False),
     (np.array([True, False]), True),
     (pd.Series([True, False]), True),
-    (pd.SparseSeries([True, False]), True),
     (pd.SparseArray([True, False]), True),
     (SparseDtype(bool), True)
 ])
@@ -879,6 +878,12 @@ def test_is_bool_dtype(dtype, expected):
     assert result is expected
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+def test_is_bool_dtype_sparse():
+    result = is_bool_dtype(pd.SparseSeries([True, False]))
+    assert result is True
+
+
 @pytest.mark.parametrize("check", [
     is_categorical_dtype,
     is_datetime64tz_dtype,

diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py
@@ -1,4 +1,4 @@
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
 
 import numpy as np
 
@@ -17,9 +17,12 @@ class TestABCClasses:
     categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1])
     categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical)
     df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index)
-    sparse_series = pd.Series([1, 2, 3]).to_sparse()
+    with catch_warnings():
+        simplefilter('ignore', FutureWarning)
+        sparse_series = pd.Series([1, 2, 3]).to_sparse()
+        sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]})
+
     sparse_array = pd.SparseArray(np.random.randn(10))
-    sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]})
     datetime_array = pd.core.arrays.DatetimeArray(datetime_index)
     timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index)
 

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -13,6 +13,7 @@
 import pandas.util.testing as tm
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestDataFrameAlterAxes:
 
     def test_set_index_directly(self, float_string_frame):
@@ -1376,6 +1377,7 @@ def test_droplevel(self):
         tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestIntervalIndex:
 
     def test_setitem(self):

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -2073,6 +2073,7 @@ def test_loc_duplicates(self):
         df.loc[trange[bool_idx], "A"] += 6
         tm.assert_frame_equal(df, expected)
 
+    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
     def test_iloc_sparse_propegate_fill_value(self):
         from pandas.core.sparse.api import SparseDataFrame
         df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)