DEPR: Remove SparseSeries and SparseDataFrame (pandas-dev#28425)

proost · Dec 19, 2019 · e045526 · e045526
1 parent 06c8584
commit e045526
Show file tree

Hide file tree

Showing 74 changed files with 227 additions and 8,569 deletions.
diff --git a/doc/redirects.csv b/doc/redirects.csv
@@ -503,7 +503,6 @@ generated/pandas.DataFrame.to_parquet,../reference/api/pandas.DataFrame.to_parqu
 generated/pandas.DataFrame.to_period,../reference/api/pandas.DataFrame.to_period
 generated/pandas.DataFrame.to_pickle,../reference/api/pandas.DataFrame.to_pickle
 generated/pandas.DataFrame.to_records,../reference/api/pandas.DataFrame.to_records
-generated/pandas.DataFrame.to_sparse,../reference/api/pandas.DataFrame.to_sparse
 generated/pandas.DataFrame.to_sql,../reference/api/pandas.DataFrame.to_sql
 generated/pandas.DataFrame.to_stata,../reference/api/pandas.DataFrame.to_stata
 generated/pandas.DataFrame.to_string,../reference/api/pandas.DataFrame.to_string
@@ -1432,7 +1431,6 @@ generated/pandas.Series.to_msgpack,../reference/api/pandas.Series.to_msgpack
 generated/pandas.Series.to_numpy,../reference/api/pandas.Series.to_numpy
 generated/pandas.Series.to_period,../reference/api/pandas.Series.to_period
 generated/pandas.Series.to_pickle,../reference/api/pandas.Series.to_pickle
-generated/pandas.Series.to_sparse,../reference/api/pandas.Series.to_sparse
 generated/pandas.Series.to_sql,../reference/api/pandas.Series.to_sql
 generated/pandas.Series.to_string,../reference/api/pandas.Series.to_string
 generated/pandas.Series.to_timestamp,../reference/api/pandas.Series.to_timestamp

diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
@@ -356,15 +356,7 @@ Serialization / IO / conversion
    DataFrame.to_msgpack
    DataFrame.to_gbq
    DataFrame.to_records
-   DataFrame.to_sparse
    DataFrame.to_dense
    DataFrame.to_string
    DataFrame.to_clipboard
    DataFrame.style
-
-Sparse
-~~~~~~
-.. autosummary::
-   :toctree: api/
-
-   SparseDataFrame.to_coo
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
@@ -576,18 +576,7 @@ Serialization / IO / conversion
    Series.to_sql
    Series.to_msgpack
    Series.to_json
-   Series.to_sparse
    Series.to_dense
    Series.to_string
    Series.to_clipboard
    Series.to_latex
-
-
-Sparse
-------
-
-.. autosummary::
-   :toctree: api/
-
-   SparseSeries.to_coo
-   SparseSeries.from_coo
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -4641,6 +4641,14 @@ Several caveats.
 
 See the `Full Documentation <https://github.com/wesm/feather>`__.
 
+.. ipython:: python
+   :suppress:
+
+   import warnings
+   # This can be removed once building with pyarrow >=0.15.0
+   warnings.filterwarnings("ignore", "The Sparse", FutureWarning)
+
+
 .. ipython:: python
 
    df = pd.DataFrame({'a': list('abc'),

diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
@@ -6,12 +6,6 @@
 Sparse data structures
 **********************
 
-.. note::
-
-   ``SparseSeries`` and ``SparseDataFrame`` have been deprecated. Their purpose
-   is served equally well by a :class:`Series` or :class:`DataFrame` with
-   sparse values. See :ref:`sparse.migration` for tips on migrating.
-
 Pandas provides data structures for efficiently storing sparse data.
 These are not necessarily sparse in the typical "mostly 0". Rather, you can view these
 objects as being "compressed" where any data matching a specific value (``NaN`` / missing value, though any value
@@ -168,6 +162,11 @@ the correct dense result.
 Migrating
 ---------
 
+.. note::
+
+   ``SparseSeries`` and ``SparseDataFrame`` were removed in pandas 1.0.0. This migration
+   guide is present to aid in migrating from previous versions.
+
-In older versions of pandas, the ``SparseSeries`` and ``SparseDataFrame`` classes (documented below)
+In older versions of pandas, the ``SparseSeries`` and ``SparseDataFrame`` classes
 were the preferred way to work with sparse data. With the advent of extension arrays, these subclasses
 are no longer needed. Their purpose is better served by using a regular Series or DataFrame with
@@ -366,12 +365,3 @@ row and columns coordinates of the matrix. Note that this will consume a signifi
 
    ss_dense = pd.Series.sparse.from_coo(A, dense_index=True)
    ss_dense
-
-
-.. _sparse.subclasses:
-
-Sparse subclasses
------------------
-
-The :class:`SparseSeries` and :class:`SparseDataFrame` classes are deprecated. Visit their
-API pages for usage.
diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst
@@ -91,8 +91,7 @@ Interaction with scipy.sparse
 
 Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here <sparse.scipysparse>`). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels:
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
    s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0),
@@ -121,8 +120,7 @@ Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:is
 The from_coo method is a convenience method for creating a ``SparseSeries``
 from a ``scipy.sparse.coo_matrix``:
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    from scipy import sparse
    A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])),

diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst
@@ -393,8 +393,7 @@ used in the ``pandas`` implementation (:issue:`12644`, :issue:`12638`, :issue:`1
 
 An example of this signature augmentation is illustrated below:
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    sp = pd.SparseDataFrame([1, 2, 3])
    sp
@@ -409,8 +408,7 @@ Previous behaviour:
 
 New behaviour:
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    np.cumsum(sp, axis=0)
 

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -1235,8 +1235,7 @@ Operators now preserve dtypes
 
 - Sparse data structure now can preserve ``dtype`` after arithmetic ops (:issue:`13848`)
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64)
    s.dtype
@@ -1245,8 +1244,7 @@ Operators now preserve dtypes
 
 - Sparse data structure now support ``astype`` to convert internal ``dtype`` (:issue:`13900`)
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0)
    s

diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
@@ -338,8 +338,7 @@ See the :ref:`documentation <sparse.scipysparse>` for more information. (:issue:
 
 All sparse formats are supported, but matrices that are not in :mod:`COOrdinate <scipy.sparse>` format will be converted, copying data as needed.
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    from scipy.sparse import csr_matrix
    arr = np.random.random(size=(1000, 5))
@@ -351,7 +350,7 @@ All sparse formats are supported, but matrices that are not in :mod:`COOrdinate
 
 To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use:
 
-.. ipython:: python
+.. code-block:: python
 
    sdf.to_coo()
 

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -902,8 +902,7 @@ by a ``Series`` or ``DataFrame`` with sparse values.
 
 **Previous way**
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
    df = pd.SparseDataFrame({"A": [0, 0, 1, 2]})
    df.dtypes

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -92,8 +92,17 @@ Deprecations
 
+Removed SparseSeries and SparseDataFrame
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``SparseSeries`` and ``SparseDataFrame`` have been removed (:issue:`28425`).
+We recommend using a ``Series`` or ``DataFrame`` with sparse values instead.
+See :ref:`sparse.migration` for help with migrating existing code.
+
+
 .. _whatsnew_1000.prior_deprecations:
 
 Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
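-- Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
+- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)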
 - :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`)

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -114,12 +114,7 @@
     DataFrame,
 )
 
-from pandas.core.sparse.api import (
-    SparseArray,
-    SparseDataFrame,
-    SparseSeries,
-    SparseDtype,
-)
+from pandas.core.sparse.api import SparseArray, SparseDtype
 
 from pandas.tseries.api import infer_freq
 from pandas.tseries import offsets
@@ -196,8 +191,9 @@
 if pandas.compat.PY37:
 
     def __getattr__(name):
+        import warnings
+
         if name == "Panel":
-            import warnings
 
             warnings.warn(
                 "The Panel class is removed from pandas. Accessing it "
@@ -211,6 +207,17 @@ class Panel:
                 pass
 
             return Panel
+        elif name in {"SparseSeries", "SparseDataFrame"}:
+            warnings.warn(
+                "The {} class is removed from pandas. Accessing it from "
+                "the top-level namespace will also be removed in the next "
+                "version".format(name),
+                FutureWarning,
+                stacklevel=2,
+            )
+
+            return type(name, (), {})
+
         raise AttributeError("module 'pandas' has no attribute '{}'".format(name))
 
 
@@ -219,6 +226,12 @@ class Panel:
     class Panel:
         pass
 
+    class SparseDataFrame:
+        pass
+
+    class SparseSeries:
+        pass
+
 
 # module level doc-string
 __doc__ = """

diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -12,13 +12,10 @@
     from pandas.core.dtypes.dtypes import ExtensionDtype  # noqa: F401
     from pandas.core.indexes.base import Index  # noqa: F401
     from pandas.core.series import Series  # noqa: F401
-    from pandas.core.sparse.series import SparseSeries  # noqa: F401
     from pandas.core.generic import NDFrame  # noqa: F401
 
 
-AnyArrayLike = TypeVar(
-    "AnyArrayLike", "ExtensionArray", "Index", "Series", "SparseSeries", np.ndarray
-)
+AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
 ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray)
 DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta")
 Dtype = Union[str, np.dtype, "ExtensionDtype"]

diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
@@ -5,9 +5,14 @@
 import copy
 import pickle as pkl
 import sys
+from typing import TYPE_CHECKING
+import warnings
 
 from pandas import Index
 
+if TYPE_CHECKING:
+    from pandas import Series, DataFrame
+
 
 def load_reduce(self):
     stack = self.stack
@@ -54,6 +59,41 @@ def load_reduce(self):
         raise
 
 
+_sparse_msg = """\
+
+Loading a saved '{cls}' as a {new} with sparse values.
+'{cls}' is now removed. You should re-save this dataset in its new format.
+"""
+
+
+class _LoadSparseSeries:
+    # To load a SparseSeries as a Series[Sparse]
+    def __new__(cls) -> "Series":
+        from pandas import Series
+
+        warnings.warn(
+            _sparse_msg.format(cls="SparseSeries", new="Series"),
+            FutureWarning,
+            stacklevel=6,
+        )
+
+        return Series()
+
+
+class _LoadSparseFrame:
+    # To load a SparseDataFrame as a DataFrame[Sparse]
+    def __new__(cls) -> "DataFrame":
+        from pandas import DataFrame
+
+        warnings.warn(
+            _sparse_msg.format(cls="SparseDataFrame", new="DataFrame"),
+            FutureWarning,
+            stacklevel=6,
+        )
+
+        return DataFrame()
+
+
 # If classes are moved, provide compat here.
 _class_locations_map = {
     ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
@@ -101,12 +141,12 @@ def load_reduce(self):
         "SparseArray",
     ),
     ("pandas.sparse.series", "SparseSeries"): (
-        "pandas.core.sparse.series",
-        "SparseSeries",
+        "pandas.compat.pickle_compat",
+        "_LoadSparseSeries",
     ),
     ("pandas.sparse.frame", "SparseDataFrame"): (
         "pandas.core.sparse.frame",
-        "SparseDataFrame",
+        "_LoadSparseFrame",
     ),
     ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
     ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
@@ -139,6 +179,14 @@ def load_reduce(self):
         "pandas.core.indexes.numeric",
         "Float64Index",
     ),
+    ("pandas.core.sparse.series", "SparseSeries"): (
+        "pandas.compat.pickle_compat",
+        "_LoadSparseSeries",
+    ),
+    ("pandas.core.sparse.frame", "SparseDataFrame"): (
+        "pandas.compat.pickle_compat",
+        "_LoadSparseFrame",
+    ),
 }