updates

TomAugspurger · Mar 14, 2019 · 0922296 · 0922296
1 parent f433be8
commit 0922296
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 35 deletions.
diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -683,6 +683,8 @@ def from_spmatrix(cls, data):
         """
         Create a SparseArray from a scipy.sparse matrix.
 
+        .. versionadded:: 0.25.0
+
         Parameters
         ----------
         data : scipy.sparse.sp_matrix
@@ -711,9 +713,14 @@ def from_spmatrix(cls, data):
                 "'data' must have a single column, not '{}'".format(ncol)
             )
 
+        # our sparse index classes require that the positions be strictly
+        # increasing. So we need to sort idx, and reorder arr accordingly.
         arr = data.data
         idx, _ = data.nonzero()
+        loc = np.argsort(idx)
+        arr = arr.take(loc)
         idx.sort()
+
         zero = np.array(0, dtype=arr.dtype).item()
         dtype = SparseDtype(arr.dtype, zero)
         index = IntIndex(length, idx)
@@ -2074,13 +2081,43 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
         return A, rows, columns
 
     def to_dense(self):
+        """
+        Convert a Series from sparse values to dense.
+
+        .. versionadded:: 0.25.0
+
+        Returns
+        -------
+        Series
+            A Series with the same values, stored as a dense array.
+
+        Examples
+        --------
+        >>> series = pd.Series(pd.SparseArray([0, 1, 0]))
+        >>> series
+        0    0
+        1    1
+        2    0
+        dtype: Sparse[int64, 0]
+
+        >>> series.sparse.to_dense()
+        0    0
+        1    1
+        2    0
+        dtype: int64
+        """
         from pandas import Series
         return Series(self._parent.array.to_dense(),
                       index=self._parent.index,
                       name=self._parent.name)
 
 
 class SparseFrameAccessor(BaseAccessor, PandasDelegate):
+    """
+    DataFrame accessor for sparse data.
+
+    .. versionadded:: 0.25.0
+    """
 
     def _validate(self, data):
         dtypes = data.dtypes
@@ -2092,6 +2129,8 @@ def from_spmatrix(cls, data, index=None, columns=None):
         """
         Create a new DataFrame from a scipy sparse matrix.
 
+        .. versionadded:: 0.25.0
+
         Parameters
         ----------
         data : scipy.sparse.spmatrix
@@ -2103,6 +2142,8 @@ def from_spmatrix(cls, data, index=None, columns=None):
         Returns
         -------
         DataFrame
+            Each column of the DataFrame is stored as a
+            :class:`SparseArray`.
 
         Examples
         --------
@@ -2127,11 +2168,23 @@ def from_spmatrix(cls, data, index=None, columns=None):
 
     def to_dense(self):
         """
-        Convert to dense DataFrame
+        Convert a DataFrame with sparse values to dense.
+
+        .. versionadded:: 0.25.0
 
         Returns
         -------
-        df : DataFrame
+        DataFrame
+            A DataFrame with the same values stored as dense arrays.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": pd.SparseArray([0, 1, 0])})
+        >>> df.sparse.to_dense()
+           A
+        0  0
+        1  1
+        2  0
         """
         from pandas import DataFrame
 
@@ -2142,6 +2195,27 @@ def to_dense(self):
                          columns=self._parent.columns)
 
     def to_coo(self):
+        """
+        Return the contents of the frame as a sparse SciPy COO matrix.
+
+        .. versionadded:: 0.20.0
+
+        Returns
+        -------
+        coo_matrix : scipy.sparse.spmatrix
+            If the caller is heterogeneous and contains booleans or objects,
+            the result will be of dtype=object. See Notes.
+
+        Notes
+        -----
+        The dtype will be the lowest-common-denominator type (implicit
+        upcasting); that is to say if the dtypes (even of numeric types)
+        are mixed, the one that accommodates all will be chosen.
+
+        e.g. If the dtypes are float16 and float32, dtype will be upcast to
+        float32. By numpy.find_common_type convention, mixing int64 and
+        uint64 will result in a float64 dtype.
+        """
         try:
             from scipy.sparse import coo_matrix
         except ImportError:

diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
@@ -233,28 +233,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
 
         return self._init_dict(sdict, index, columns, dtype)
 
+    @Appender(SparseFrameAccessor.to_coo.__doc__)
     def to_coo(self):
-        """
-        Return the contents of the frame as a sparse SciPy COO matrix.
-
-        .. versionadded:: 0.20.0
-
-        Returns
-        -------
-        coo_matrix : scipy.sparse.spmatrix
-            If the caller is heterogeneous and contains booleans or objects,
-            the result will be of dtype=object. See Notes.
-
-        Notes
-        -----
-        The dtype will be the lowest-common-denominator type (implicit
-        upcasting); that is to say if the dtypes (even of numeric types)
-        are mixed, the one that accommodates all will be chosen.
-
-        e.g. If the dtypes are float16 and float32, dtype will be upcast to
-        float32. By numpy.find_common_type convention, mixing int64 and
-        and uint64 will result in a float64 dtype.
-        """
         return SparseFrameAccessor(self).to_coo()
 
     def __array_wrap__(self, result):
@@ -296,16 +276,9 @@ def _unpickle_sparse_frame_compat(self, state):
         self._default_fill_value = fv
         self._default_kind = kind
 
+    @Appender(SparseFrameAccessor.to_dense.__doc__)
     def to_dense(self):
-        """
-        Convert to dense DataFrame
-
-        Returns
-        -------
-        df : DataFrame
-        """
-        data = {k: v.to_dense() for k, v in compat.iteritems(self)}
-        return DataFrame(data, index=self.index, columns=self.columns)
+        return SparseFrameAccessor(self).to_dense()
 
     def _apply_columns(self, func):
         """

diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -173,13 +173,17 @@ def test_constructor_inferred_fill_value(self, data, fill_value):
             assert result == fill_value
 
     @pytest.mark.parametrize('format', ['coo', 'csc', 'csr'])
-    def test_from_spmatrix(self, format):
+    @pytest.mark.parametrize('size', [0, 10])
+    def test_from_spmatrix(self, size, format):
         pytest.importorskip('scipy')
         import scipy.sparse
 
-        mat = scipy.sparse.random(10, 1, density=0.5, format=format)
+        mat = scipy.sparse.random(size, 1, density=0.5, format=format)
         result = SparseArray.from_spmatrix(mat)
-        tm.assert_numpy_array_equal(mat.data, result.sp_values)
+
+        result = np.asarray(result)
+        expected = mat.toarray().ravel()
+        tm.assert_numpy_array_equal(result, expected)
 
     def test_from_spmatrix_raises(self):
         pytest.importorskip('scipy')