Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Mar 14, 2019
1 parent f433be8 commit 0922296
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 35 deletions.
78 changes: 76 additions & 2 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,8 @@ def from_spmatrix(cls, data):
"""
Create a SparseArray from a scipy.sparse matrix.
.. versionadded:: 0.25.0
Parameters
----------
data : scipy.sparse.sp_matrix
Expand Down Expand Up @@ -711,9 +713,14 @@ def from_spmatrix(cls, data):
"'data' must have a single column, not '{}'".format(ncol)
)

# our sparse index classes require that the positions be strictly
# increasing. So we need to sort loc, and arr accordingly.
arr = data.data
idx, _ = data.nonzero()
loc = np.argsort(idx)
arr = arr.take(loc)
idx.sort()

zero = np.array(0, dtype=arr.dtype).item()
dtype = SparseDtype(arr.dtype, zero)
index = IntIndex(length, idx)
Expand Down Expand Up @@ -2074,13 +2081,43 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
return A, rows, columns

def to_dense(self):
    """
    Densify the sparse Series wrapped by this accessor.

    .. versionadded:: 0.25.0

    Returns
    -------
    Series
        A new Series built from the dense form of the parent's array,
        carrying the parent's index and name.

    Examples
    --------
    >>> series = pd.Series(pd.SparseArray([0, 1, 0]))
    >>> series
    0    0
    1    1
    2    0
    dtype: Sparse[int64, 0]

    >>> series.sparse.to_dense()
    0    0
    1    1
    2    0
    dtype: int64
    """
    # Imported inside the function, as in the original block.
    from pandas import Series

    parent = self._parent
    dense_values = parent.array.to_dense()
    return Series(dense_values, index=parent.index, name=parent.name)


class SparseFrameAccessor(BaseAccessor, PandasDelegate):
"""
DataFrame accessor for sparse data.
.. versionadded:: 0.25.0
"""

def _validate(self, data):
dtypes = data.dtypes
Expand All @@ -2092,6 +2129,8 @@ def from_spmatrix(cls, data, index=None, columns=None):
"""
Create a new DataFrame from a scipy sparse matrix.
.. versionadded:: 0.25.0
Parameters
----------
data : scipy.sparse.spmatrix
Expand All @@ -2103,6 +2142,8 @@ def from_spmatrix(cls, data, index=None, columns=None):
Returns
-------
DataFrame
Each column of the DataFrame is stored as a
:class:`SparseArray`.
Examples
--------
Expand All @@ -2127,11 +2168,23 @@ def from_spmatrix(cls, data, index=None, columns=None):

def to_dense(self):
"""
Convert to dense DataFrame
Convert a DataFrame with sparse values to dense.
.. versionadded:: 0.25.0
Returns
-------
df : DataFrame
DataFrame
A DataFrame with the same values stored as dense arrays.
Examples
--------
>>> df = pd.DataFrame({"A": pd.SparseArray([0, 1, 0])})
>>> df.sparse.to_dense()
A
0 0
1 1
2 0
"""
from pandas import DataFrame

Expand All @@ -2142,6 +2195,27 @@ def to_dense(self):
columns=self._parent.columns)

def to_coo(self):
"""
Return the contents of the frame as a sparse SciPy COO matrix.
.. versionadded:: 0.20.0
Returns
-------
coo_matrix : scipy.sparse.spmatrix
If the caller is heterogeneous and contains booleans or objects,
the result will be of dtype=object. See Notes.
Notes
-----
The dtype will be the lowest-common-denominator type (implicit
upcasting); that is to say if the dtypes (even of numeric types)
are mixed, the one that accommodates all will be chosen.
e.g. If the dtypes are float16 and float32, dtype will be upcast to
float32. By numpy.find_common_type convention, mixing int64 and
uint64 will result in a float64 dtype.
"""
try:
from scipy.sparse import coo_matrix
except ImportError:
Expand Down
33 changes: 3 additions & 30 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,28 +233,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,

return self._init_dict(sdict, index, columns, dtype)

@Appender(SparseFrameAccessor.to_coo.__doc__)
def to_coo(self):
    # No inline docstring on purpose: the Appender decorator above attaches
    # SparseFrameAccessor.to_coo's documentation, and an inline copy of the
    # same text would show up twice in the rendered help.
    #
    # The conversion itself lives on the accessor; this method only forwards
    # to it so both entry points share one implementation.
    return SparseFrameAccessor(self).to_coo()

def __array_wrap__(self, result):
Expand Down Expand Up @@ -296,16 +276,9 @@ def _unpickle_sparse_frame_compat(self, state):
self._default_fill_value = fv
self._default_kind = kind

@Appender(SparseFrameAccessor.to_dense.__doc__)
def to_dense(self):
    # No inline docstring on purpose: the Appender decorator above attaches
    # SparseFrameAccessor.to_dense's documentation.
    #
    # Forward to the accessor instead of rebuilding the frame here. The
    # previous body returned from a hand-built dict of dense columns first,
    # which left the accessor call below it unreachable and kept two
    # implementations of the same conversion.
    return SparseFrameAccessor(self).to_dense()

def _apply_columns(self, func):
"""
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,17 @@ def test_constructor_inferred_fill_value(self, data, fill_value):
assert result == fill_value

@pytest.mark.parametrize('format', ['coo', 'csc', 'csr'])
@pytest.mark.parametrize('size', [0, 10])
def test_from_spmatrix(self, size, format):
    """
    Round-trip a single-column scipy sparse matrix through SparseArray.

    Runs for each sparse storage format and for both an empty (size 0)
    and a non-empty (size 10) column.
    """
    pytest.importorskip('scipy')
    import scipy.sparse

    mat = scipy.sparse.random(size, 1, density=0.5, format=format)
    result = SparseArray.from_spmatrix(mat)

    # Compare dense values rather than ``mat.data`` against ``sp_values``:
    # the stored-value order of the matrix is not guaranteed to match the
    # order SparseArray keeps its values in, while the dense content must
    # be identical.
    result = np.asarray(result)
    expected = mat.toarray().ravel()
    tm.assert_numpy_array_equal(result, expected)

def test_from_spmatrix_raises(self):
pytest.importorskip('scipy')
Expand Down

0 comments on commit 0922296

Please sign in to comment.