diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 015fdf1f45f47..30bd989f7a819 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -179,6 +179,7 @@ Sparse ^^^^^^ +- Bug in instantiating :class:`SparseSeries` from ``dict`` (:issue:`16905`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 9dd061e26ba06..ebe2547d5ec60 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -146,10 +146,8 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', data = data._data elif isinstance(data, (Series, dict)): - if index is None: - index = data.index.view() - - data = Series(data) + data = Series(data, index=index) + index = data.index res = make_sparse(data, kind=kind, fill_value=fill_value) data, sparse_index, fill_value = res diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1eb2b98a7d7cc..d70468b7fffce 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -1,5 +1,6 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +from collections import OrderedDict import pytest @@ -117,9 +118,78 @@ def test_to_sparse_pass_name(self): result = self.ts.to_sparse() assert result.name == self.ts.name + def test_constructor_dict(self): + d = {'a': 0., 'b': 1., 'c': 2.} + result = self.series_klass(d) + expected = self.series_klass(d, index=sorted(d.keys())) + tm.assert_series_equal(result, expected) + + result = self.series_klass(d, index=['b', 'c', 'd', 'a']) + expected = self.series_klass([1, 2, np.nan, 0], index=['b', 'c', 'd', 'a']) + tm.assert_series_equal(result, expected) + + def test_constructor_subclass_dict(self): + data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) + series = self.series_klass(data) + expected = self.series_klass(dict(compat.iteritems(data))) + tm.assert_series_equal(series, expected) + + def test_constructor_ordereddict(self): + # GH3283 + data = OrderedDict( + ('col%s' % i, np.random.random()) for i in range(12)) + + series = self.series_klass(data) + expected = self.series_klass(list(data.values()), list(data.keys())) + tm.assert_series_equal(series, expected) + + # Test with subclass + class A(OrderedDict): + pass + + series = self.series_klass(A(data)) + tm.assert_series_equal(series, expected) + + def test_constructor_dict_multiindex(self): + d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} + _d = sorted(d.items()) + result = self.series_klass(d) + expected = self.series_klass( + [x[1] for x in _d], + index=pd.MultiIndex.from_tuples([x[0] for x in _d])) + tm.assert_series_equal(result, expected) + + d['z'] = 111. + _d.insert(0, ('z', d['z'])) + result = self.series_klass(d) + expected = self.series_klass([x[1] for x in _d], + index=pd.Index([x[0] for x in _d], + tupleize_cols=False)) + result = result.reindex(index=expected.index) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_timedelta_index(self): + # GH #12169 : Resample category data with timedelta index + # construct Series from dict as data and TimedeltaIndex as index + # will result NaN in result Series data + expected = self.series_klass( + data=['A', 'B', 'C'], + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + + result = self.series_klass( + data={pd.to_timedelta(0, unit='s'): 'A', + pd.to_timedelta(10, unit='s'): 'B', + pd.to_timedelta(20, unit='s'): 'C'}, + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + tm.assert_series_equal(result, expected) + class TestSeriesMisc(TestData, SharedWithSparse): + series_klass = Series + def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d591aa4f567a9..a916c42c007f9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -20,8 +20,7 @@ from pandas._libs import lib from pandas._libs.tslib import iNaT -from pandas.compat import lrange, range, zip, OrderedDict, long -from pandas import compat +from pandas.compat import lrange, range, zip, long from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -605,48 +604,6 @@ def test_constructor_dict(self): expected.iloc[1] = 1 assert_series_equal(result, expected) - def test_constructor_dict_multiindex(self): - check = lambda result, expected: tm.assert_series_equal( - result, expected, check_dtype=True, check_series_type=True) - d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} - _d = sorted(d.items()) - ser = Series(d) - expected = Series([x[1] for x in _d], - index=MultiIndex.from_tuples([x[0] for x in _d])) - check(ser, expected) - - d['z'] = 111. - _d.insert(0, ('z', d['z'])) - ser = Series(d) - expected = Series([x[1] for x in _d], index=Index( - [x[0] for x in _d], tupleize_cols=False)) - ser = ser.reindex(index=expected.index) - check(ser, expected) - - def test_constructor_dict_timedelta_index(self): - # GH #12169 : Resample category data with timedelta index - # construct Series from dict as data and TimedeltaIndex as index - # will result NaN in result Series data - expected = Series( - data=['A', 'B', 'C'], - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - - result = Series( - data={pd.to_timedelta(0, unit='s'): 'A', - pd.to_timedelta(10, unit='s'): 'B', - pd.to_timedelta(20, unit='s'): 'C'}, - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - # this should work - assert_series_equal(result, expected) - - def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) - series = Series(data) - refseries = Series(dict(compat.iteritems(data))) - assert_series_equal(refseries, series) - def test_constructor_dict_datetime64_index(self): # GH 9456 @@ -670,26 +627,6 @@ def create_data(constructor): assert_series_equal(result_datetime, expected) assert_series_equal(result_Timestamp, expected) - def test_orderedDict_ctor(self): - # GH3283 - import pandas - import random - data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - assert all(s.values == list(data.values())) - - def test_orderedDict_subclass_ctor(self): - # GH3283 - import pandas - import random - - class A(OrderedDict): - pass - - data = A([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - assert all(s.values == list(data.values())) - def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 654d12b782f37..631e324659f92 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1002,12 +1002,14 @@ def _check(frame, orig): shifted = frame.shift(2, freq='B') exp = orig.shift(2, freq='B') - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) shifted = frame.shift(2, freq=BDay()) exp = orig.shift(2, freq=BDay()) - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) self._check_all(_check) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index b524d6bfab418..08601648c473c 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -1,6 +1,8 @@ # pylint: disable-msg=E1101,W0612 import operator +from datetime import datetime + import pytest from numpy import nan @@ -58,6 +60,8 @@ def _test_data2_zero(): class TestSparseSeries(SharedWithSparse): + series_klass = SparseSeries + def setup_method(self, method): arr, index = _test_data1() @@ -1361,3 +1365,18 @@ def test_numpy_func_call(self): for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + + +@pytest.mark.parametrize( + 'datetime_type', (np.datetime64, + pd.Timestamp, + lambda x: datetime.strptime(x, '%Y-%m-%d'))) +def test_constructor_dict_datetime64_index(datetime_type): + # GH 9456 + dates = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + result = SparseSeries(dict(zip(map(datetime_type, dates), values))) + expected = SparseSeries(values, map(pd.Timestamp, dates)) + + tm.assert_sp_series_equal(result, expected) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 17e09b38b20e0..d45589e6edd3a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -46,7 +46,7 @@ from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex, - Series, DataFrame, Panel, Panel4D) + Series, DataFrame, SparseSeries, Panel, Panel4D) from pandas._libs import testing as _testing from pandas.io.common import urlopen @@ -1225,6 +1225,12 @@ def assert_series_equal(left, right, check_dtype=True, # instance validation _check_isinstance(left, right, Series) + if isinstance(left, SparseSeries) and isinstance(right, SparseSeries): + return assert_sp_series_equal(left, right, + check_dtype=check_dtype, + check_series_type=check_series_type, + check_names=check_names) + if check_series_type: # ToDo: There are some tests using rhs is sparse # lhs is dense. Should use assert_class_equal in future