From ec79b2b2acc2d3e7265fbc8e1be2348e92b59cd1 Mon Sep 17 00:00:00 2001 From: Boris Rumyantsev Date: Sun, 9 Jan 2022 00:00:54 +0300 Subject: [PATCH] BUG: SparseArray doesn't recalc indices. (#44956, #45110) (#45125) --- pandas/core/arrays/sparse/array.py | 5 +- .../tests/arrays/sparse/test_arithmetics.py | 1 + pandas/tests/arrays/sparse/test_array.py | 9 ++- pandas/tests/extension/test_sparse.py | 60 ++++++++++++------- 4 files changed, 51 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8961dadaf98de..2d326648d2c32 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1704,13 +1704,14 @@ def _cmp_method(self, other, op) -> SparseArray: op_name = op.__name__.strip("_") return _sparse_array_op(self, other, op, op_name) else: + # scalar with np.errstate(all="ignore"): fill_value = op(self.fill_value, other) - result = op(self.sp_values, other) + result = np.full(len(self), fill_value, dtype=np.bool_) + result[self.sp_index.indices] = op(self.sp_values, other) return type(self)( result, - sparse_index=self.sp_index, fill_value=fill_value, dtype=np.bool_, ) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 012fe61fdba05..3db1ee9faad78 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -32,6 +32,7 @@ class TestSparseArrayArithmetics: _klass = SparseArray def _assert(self, a, b): + # We have to use tm.assert_sp_array_equal. See GH #45126 tm.assert_numpy_array_equal(a, b) def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op): diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 2c3dcdeeaf8dc..0ebe03d9a1198 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -248,8 +248,8 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype): assert arr.dtype == dtype assert exp.dtype == dtype - # GH 23122 def test_getitem_bool_sparse_array(self): + # GH 23122 spar_bool = SparseArray([False, True] * 5, dtype=np.bool8, fill_value=True) exp = SparseArray([np.nan, 2, np.nan, 5, 6]) tm.assert_sp_array_equal(self.arr[spar_bool], exp) @@ -266,6 +266,13 @@ def test_getitem_bool_sparse_array(self): exp = SparseArray([np.nan, 3, 5]) tm.assert_sp_array_equal(res, exp) + def test_getitem_bool_sparse_array_as_comparison(self): + # GH 45110 + arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan) + res = arr[arr > 2] + exp = SparseArray([3.0, 4.0], fill_value=np.nan) + tm.assert_sp_array_equal(res, exp) + def test_get_item(self): assert np.isnan(self.arr[1]) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 3a37ea4d673af..5e2f452009e92 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -100,6 +100,11 @@ def data_for_grouping(request): return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param) +@pytest.fixture(params=[0, np.nan]) +def data_for_compare(request): + return SparseArray([0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0], fill_value=request.param) + + class BaseSparseTests: def _check_unsupported(self, data): if data.dtype == SparseDtype(int, 0): @@ -461,32 +466,45 @@ def _check_divmod_op(self, ser, op, other, exc=NotImplementedError): super()._check_divmod_op(ser, op, other, exc=None) -class TestComparisonOps(BaseSparseTests, base.BaseComparisonOpsTests): - def _compare_other(self, s, data, comparison_op, other): +class TestComparisonOps(BaseSparseTests): + def _compare_other(self, data_for_compare: SparseArray, comparison_op, other): op = comparison_op - # array - result = pd.Series(op(data, other)) - # hard to test the fill value, since we don't know what expected - # is in general. - # Rely on tests in `tests/sparse` to validate that. - assert isinstance(result.dtype, SparseDtype) - assert result.dtype.subtype == np.dtype("bool") - - with np.errstate(all="ignore"): - expected = pd.Series( - SparseArray( - op(np.asarray(data), np.asarray(other)), - fill_value=result.values.fill_value, - ) + result = op(data_for_compare, other) + assert isinstance(result, SparseArray) + assert result.dtype.subtype == np.bool_ + + if isinstance(other, SparseArray): + fill_value = op(data_for_compare.fill_value, other.fill_value) + else: + fill_value = np.all( + op(np.asarray(data_for_compare.fill_value), np.asarray(other)) ) - tm.assert_series_equal(result, expected) + expected = SparseArray( + op(data_for_compare.to_dense(), np.asarray(other)), + fill_value=fill_value, + dtype=np.bool_, + ) + tm.assert_sp_array_equal(result, expected) - # series - ser = pd.Series(data) - result = op(ser, other) - tm.assert_series_equal(result, expected) + def test_scalar(self, data_for_compare: SparseArray, comparison_op): + self._compare_other(data_for_compare, comparison_op, 0) + self._compare_other(data_for_compare, comparison_op, 1) + self._compare_other(data_for_compare, comparison_op, -1) + self._compare_other(data_for_compare, comparison_op, np.nan) + + @pytest.mark.xfail(reason="Wrong indices") + def test_array(self, data_for_compare: SparseArray, comparison_op): + arr = np.linspace(-4, 5, 10) + self._compare_other(data_for_compare, comparison_op, arr) + + @pytest.mark.xfail(reason="Wrong indices") + def test_sparse_array(self, data_for_compare: SparseArray, comparison_op): + arr = data_for_compare + 1 + self._compare_other(data_for_compare, comparison_op, arr) + arr = data_for_compare * 2 + self._compare_other(data_for_compare, comparison_op, arr) class TestPrinting(BaseSparseTests, base.BasePrintingTests):