From ec79b2b2acc2d3e7265fbc8e1be2348e92b59cd1 Mon Sep 17 00:00:00 2001
From: Boris Rumyantsev <bd.rumyantsev@gmail.com>
Date: Sun, 9 Jan 2022 00:00:54 +0300
Subject: [PATCH] BUG: SparseArray doesn't recalc indices. (#44956, #45110)
 (#45125)

---
 pandas/core/arrays/sparse/array.py            |  5 +-
 .../tests/arrays/sparse/test_arithmetics.py   |  1 +
 pandas/tests/arrays/sparse/test_array.py      |  9 ++-
 pandas/tests/extension/test_sparse.py         | 60 ++++++++++++-------
 4 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 8961dadaf98de..2d326648d2c32 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1704,13 +1704,14 @@ def _cmp_method(self, other, op) -> SparseArray:
             op_name = op.__name__.strip("_")
             return _sparse_array_op(self, other, op, op_name)
         else:
+            # scalar
             with np.errstate(all="ignore"):
                 fill_value = op(self.fill_value, other)
-                result = op(self.sp_values, other)
+                result = np.full(len(self), fill_value, dtype=np.bool_)
+                result[self.sp_index.indices] = op(self.sp_values, other)
 
             return type(self)(
                 result,
-                sparse_index=self.sp_index,
                 fill_value=fill_value,
                 dtype=np.bool_,
             )
diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py
index 012fe61fdba05..3db1ee9faad78 100644
--- a/pandas/tests/arrays/sparse/test_arithmetics.py
+++ b/pandas/tests/arrays/sparse/test_arithmetics.py
@@ -32,6 +32,7 @@ class TestSparseArrayArithmetics:
     _klass = SparseArray
 
     def _assert(self, a, b):
+        # TODO: this should use tm.assert_sp_array_equal instead. See GH #45126
         tm.assert_numpy_array_equal(a, b)
 
     def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op):
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index 2c3dcdeeaf8dc..0ebe03d9a1198 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -248,8 +248,8 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype):
         assert arr.dtype == dtype
         assert exp.dtype == dtype
 
-    # GH 23122
     def test_getitem_bool_sparse_array(self):
+        # GH 23122
         spar_bool = SparseArray([False, True] * 5, dtype=np.bool8, fill_value=True)
         exp = SparseArray([np.nan, 2, np.nan, 5, 6])
         tm.assert_sp_array_equal(self.arr[spar_bool], exp)
@@ -266,6 +266,13 @@ def test_getitem_bool_sparse_array(self):
         exp = SparseArray([np.nan, 3, 5])
         tm.assert_sp_array_equal(res, exp)
 
+    def test_getitem_bool_sparse_array_as_comparison(self):
+        # GH 45110: index with a boolean SparseArray produced by a comparison
+        arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
+        res = arr[arr > 2]
+        exp = SparseArray([3.0, 4.0], fill_value=np.nan)
+        tm.assert_sp_array_equal(res, exp)
+
     def test_get_item(self):
 
         assert np.isnan(self.arr[1])
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index 3a37ea4d673af..5e2f452009e92 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -100,6 +100,11 @@ def data_for_grouping(request):
     return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param)
 
 
+@pytest.fixture(params=[0, np.nan])
+def data_for_compare(request):
+    return SparseArray([0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0], fill_value=request.param)
+
+
 class BaseSparseTests:
     def _check_unsupported(self, data):
         if data.dtype == SparseDtype(int, 0):
@@ -461,32 +466,45 @@ def _check_divmod_op(self, ser, op, other, exc=NotImplementedError):
         super()._check_divmod_op(ser, op, other, exc=None)
 
 
-class TestComparisonOps(BaseSparseTests, base.BaseComparisonOpsTests):
-    def _compare_other(self, s, data, comparison_op, other):
+class TestComparisonOps(BaseSparseTests):
+    def _compare_other(self, data_for_compare: SparseArray, comparison_op, other):
         op = comparison_op
 
-        # array
-        result = pd.Series(op(data, other))
-        # hard to test the fill value, since we don't know what expected
-        # is in general.
-        # Rely on tests in `tests/sparse` to validate that.
-        assert isinstance(result.dtype, SparseDtype)
-        assert result.dtype.subtype == np.dtype("bool")
-
-        with np.errstate(all="ignore"):
-            expected = pd.Series(
-                SparseArray(
-                    op(np.asarray(data), np.asarray(other)),
-                    fill_value=result.values.fill_value,
-                )
+        result = op(data_for_compare, other)
+        assert isinstance(result, SparseArray)
+        assert result.dtype.subtype == np.bool_
+        if isinstance(other, SparseArray):
+            fill_value = op(data_for_compare.fill_value, other.fill_value)
+        else:
+            fill_value = np.all(
+                op(np.asarray(data_for_compare.fill_value), np.asarray(other))
             )
 
-        tm.assert_series_equal(result, expected)
+        # Built after the if/else so it is also defined for SparseArray operands.
+        expected = SparseArray(
+            op(data_for_compare.to_dense(), np.asarray(other)),
+            fill_value=fill_value,
+            dtype=np.bool_,
+        )
+        tm.assert_sp_array_equal(result, expected)
 
-        # series
-        ser = pd.Series(data)
-        result = op(ser, other)
-        tm.assert_series_equal(result, expected)
+    def test_scalar(self, data_for_compare: SparseArray, comparison_op):
+        self._compare_other(data_for_compare, comparison_op, 0)
+        self._compare_other(data_for_compare, comparison_op, 1)
+        self._compare_other(data_for_compare, comparison_op, -1)
+        self._compare_other(data_for_compare, comparison_op, np.nan)
+
+    @pytest.mark.xfail(reason="Wrong indices")
+    def test_array(self, data_for_compare: SparseArray, comparison_op):
+        arr = np.linspace(-4, 5, 10)
+        self._compare_other(data_for_compare, comparison_op, arr)
+
+    @pytest.mark.xfail(reason="Wrong indices")
+    def test_sparse_array(self, data_for_compare: SparseArray, comparison_op):
+        arr = data_for_compare + 1
+        self._compare_other(data_for_compare, comparison_op, arr)
+        arr = data_for_compare * 2
+        self._compare_other(data_for_compare, comparison_op, arr)
 
 
 class TestPrinting(BaseSparseTests, base.BasePrintingTests):