diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fcb14a7c29a..34f002086da 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,8 +39,8 @@ New functions/methods `NEP18 `_ compliant numpy-like library (important: read notes about NUMPY_EXPERIMENTAL_ARRAY_FUNCTION in the above link). Added explicit test coverage for - `sparse `_. (:issue:`3117`, :issue:`3202`) - By `Nezar Abdennur `_ + `sparse `_. (:issue:`3117`, :issue:`3202`). + This requires `sparse>=0.8.0`. By `Nezar Abdennur `_ and `Guido Imperiale `_. - The xarray package is now discoverable by mypy (although typing hints coverage is not diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index d5cd5eb9e8f..f6570149484 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from . import duck_array_ops, nputils, utils from .npcompat import DTypeLike -from .pycompat import dask_array_type, integer_types +from .pycompat import dask_array_type, integer_types, sparse_array_type from .utils import is_dict_like, maybe_cast_to_coords_dtype @@ -1076,19 +1076,30 @@ def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, chunks_hint=None): +def _masked_result_drop_slice(key, data=None): + key = (k for k in key if not isinstance(k, slice)) - if chunks_hint is not None: - key = [ - _dask_array_with_chunks_hint(k, chunks_hint) - if isinstance(k, np.ndarray) - else k - for k in key - ] - return _logical_any(k == -1 for k in key) + chunks_hint = getattr(data, "chunks", None) + + new_keys = [] + for k in key: + if isinstance(k, np.ndarray): + if isinstance(data, dask_array_type): + new_keys.append(_dask_array_with_chunks_hint(k, chunks_hint)) + elif isinstance(data, sparse_array_type): + import sparse + + new_keys.append(sparse.COO.from_numpy(k)) + else: + new_keys.append(k) + else: + new_keys.append(k) + + mask = _logical_any(k == -1 for k in new_keys) + return mask -def create_mask(indexer, shape, chunks_hint=None): +def 
create_mask(indexer, shape, data=None): """Create a mask for indexing with a fill-value. Parameters @@ -1098,25 +1109,24 @@ def create_mask(indexer, shape, chunks_hint=None): the result that should be masked. shape : tuple Shape of the array being indexed. - chunks_hint : tuple, optional - Optional tuple indicating desired chunks for the result. If provided, - used as a hint for chunks on the resulting dask. Must have a hint for - each dimension on the result array. + data : optional + Data for which mask is being created. If data is a dask array, its chunks + are used as a hint for chunks on the resulting mask. If data is a sparse + array, the returned mask is also a sparse array. Returns ------- - mask : bool, np.ndarray or dask.array.Array with dtype=bool - Dask array if chunks_hint is provided, otherwise a NumPy array. Has the - same shape as the indexing result. + mask : bool, np.ndarray, SparseArray or dask.array.Array with dtype=bool + Same type as data. Has the same shape as the indexing result. 
""" if isinstance(indexer, OuterIndexer): key = _outer_to_vectorized_indexer(indexer, shape).tuple assert not any(isinstance(k, slice) for k in key) - mask = _masked_result_drop_slice(key, chunks_hint) + mask = _masked_result_drop_slice(key, data) elif isinstance(indexer, VectorizedIndexer): key = indexer.tuple - base_mask = _masked_result_drop_slice(key, chunks_hint) + base_mask = _masked_result_drop_slice(key, data) slice_shape = tuple( np.arange(*k.indices(size)).size for k, size in zip(key, shape) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index bc8da10dd0c..c64dd8af6c6 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -710,8 +710,7 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA): actual_indexer = indexer data = as_indexable(self._data)[actual_indexer] - chunks_hint = getattr(data, "chunks", None) - mask = indexing.create_mask(indexer, self.shape, chunks_hint) + mask = indexing.create_mask(indexer, self.shape, data) data = duck_array_ops.where(mask, fill_value, data) else: # array cannot be indexed along dimensions of size 0, so just diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index f37f8d98ca8..82ee9b63f9d 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -708,7 +708,9 @@ def test_create_mask_dask(): indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) - actual = indexing.create_mask(indexer, (5, 5, 5), chunks_hint=((1, 1), (2, 1))) + actual = indexing.create_mask( + indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) + ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) @@ -716,12 +718,14 @@ def test_create_mask_dask(): (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T - actual = indexing.create_mask(indexer, (5, 2), chunks_hint=((3,), (2,))) + actual = indexing.create_mask( + 
indexer, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) + ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual) with pytest.raises(ValueError): - indexing.create_mask(indexer, (5, 2), chunks_hint=()) + indexing.create_mask(indexer, (5, 2), da.empty((5,), chunks=(1,))) def test_create_mask_error(): diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 36decf49713..80f80a93a1c 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -9,6 +9,7 @@ import xarray.ufuncs as xu from xarray import DataArray, Variable from xarray.core.npcompat import IS_NEP18_ACTIVE +from xarray.core.pycompat import sparse_array_type from . import assert_equal, assert_identical @@ -23,6 +24,12 @@ sparse = pytest.importorskip("sparse") +def assert_sparse_equal(a, b): + assert isinstance(a, sparse_array_type) + assert isinstance(b, sparse_array_type) + np.testing.assert_equal(a.todense(), b.todense()) + + def make_ndarray(shape): return np.arange(np.prod(shape)).reshape(shape) @@ -105,21 +112,9 @@ def test_variable_property(prop): (do("to_base_variable"), True), (do("transpose"), True), (do("unstack", dimensions={"x": {"x1": 5, "x2": 2}}), True), - param( - do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), - param( - do("equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), - param( - do("identical", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), + (do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), False), + (do("equals", make_xrvar({"x": 10, "y": 5})), False), + (do("identical", make_xrvar({"x": 10, "y": 5})), False), param( do("argmax"), True, @@ -161,11 +156,7 @@ def test_variable_property(prop): True, marks=xfail(reason="Missing implementation for np.nancumsum"), ), - param( - do("fillna", 
0), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("fillna", 0), True), param( do("item", (1, 1)), False, @@ -188,11 +179,7 @@ def test_variable_property(prop): True, # noqa marks=xfail(reason="Missing implementation for np.pad"), ), - param( - do("prod"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("prod"), False), param( do("quantile", q=0.5), True, @@ -219,20 +206,12 @@ def test_variable_property(prop): param( do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") ), - param( - do("sum"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("sum"), False), param( do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") ), param(do("to_dict"), False, marks=xfail(reason="Coercion to dense")), - param( - do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5), - True, - marks=xfail(reason="Coercion of dense to sparse when using sparse mask"), - ), # noqa + (do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5), True), ], ids=repr, ) @@ -282,16 +261,18 @@ def setUp(self): self.var = xr.Variable(("x", "y"), self.data) def test_unary_op(self): - sparse.utils.assert_eq(-self.var.data, -self.data) - sparse.utils.assert_eq(abs(self.var).data, abs(self.data)) - sparse.utils.assert_eq(self.var.round().data, self.data.round()) + assert_sparse_equal(-self.var.data, -self.data) + assert_sparse_equal(abs(self.var).data, abs(self.data)) + assert_sparse_equal(self.var.round().data, self.data.round()) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_univariate_ufunc(self): - sparse.utils.assert_eq(np.sin(self.data), xu.sin(self.var).data) + assert_sparse_equal(np.sin(self.data), xu.sin(self.var).data) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_bivariate_ufunc(self): - sparse.utils.assert_eq(np.maximum(self.data, 0), xu.maximum(self.var, 0).data) - 
sparse.utils.assert_eq(np.maximum(self.data, 0), xu.maximum(0, self.var).data) + assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(self.var, 0).data) + assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(0, self.var).data) def test_repr(self): expected = dedent( @@ -304,9 +285,8 @@ def test_repr(self): def test_pickle(self): v1 = self.var v2 = pickle.loads(pickle.dumps(v1)) - sparse.utils.assert_eq(v1.data, v2.data) + assert_sparse_equal(v1.data, v2.data) - @pytest.mark.xfail(reason="Missing implementation for np.result_type") def test_missing_values(self): a = np.array([0, 1, np.nan, 3]) s = sparse.COO.from_numpy(a) @@ -384,16 +364,8 @@ def test_dataarray_property(prop): # TODO # set_index # swap_dims - param( - do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), - param( - do("equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), + (do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), False), + (do("equals", make_xrvar({"x": 10, "y": 5})), False), param( do("argmax"), True, @@ -414,11 +386,7 @@ def test_dataarray_property(prop): False, marks=xfail(reason="Missing implementation for np.flip"), ), - param( - do("combine_first", make_xrarray({"x": 10, "y": 5})), - True, - marks=xfail(reason="mixed sparse-dense operation"), - ), + (do("combine_first", make_xrarray({"x": 10, "y": 5})), True), param( do("conjugate"), False, @@ -445,16 +413,8 @@ def test_dataarray_property(prop): marks=xfail(reason="Missing implementation for np.einsum"), ), param(do("dropna", "x"), False, marks=xfail(reason="Coercion to dense")), - param( - do("ffill", "x"), - False, - marks=xfail(reason="Coercion to dense via bottleneck.push"), - ), - param( - do("fillna", 0), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + param(do("ffill", "x"), False, marks=xfail(reason="Coercion to dense")), + 
(do("fillna", 0), True), param( do("interp", coords={"x": np.arange(10) + 0.5}), True, @@ -489,17 +449,9 @@ def test_dataarray_property(prop): False, marks=xfail(reason="Missing implementation for np.nanmedian"), ), - param(do("notnull"), True), - param( - do("pipe", np.sum, axis=1), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), - param( - do("prod"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("notnull"), True), + (do("pipe", np.sum, axis=1), True), + (do("prod"), False), param( do("quantile", q=0.5), False, @@ -526,11 +478,7 @@ def test_dataarray_property(prop): True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), # noqa - param( - do("roll", x=2), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("roll", x=2, roll_coords=True), True), param( do("sel", x=[0, 1, 2], y=[2, 3]), True, @@ -539,11 +487,7 @@ def test_dataarray_property(prop): param( do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") ), - param( - do("sum"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("sum"), False), param( do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") ), @@ -606,7 +550,6 @@ def setUp(self): self.ds_ar, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) - @pytest.mark.xfail(reason="Missing implementation for np.result_type") def test_to_dataset_roundtrip(self): x = self.sp_xr assert_equal(x, x.to_dataset("x").to_array("x")) @@ -657,7 +600,6 @@ def test_align_2d(self): assert np.all(B1.coords["x"] == B2.coords["x"]) assert np.all(B1.coords["y"] == B2.coords["y"]) - @pytest.mark.xfail(reason="fill value leads to sparse-dense operation") def test_align_outer(self): a1 = xr.DataArray( sparse.COO.from_numpy(np.arange(4)), @@ -672,22 +614,21 @@ def test_align_outer(self): a2, b2 = xr.align(a1, b1, join="outer") assert isinstance(a2.data, 
sparse.SparseArray) assert isinstance(b2.data, sparse.SparseArray) - assert np.all(a2.coords["x"].data == ["a", "b", "c", "d"]) - assert np.all(b2.coords["x"].data == ["a", "b", "c", "d"]) + assert np.all(a2.coords["x"].data == ["a", "b", "c", "d", "e"]) + assert np.all(b2.coords["x"].data == ["a", "b", "c", "d", "e"]) - @pytest.mark.xfail(reason="Missing implementation for np.result_type") def test_concat(self): ds1 = xr.Dataset(data_vars={"d": self.sp_xr}) ds2 = xr.Dataset(data_vars={"d": self.sp_xr}) ds3 = xr.Dataset(data_vars={"d": self.sp_xr}) out = xr.concat([ds1, ds2, ds3], dim="x") - sparse.utils.assert_eq( + assert_sparse_equal( out["d"].data, sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=0), ) out = xr.concat([self.sp_xr, self.sp_xr, self.sp_xr], dim="y") - sparse.utils.assert_eq( + assert_sparse_equal( out.data, sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=1) ) @@ -706,6 +647,7 @@ def test_stack(self): roundtripped = stacked.unstack() assert arr.identical(roundtripped) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_ufuncs(self): x = self.sp_xr assert_equal(np.sin(x), xu.sin(x)) @@ -830,8 +772,8 @@ def test_groupby_first(self): def test_groupby_bins(self): x1 = self.ds_xr x2 = self.sp_xr - m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum() - m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum() + m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) + m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense())