Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sparse fixes for reindex #3255

Merged
merged 15 commits into from
Aug 27, 2019
4 changes: 2 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ New functions/methods
`NEP18 <https://www.numpy.org/neps/nep-0018-array-function-protocol.html>`_ compliant
numpy-like library (important: read notes about NUMPY_EXPERIMENTAL_ARRAY_FUNCTION in
the above link). Added explicit test coverage for
`sparse <https://github.com/pydata/sparse>`_. (:issue:`3117`, :issue:`3202`).
This requires `sparse>=0.8.0`. By `Nezar Abdennur <https://github.com/nvictus>`_
and `Guido Imperiale <https://github.com/crusaderky>`_.

- The xarray package is now discoverable by mypy (although typing hints coverage is not
Expand Down
50 changes: 30 additions & 20 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from . import duck_array_ops, nputils, utils
from .npcompat import DTypeLike
from .pycompat import dask_array_type, integer_types
from .pycompat import dask_array_type, integer_types, sparse_array_type
from .utils import is_dict_like, maybe_cast_to_coords_dtype


Expand Down Expand Up @@ -1076,19 +1076,30 @@ def _logical_any(args):
return functools.reduce(operator.or_, args)


def _masked_result_drop_slice(key, data=None):
    """Return a boolean mask that is True wherever an index in ``key`` is -1.

    Slice entries in ``key`` are dropped; the remaining (integer/array)
    entries are compared against the fill sentinel ``-1`` and combined with
    logical OR via ``_logical_any``.

    Parameters
    ----------
    key : iterable
        Indexer entries (integers, numpy arrays, or slices).
    data : optional
        The array being indexed. If it is a dask array, numpy index arrays
        are rechunked to match its chunks so the resulting mask is a dask
        array with aligned chunks. If it is a sparse array, numpy index
        arrays are converted to ``sparse.COO`` so the mask is sparse too.
        Otherwise plain numpy arrays are used unchanged.
    """
    key = tuple(k for k in key if not isinstance(k, slice))
    # Only dask arrays expose ``chunks``; for anything else this is None.
    chunks_hint = getattr(data, "chunks", None)

    new_keys = []
    for k in key:
        if isinstance(k, np.ndarray):
            if isinstance(data, dask_array_type):
                # Re-chunk the index array like ``data`` so the combined
                # mask has matching chunking.
                new_keys.append(_dask_array_with_chunks_hint(k, chunks_hint))
            elif isinstance(data, sparse_array_type):
                # Imported lazily: sparse is an optional dependency.
                import sparse

                new_keys.append(sparse.COO.from_numpy(k))
            else:
                new_keys.append(k)
        else:
            new_keys.append(k)

    return _logical_any(k == -1 for k in new_keys)


def create_mask(indexer, shape, chunks_hint=None):
def create_mask(indexer, shape, data=None):
"""Create a mask for indexing with a fill-value.

Parameters
Expand All @@ -1098,25 +1109,24 @@ def create_mask(indexer, shape, chunks_hint=None):
the result that should be masked.
shape : tuple
Shape of the array being indexed.
chunks_hint : tuple, optional
Optional tuple indicating desired chunks for the result. If provided,
used as a hint for chunks on the resulting dask. Must have a hint for
each dimension on the result array.
data : optional
Data for which mask is being created. If data is a dask arrays, its chunks
are used as a hint for chunks on the resulting mask. If data is a sparse
array, the returned mask is also a sparse array.

Returns
-------
mask : bool, np.ndarray or dask.array.Array with dtype=bool
Dask array if chunks_hint is provided, otherwise a NumPy array. Has the
same shape as the indexing result.
mask : bool, np.ndarray, SparseArray or dask.array.Array with dtype=bool
Same type as data. Has the same shape as the indexing result.
"""
if isinstance(indexer, OuterIndexer):
key = _outer_to_vectorized_indexer(indexer, shape).tuple
assert not any(isinstance(k, slice) for k in key)
mask = _masked_result_drop_slice(key, chunks_hint)
mask = _masked_result_drop_slice(key, data)

elif isinstance(indexer, VectorizedIndexer):
key = indexer.tuple
base_mask = _masked_result_drop_slice(key, chunks_hint)
base_mask = _masked_result_drop_slice(key, data)
slice_shape = tuple(
np.arange(*k.indices(size)).size
for k, size in zip(key, shape)
Expand Down
3 changes: 1 addition & 2 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,7 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA):
actual_indexer = indexer

data = as_indexable(self._data)[actual_indexer]
chunks_hint = getattr(data, "chunks", None)
mask = indexing.create_mask(indexer, self.shape, chunks_hint)
mask = indexing.create_mask(indexer, self.shape, data)
data = duck_array_ops.where(mask, fill_value, data)
dcherian marked this conversation as resolved.
Show resolved Hide resolved
else:
# array cannot be indexed along dimensions of size 0, so just
Expand Down
10 changes: 7 additions & 3 deletions xarray/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,20 +708,24 @@ def test_create_mask_dask():

indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2])))
expected = np.array(2 * [[False, True, False]])
actual = indexing.create_mask(indexer, (5, 5, 5), chunks_hint=((1, 1), (2, 1)))
actual = indexing.create_mask(
indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1)))
)
assert actual.chunks == ((1, 1), (2, 1))
np.testing.assert_array_equal(expected, actual)

indexer = indexing.VectorizedIndexer(
(np.array([0, -1, 2]), slice(None), np.array([0, 1, -1]))
)
expected = np.array([[False, True, True]] * 2).T
actual = indexing.create_mask(indexer, (5, 2), chunks_hint=((3,), (2,)))
actual = indexing.create_mask(
indexer, (5, 2), da.empty((3, 2), chunks=((3,), (2,)))
)
assert isinstance(actual, da.Array)
np.testing.assert_array_equal(expected, actual)

with pytest.raises(ValueError):
indexing.create_mask(indexer, (5, 2), chunks_hint=())
indexing.create_mask(indexer, (5, 2), da.empty((5,), chunks=(1,)))


def test_create_mask_error():
Expand Down
Loading