Skip to content

Commit

Permalink
Datasets more robust to non-string keys (#2174)
Browse files Browse the repository at this point in the history
* ds more robust to non-str keys

* formatting

* time.dayofyear needs cover in dataarray getitem

* trial of indexer_dict

* feedback from stephan

* a few more methods

* reindex added

* rename to either_dict_or_kwargs

* remove assert check

* docstring

* more docstring

* `optional` goes last

* last docstring

* what's new

* artefact

* test either_dict_or_kwargs
  • Loading branch information
max-sixty authored and shoyer committed May 27, 2018
1 parent a8c1ed2 commit 8470500
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 52 deletions.
8 changes: 8 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ Enhancements
to manage its version strings. (:issue:`1300`).
By `Joe Hamman <https://github.com/jhamman>`_.

- :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` & :py:meth:`~DataArray.reindex`
(and their :py:class:`Dataset` counterparts) now support supplying a ``dict``
as a first argument, as an alternative to the existing approach
of supplying a set of `kwargs`. This allows for more robust handling
of dimension names that conflict with other keyword argument names,
or that are not strings.
By `Maximilian Roos <https://github.com/maxim-lian>`_.

Bug fixes
~~~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ testpaths=xarray/tests
[flake8]
max-line-length=79
ignore=
W503
exclude=
doc/

Expand Down
8 changes: 5 additions & 3 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
from .merge import (
expand_and_merge_variables, merge_coords, merge_coords_for_inplace_math)
from .pycompat import OrderedDict
from .utils import Frozen, ReprObject
from .utils import Frozen, ReprObject, either_dict_or_kwargs
from .variable import Variable


# Used as the key corresponding to a DataArray's variable when converting
# arbitrary DataArray objects to datasets
_THIS_ARRAY = ReprObject('<this-array>')
Expand Down Expand Up @@ -332,7 +331,8 @@ def assert_coordinate_consistent(obj, coords):
.format(k, obj[k], coords[k]))


def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
def remap_label_indexers(obj, indexers=None, method=None, tolerance=None,
**indexers_kwargs):
"""
Remap **indexers from obj.coords.
If indexer is an instance of DataArray and it has coordinate, then this
Expand All @@ -345,6 +345,8 @@ def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
new_indexes: mapping of new dimensional-coordinate.
"""
from .dataarray import DataArray
indexers = either_dict_or_kwargs(
indexers, indexers_kwargs, 'remap_label_indexers')

v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v
for k, v in indexers.items()}
Expand Down
42 changes: 27 additions & 15 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from .formatting import format_item
from .options import OPTIONS
from .pycompat import OrderedDict, basestring, iteritems, range, zip
from .utils import decode_numpy_dict_values, ensure_us_time_resolution
from .utils import (
either_dict_or_kwargs, decode_numpy_dict_values,
ensure_us_time_resolution)
from .variable import (
IndexVariable, Variable, as_compatible_data, as_variable,
assert_unique_multiindex_level_names)
Expand Down Expand Up @@ -470,7 +472,7 @@ def __getitem__(self, key):
return self._getitem_coord(key)
else:
# xarray-style array indexing
return self.isel(**self._item_key_to_dict(key))
return self.isel(indexers=self._item_key_to_dict(key))

def __setitem__(self, key, value):
if isinstance(key, basestring):
Expand Down Expand Up @@ -498,7 +500,7 @@ def _attr_sources(self):
@property
def _item_sources(self):
"""List of places to look-up items for key-completion"""
return [self.coords, {d: self[d] for d in self.dims},
return [self.coords, {d: self.coords[d] for d in self.dims},
LevelCoordinatesSource(self)]

def __contains__(self, key):
Expand Down Expand Up @@ -742,7 +744,7 @@ def chunk(self, chunks=None, name_prefix='xarray-', token=None,
token=token, lock=lock)
return self._from_temp_dataset(ds)

def isel(self, drop=False, **indexers):
def isel(self, indexers=None, drop=False, **indexers_kwargs):
"""Return a new DataArray whose dataset is given by integer indexing
along the specified dimension(s).
Expand All @@ -751,10 +753,12 @@ def isel(self, drop=False, **indexers):
Dataset.isel
DataArray.sel
"""
ds = self._to_temp_dataset().isel(drop=drop, **indexers)
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel')
ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers)
return self._from_temp_dataset(ds)

def sel(self, method=None, tolerance=None, drop=False, **indexers):
def sel(self, indexers=None, method=None, tolerance=None, drop=False,
**indexers_kwargs):
"""Return a new DataArray whose dataset is given by selecting
index labels along the specified dimension(s).
Expand All @@ -776,8 +780,9 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
DataArray.isel
"""
ds = self._to_temp_dataset().sel(drop=drop, method=method,
tolerance=tolerance, **indexers)
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel')
ds = self._to_temp_dataset().sel(
indexers=indexers, drop=drop, method=method, tolerance=tolerance)
return self._from_temp_dataset(ds)

def isel_points(self, dim='points', **indexers):
Expand Down Expand Up @@ -851,12 +856,19 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True):
return self.reindex(method=method, tolerance=tolerance, copy=copy,
**indexers)

def reindex(self, method=None, tolerance=None, copy=True, **indexers):
def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
**indexers_kwargs):
"""Conform this object onto a new set of indexes, filling in
missing values with NaN.
Parameters
----------
indexers : dict, optional
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
names will simply be ignored.
One of indexers or indexers_kwargs must be provided.
copy : bool, optional
If ``copy=True``, data in the return value is always copied. If
``copy=False`` and reindexing is unnecessary, or can be performed
Expand All @@ -874,11 +886,9 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers):
Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
**indexers : dict
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
names will simply be ignored.
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.
Returns
-------
Expand All @@ -891,8 +901,10 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers):
DataArray.reindex_like
align
"""
indexers = either_dict_or_kwargs(
indexers, indexers_kwargs, 'reindex')
ds = self._to_temp_dataset().reindex(
method=method, tolerance=tolerance, copy=copy, **indexers)
indexers=indexers, method=method, tolerance=tolerance, copy=copy)
return self._from_temp_dataset(ds)

def rename(self, new_name_or_name_dict):
Expand Down
63 changes: 39 additions & 24 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
rolling, utils)
from .. import conventions
from .alignment import align
from .common import (DataWithCoords, ImplementsDatasetReduce,
_contains_datetime_like_objects)
from .common import (
DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects)
from .coordinates import (
DatasetCoordinates, Indexes, LevelCoordinatesSource,
assert_coordinate_consistent, remap_label_indexers)
Expand All @@ -30,7 +30,7 @@
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Frozen, SortedKeysDict, decode_numpy_dict_values,
Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values,
ensure_us_time_resolution, hashable, maybe_wrap_array)
from .variable import IndexVariable, Variable, as_variable, broadcast_variables

Expand Down Expand Up @@ -1368,7 +1368,7 @@ def _get_indexers_coordinates(self, indexers):
attached_coords[k] = v
return attached_coords

def isel(self, drop=False, **indexers):
def isel(self, indexers=None, drop=False, **indexers_kwargs):
"""Returns a new dataset with each array indexed along the specified
dimension(s).
Expand All @@ -1378,15 +1378,19 @@ def isel(self, drop=False, **indexers):
Parameters
----------
drop : bool, optional
If ``drop=True``, drop coordinates variables indexed by integers
instead of making them scalar.
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
indexers : dict, optional
A dict with keys matching dimensions and values given
by integers, slice objects or arrays.
The indexer can be an integer, slice, array-like or DataArray.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
One of indexers or indexers_kwargs must be provided.
drop : bool, optional
If ``drop=True``, drop coordinates variables indexed by integers
instead of making them scalar.
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.
Returns
-------
Expand All @@ -1404,12 +1408,15 @@ def isel(self, drop=False, **indexers):
Dataset.sel
DataArray.isel
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel')

indexers_list = self._validate_indexers(indexers)

variables = OrderedDict()
for name, var in iteritems(self._variables):
var_indexers = {k: v for k, v in indexers_list if k in var.dims}
new_var = var.isel(**var_indexers)
new_var = var.isel(indexers=var_indexers)
if not (drop and name in var_indexers):
variables[name] = new_var

Expand All @@ -1425,7 +1432,8 @@ def isel(self, drop=False, **indexers):
.union(coord_vars))
return self._replace_vars_and_dims(variables, coord_names=coord_names)

def sel(self, method=None, tolerance=None, drop=False, **indexers):
def sel(self, indexers=None, method=None, tolerance=None, drop=False,
**indexers_kwargs):
"""Returns a new dataset with each array indexed by tick labels
along the specified dimension(s).
Expand All @@ -1444,6 +1452,14 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and values given
by scalars, slices or arrays of tick labels. For dimensions with
multi-index, the indexer may also be a dict-like object with keys
matching index level names.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
One of indexers or indexers_kwargs must be provided.
method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
Method to use for inexact matches (requires pandas>=0.16):
Expand All @@ -1459,13 +1475,9 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
drop : bool, optional
If ``drop=True``, drop coordinates variables in `indexers` instead
of making them scalar.
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
by scalars, slices or arrays of tick labels. For dimensions with
multi-index, the indexer may also be a dict-like object with keys
matching index level names.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.
Returns
-------
Expand All @@ -1484,9 +1496,10 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
Dataset.isel
DataArray.sel
"""
pos_indexers, new_indexes = remap_label_indexers(self, method,
tolerance, **indexers)
result = self.isel(drop=drop, **pos_indexers)
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel')
pos_indexers, new_indexes = remap_label_indexers(
self, indexers=indexers, method=method, tolerance=tolerance)
result = self.isel(indexers=pos_indexers, drop=drop)
return result._replace_indexes(new_indexes)

def isel_points(self, dim='points', **indexers):
Expand Down Expand Up @@ -1734,7 +1747,7 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True):
**indexers)

def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
**kw_indexers):
**indexers_kwargs):
"""Conform this object onto a new set of indexes, filling in
missing values with NaN.
Expand All @@ -1745,6 +1758,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
arrays of coordinates tick labels. Any mis-matched coordinate values
will be filled in with NaN, and any mis-matched dimension names will
simply be ignored.
One of indexers or indexers_kwargs must be provided.
method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
Method to use for filling index values in ``indexers`` not found in
this dataset:
Expand All @@ -1763,8 +1777,9 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
``copy=False`` and reindexing is unnecessary, or can be performed
with only slice operations, then the output may share memory with
the input. In either case, a new xarray object is always returned.
**kw_indexers : optional
**indexers_kwargs : {dim: indexer, ...}, optional
Keyword arguments in the same form as ``indexers``.
One of indexers or indexers_kwargs must be provided.
Returns
-------
Expand All @@ -1777,7 +1792,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
align
pandas.Index.get_indexer
"""
indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers,
indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs,
'reindex')

bad_dims = [d for d in indexers if d not in self.dims]
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def is_full_slice(value):
return isinstance(value, slice) and value == slice(None)


def combine_pos_and_kw_args(pos_kwargs, kw_kwargs, func_name):
def either_dict_or_kwargs(pos_kwargs, kw_kwargs, func_name):
if pos_kwargs is not None:
if not is_dict_like(pos_kwargs):
raise ValueError('the first argument to .%s must be a dictionary'
Expand Down
8 changes: 5 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
import xarray as xr # only for Dataset and DataArray

from . import (
arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils,)
arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils)
from .indexing import (
BasicIndexer, OuterIndexer, PandasIndexAdapter, VectorizedIndexer,
as_indexable)
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, zip)
from .utils import OrderedSet
from .utils import OrderedSet, either_dict_or_kwargs

try:
import dask.array as da
Expand Down Expand Up @@ -824,7 +824,7 @@ def chunk(self, chunks=None, name=None, lock=False):
return type(self)(self.dims, data, self._attrs, self._encoding,
fastpath=True)

def isel(self, **indexers):
def isel(self, indexers=None, drop=False, **indexers_kwargs):
"""Return a new array indexed along the specified dimension(s).
Parameters
Expand All @@ -841,6 +841,8 @@ def isel(self, **indexers):
unless numpy fancy indexing was triggered by using an array
indexer, in which case the data will be a copy.
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel')

invalid = [k for k in indexers if k not in self.dims]
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from xarray.core.pycompat import OrderedDict, iteritems
from xarray.tests import (
ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal,
assert_identical, raises_regex, requires_bottleneck, requires_dask,
requires_scipy, source_ndarray, unittest, requires_cftime)
assert_identical, raises_regex, requires_bottleneck, requires_cftime,
requires_dask, requires_scipy, source_ndarray, unittest)


class TestDataArray(TestCase):
Expand Down
Loading

0 comments on commit 8470500

Please sign in to comment.