Datasets more robust to non-string keys #2174

Merged: 16 commits, May 27, 2018
8 changes: 8 additions & 0 deletions doc/whats-new.rst
@@ -46,6 +46,14 @@ Enhancements
to manage its version strings. (:issue:`1300`).
By `Joe Hamman <https://github.com/jhamman>`_.

- :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` and :py:meth:`~DataArray.reindex`
(and their :py:class:`Dataset` counterparts) now support supplying a ``dict``
as the first argument, as an alternative to the existing approach
of supplying ``kwargs``. This allows for more robust behavior with
dimension names that conflict with other keyword names or are
not strings.
By `Maximilian Roos <https://github.com/maxim-lian>`_.

Bug fixes
~~~~~~~~~

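A minimal sketch of the calling convention described in the whats-new entry above, assuming a toy DataArray whose dimension name collides with ``sel``'s own ``method`` keyword (all names here are illustrative):

```python
import numpy as np
import xarray as xr

# A dimension named "method" collides with sel()'s own `method` keyword,
# so it cannot be expressed with the keyword form sel(method=...).
da = xr.DataArray(np.arange(3), dims='method',
                  coords={'method': [10, 20, 30]})

da.sel({'method': 20})                    # dict as the first argument avoids the collision
da.isel({'method': slice(0, 2)})          # same convention for positional indexing
da.reindex({'method': [10, 20, 30, 40]})  # and for reindexing (40 is filled with NaN)
```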
1 change: 1 addition & 0 deletions setup.cfg
@@ -8,6 +8,7 @@ testpaths=xarray/tests
[flake8]
max-line-length=79
ignore=
W503
Collaborator (author) commented:

This is the "line break before binary operator" warning (W503); PEP 8 has since changed its recommendation, so breaking before binary operators is now the preferred style.

exclude=
doc/

8 changes: 5 additions & 3 deletions xarray/core/coordinates.py
@@ -9,10 +9,9 @@
from .merge import (
expand_and_merge_variables, merge_coords, merge_coords_for_inplace_math)
from .pycompat import OrderedDict
from .utils import Frozen, ReprObject
from .utils import Frozen, ReprObject, either_dict_or_kwargs
from .variable import Variable


# Used as the key corresponding to a DataArray's variable when converting
# arbitrary DataArray objects to datasets
_THIS_ARRAY = ReprObject('<this-array>')
@@ -332,7 +331,8 @@ def assert_coordinate_consistent(obj, coords):
.format(k, obj[k], coords[k]))


def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
def remap_label_indexers(obj, indexers=None, method=None, tolerance=None,
**indexers_kwargs):
"""
Remap **indexers from obj.coords.
If an indexer is an instance of DataArray and it has a coordinate, then this
@@ -345,6 +345,8 @@ def remap_label_indexers(obj, method=None, tolerance=None, **indexers):
new_indexes: mapping of new dimensional-coordinate.
"""
from .dataarray import DataArray
indexers = either_dict_or_kwargs(
indexers, indexers_kwargs, 'remap_label_indexers')

v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v
for k, v in indexers.items()}
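For context on why these internal calls now pass ``indexers`` through as a dict instead of splatting ``**kwargs``: Python keyword arguments must be strings and can shadow named parameters. A minimal illustration with a hypothetical ``sel_like`` stand-in (not part of the diff):

```python
def sel_like(method=None, **indexers):
    """Stand-in for the old keyword-only calling convention."""
    return method, indexers

sel_like(x=slice(None))          # fine: string dimension names work as keywords
# sel_like(**{1: slice(None)})   # TypeError: keywords must be strings
# sel_like(method='nearest')     # captured by `method`, never seen as a dimension
```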
42 changes: 27 additions & 15 deletions xarray/core/dataarray.py
@@ -18,7 +18,9 @@
from .formatting import format_item
from .options import OPTIONS
from .pycompat import OrderedDict, basestring, iteritems, range, zip
from .utils import decode_numpy_dict_values, ensure_us_time_resolution
from .utils import (
either_dict_or_kwargs, decode_numpy_dict_values,
ensure_us_time_resolution)
from .variable import (
IndexVariable, Variable, as_compatible_data, as_variable,
assert_unique_multiindex_level_names)
@@ -470,7 +472,7 @@ def __getitem__(self, key):
return self._getitem_coord(key)
else:
# xarray-style array indexing
return self.isel(**self._item_key_to_dict(key))
return self.isel(indexers=self._item_key_to_dict(key))

def __setitem__(self, key, value):
if isinstance(key, basestring):
@@ -498,7 +500,7 @@ def _attr_sources(self):
@property
def _item_sources(self):
"""List of places to look-up items for key-completion"""
return [self.coords, {d: self[d] for d in self.dims},
return [self.coords, {d: self.coords[d] for d in self.dims},
LevelCoordinatesSource(self)]

def __contains__(self, key):
@@ -742,7 +744,7 @@ def chunk(self, chunks=None, name_prefix='xarray-', token=None,
token=token, lock=lock)
return self._from_temp_dataset(ds)

def isel(self, drop=False, **indexers):
def isel(self, indexers=None, drop=False, **indexers_kwargs):
"""Return a new DataArray whose dataset is given by integer indexing
along the specified dimension(s).

@@ -751,10 +753,12 @@ def isel(self, drop=False, **indexers):
Dataset.isel
DataArray.sel
"""
ds = self._to_temp_dataset().isel(drop=drop, **indexers)
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel')
ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers)
return self._from_temp_dataset(ds)

def sel(self, method=None, tolerance=None, drop=False, **indexers):
def sel(self, indexers=None, method=None, tolerance=None, drop=False,
**indexers_kwargs):
"""Return a new DataArray whose dataset is given by selecting
index labels along the specified dimension(s).

@@ -776,8 +780,9 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):
DataArray.isel

"""
ds = self._to_temp_dataset().sel(drop=drop, method=method,
tolerance=tolerance, **indexers)
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel')
ds = self._to_temp_dataset().sel(
indexers=indexers, drop=drop, method=method, tolerance=tolerance)
return self._from_temp_dataset(ds)

def isel_points(self, dim='points', **indexers):
@@ -851,12 +856,19 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True):
return self.reindex(method=method, tolerance=tolerance, copy=copy,
**indexers)

def reindex(self, method=None, tolerance=None, copy=True, **indexers):
def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
**indexers_kwargs):
"""Conform this object onto a new set of indexes, filling in
missing values with NaN.

Parameters
----------
indexers : dict, optional
Dictionary with keys given by dimension names and values given by
arrays of coordinate tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
names will simply be ignored.
One of indexers or indexers_kwargs must be provided.
copy : bool, optional
If ``copy=True``, data in the return value is always copied. If
``copy=False`` and reindexing is unnecessary, or can be performed
@@ -874,11 +886,9 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers):
Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
**indexers : dict
Dictionary with keys given by dimension names and values given by
arrays of coordinates tick labels. Any mis-matched coordinate
values will be filled in with NaN, and any mis-matched dimension
names will simply be ignored.
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.

Returns
-------
@@ -891,8 +901,10 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers):
DataArray.reindex_like
align
"""
indexers = either_dict_or_kwargs(
indexers, indexers_kwargs, 'reindex')
ds = self._to_temp_dataset().reindex(
method=method, tolerance=tolerance, copy=copy, **indexers)
indexers=indexers, method=method, tolerance=tolerance, copy=copy)
return self._from_temp_dataset(ds)

def rename(self, new_name_or_name_dict):
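A hedged sketch of the ``reindex`` behaviour documented above, using a small illustrative array (not taken from the PR's test suite):

```python
import xarray as xr

da = xr.DataArray([1.0, 2.0, 3.0], dims='x', coords={'x': [0, 1, 2]})

# Dict form: keys are dimension names, values are the new coordinate labels;
# labels missing from the original index are filled with NaN.
da.reindex({'x': [0, 1, 2, 3]})

# The keyword form still works when the dimension name is a plain identifier.
da.reindex(x=[0, 1, 2, 3], method='nearest', tolerance=0.5)
```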
63 changes: 39 additions & 24 deletions xarray/core/dataset.py
@@ -17,8 +17,8 @@
rolling, utils)
from .. import conventions
from .alignment import align
from .common import (DataWithCoords, ImplementsDatasetReduce,
_contains_datetime_like_objects)
from .common import (
DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects)
from .coordinates import (
DatasetCoordinates, Indexes, LevelCoordinatesSource,
assert_coordinate_consistent, remap_label_indexers)
@@ -30,7 +30,7 @@
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Frozen, SortedKeysDict, decode_numpy_dict_values,
Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values,
ensure_us_time_resolution, hashable, maybe_wrap_array)
from .variable import IndexVariable, Variable, as_variable, broadcast_variables

@@ -1368,7 +1368,7 @@ def _get_indexers_coordinates(self, indexers):
attached_coords[k] = v
return attached_coords

def isel(self, drop=False, **indexers):
def isel(self, indexers=None, drop=False, **indexers_kwargs):
"""Returns a new dataset with each array indexed along the specified
dimension(s).

@@ -1378,15 +1378,19 @@

Parameters
----------
drop : bool, optional
If ``drop=True``, drop coordinates variables indexed by integers
instead of making them scalar.
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
indexers : dict, optional
A dict with keys matching dimensions and values given
by integers, slice objects or arrays.
Each indexer can be an integer, slice, array-like or DataArray.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
One of indexers or indexers_kwargs must be provided.
drop : bool, optional
If ``drop=True``, drop coordinates variables indexed by integers
instead of making them scalar.
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.

Returns
-------
@@ -1404,12 +1408,15 @@
Dataset.sel
DataArray.isel
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel')

indexers_list = self._validate_indexers(indexers)

variables = OrderedDict()
for name, var in iteritems(self._variables):
var_indexers = {k: v for k, v in indexers_list if k in var.dims}
new_var = var.isel(**var_indexers)
new_var = var.isel(indexers=var_indexers)
if not (drop and name in var_indexers):
variables[name] = new_var

@@ -1425,7 +1432,8 @@ def isel(self, drop=False, **indexers):
.union(coord_vars))
return self._replace_vars_and_dims(variables, coord_names=coord_names)

def sel(self, method=None, tolerance=None, drop=False, **indexers):
def sel(self, indexers=None, method=None, tolerance=None, drop=False,
**indexers_kwargs):
"""Returns a new dataset with each array indexed by tick labels
along the specified dimension(s).

@@ -1444,6 +1452,14 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers):

Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and values given
by scalars, slices or arrays of tick labels. For dimensions with
multi-index, the indexer may also be a dict-like object with keys
matching index level names.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
One of indexers or indexers_kwargs must be provided.
method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
Method to use for inexact matches (requires pandas>=0.16):

@@ -1459,13 +1475,9 @@
drop : bool, optional
If ``drop=True``, drop coordinates variables in `indexers` instead
of making them scalar.
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
by scalars, slices or arrays of tick labels. For dimensions with
multi-index, the indexer may also be a dict-like object with keys
matching index level names.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.

Returns
-------
@@ -1484,9 +1496,10 @@
Dataset.isel
DataArray.sel
"""
pos_indexers, new_indexes = remap_label_indexers(self, method,
tolerance, **indexers)
result = self.isel(drop=drop, **pos_indexers)
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel')
pos_indexers, new_indexes = remap_label_indexers(
self, indexers=indexers, method=method, tolerance=tolerance)
result = self.isel(indexers=pos_indexers, drop=drop)
return result._replace_indexes(new_indexes)

def isel_points(self, dim='points', **indexers):
@@ -1734,7 +1747,7 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True):
**indexers)

def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
**kw_indexers):
**indexers_kwargs):
"""Conform this object onto a new set of indexes, filling in
missing values with NaN.

@@ -1745,6 +1758,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True,
arrays of coordinates tick labels. Any mis-matched coordinate values
will be filled in with NaN, and any mis-matched dimension names will
simply be ignored.
One of indexers or indexers_kwargs must be provided.
method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
Method to use for filling index values in ``indexers`` not found in
this dataset:
@@ -1763,8 +1777,9 @@
``copy=False`` and reindexing is unnecessary, or can be performed
with only slice operations, then the output may share memory with
the input. In either case, a new xarray object is always returned.
**kw_indexers : optional
**indexers_kwargs : {dim: indexer, ...}, optional
Keyword arguments in the same form as ``indexers``.
One of indexers or indexers_kwargs must be provided.

Returns
-------
@@ -1777,7 +1792,7 @@
align
pandas.Index.get_indexer
"""
indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers,
indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs,
'reindex')

bad_dims = [d for d in indexers if d not in self.dims]
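A minimal sketch of the corresponding Dataset-level calls, assuming a toy dataset; the ``drop`` behaviour follows the docstring above:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({'t': ('x', np.arange(4.0))}, coords={'x': [10, 20, 30, 40]})

ds.isel({'x': slice(0, 2)})      # dict form of positional indexing
ds.sel({'x': 20}, drop=True)     # label-based selection; drop the now-scalar 'x' coord
ds.reindex({'x': [10, 20, 50]})  # the missing label 50 is filled with NaN
```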
2 changes: 1 addition & 1 deletion xarray/core/utils.py
@@ -183,7 +183,7 @@ def is_full_slice(value):
return isinstance(value, slice) and value == slice(None)


def combine_pos_and_kw_args(pos_kwargs, kw_kwargs, func_name):
def either_dict_or_kwargs(pos_kwargs, kw_kwargs, func_name):
Member commented:

We don't need to test every use, but we should at least add a unit test for this helper function -- I don't think we have any test coverage currently.

if pos_kwargs is not None:
if not is_dict_like(pos_kwargs):
raise ValueError('the first argument to .%s must be a dictionary'
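Following up on the review comment above, a minimal test sketch for the helper, assuming it returns whichever form was supplied and rejects a non-dict positional argument (as the visible snippet suggests):

```python
import pytest

from xarray.core.utils import either_dict_or_kwargs


def test_either_dict_or_kwargs():
    # the positional dict is passed through as-is
    assert either_dict_or_kwargs({'a': 1}, {}, 'foo') == {'a': 1}
    # with no positional dict, the keyword form is used instead
    assert either_dict_or_kwargs(None, {'a': 1}, 'foo') == {'a': 1}
    # a non-dict first argument is rejected, naming the calling method
    with pytest.raises(ValueError, match='foo'):
        either_dict_or_kwargs('not a dict', {}, 'foo')
```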
8 changes: 5 additions & 3 deletions xarray/core/variable.py
@@ -11,13 +11,13 @@
import xarray as xr # only for Dataset and DataArray

from . import (
arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils,)
arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils)
from .indexing import (
BasicIndexer, OuterIndexer, PandasIndexAdapter, VectorizedIndexer,
as_indexable)
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, zip)
from .utils import OrderedSet
from .utils import OrderedSet, either_dict_or_kwargs

try:
import dask.array as da
@@ -824,7 +824,7 @@ def chunk(self, chunks=None, name=None, lock=False):
return type(self)(self.dims, data, self._attrs, self._encoding,
fastpath=True)

def isel(self, **indexers):
def isel(self, indexers=None, drop=False, **indexers_kwargs):
"""Return a new array indexed along the specified dimension(s).

Parameters
@@ -841,6 +841,8 @@ def isel(self, **indexers):
unless numpy fancy indexing was triggered by using an array
indexer, in which case the data will be a copy.
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel')

invalid = [k for k in indexers if k not in self.dims]
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)
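The same convention reaches down to ``Variable.isel``; a small sketch, assuming the dict form mirrors the higher-level methods:

```python
import numpy as np
import xarray as xr

var = xr.Variable(('x', 'y'), np.arange(6).reshape(2, 3))

var.isel({'x': 0})       # dict form
var.isel(y=slice(0, 2))  # keyword form is unchanged
```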
4 changes: 2 additions & 2 deletions xarray/tests/test_dataarray.py
@@ -17,8 +17,8 @@
from xarray.core.pycompat import OrderedDict, iteritems
from xarray.tests import (
ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal,
assert_identical, raises_regex, requires_bottleneck, requires_dask,
requires_scipy, source_ndarray, unittest, requires_cftime)
assert_identical, raises_regex, requires_bottleneck, requires_cftime,
requires_dask, requires_scipy, source_ndarray, unittest)


class TestDataArray(TestCase):