diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d9f43fa1868..e9d76b6d1e1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,14 @@ Enhancements to manage its version strings. (:issue:`1300`). By `Joe Hamman `_. +- :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` & :py:meth:`~DataArray.reindex`, + (and their :py:class:`Dataset` counterparts) now support supplying a ``dict`` + as a first argument, as an alternative to the existing approach + of supplying a set of `kwargs`. This allows for more robust behavior + of dimension names which conflict with other keyword names, or are + not strings. + By `Maximilian Roos `_. + Bug fixes ~~~~~~~~~ diff --git a/setup.cfg b/setup.cfg index 850551b3579..4dd1bffe043 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,6 +8,7 @@ testpaths=xarray/tests [flake8] max-line-length=79 ignore= + W503 exclude= doc/ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index cb22c0b687b..efe8affb2a3 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -9,10 +9,9 @@ from .merge import ( expand_and_merge_variables, merge_coords, merge_coords_for_inplace_math) from .pycompat import OrderedDict -from .utils import Frozen, ReprObject +from .utils import Frozen, ReprObject, either_dict_or_kwargs from .variable import Variable - # Used as the key corresponding to a DataArray's variable when converting # arbitrary DataArray objects to datasets _THIS_ARRAY = ReprObject('') @@ -332,7 +331,8 @@ def assert_coordinate_consistent(obj, coords): .format(k, obj[k], coords[k])) -def remap_label_indexers(obj, method=None, tolerance=None, **indexers): +def remap_label_indexers(obj, indexers=None, method=None, tolerance=None, + **indexers_kwargs): """ Remap **indexers from obj.coords. If indexer is an instance of DataArray and it has coordinate, then this @@ -345,6 +345,8 @@ def remap_label_indexers(obj, method=None, tolerance=None, **indexers): new_indexes: mapping of new dimensional-coordinate. """ from .dataarray import DataArray + indexers = either_dict_or_kwargs( + indexers, indexers_kwargs, 'remap_label_indexers') v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v for k, v in indexers.items()} diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fc7091dad85..da9acb48a7a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -18,7 +18,9 @@ from .formatting import format_item from .options import OPTIONS from .pycompat import OrderedDict, basestring, iteritems, range, zip -from .utils import decode_numpy_dict_values, ensure_us_time_resolution +from .utils import ( + either_dict_or_kwargs, decode_numpy_dict_values, + ensure_us_time_resolution) from .variable import ( IndexVariable, Variable, as_compatible_data, as_variable, assert_unique_multiindex_level_names) @@ -470,7 +472,7 @@ def __getitem__(self, key): return self._getitem_coord(key) else: # xarray-style array indexing - return self.isel(**self._item_key_to_dict(key)) + return self.isel(indexers=self._item_key_to_dict(key)) def __setitem__(self, key, value): if isinstance(key, basestring): @@ -498,7 +500,7 @@ def _attr_sources(self): @property def _item_sources(self): """List of places to look-up items for key-completion""" - return [self.coords, {d: self[d] for d in self.dims}, + return [self.coords, {d: self.coords[d] for d in self.dims}, LevelCoordinatesSource(self)] def __contains__(self, key): @@ -742,7 +744,7 @@ def chunk(self, chunks=None, name_prefix='xarray-', token=None, token=token, lock=lock) return self._from_temp_dataset(ds) - def isel(self, drop=False, **indexers): + def isel(self, indexers=None, drop=False, **indexers_kwargs): """Return a new DataArray whose dataset is given by integer indexing along the specified dimension(s). @@ -751,10 +753,12 @@ def isel(self, drop=False, **indexers): Dataset.isel DataArray.sel """ - ds = self._to_temp_dataset().isel(drop=drop, **indexers) + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel') + ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers) return self._from_temp_dataset(ds) - def sel(self, method=None, tolerance=None, drop=False, **indexers): + def sel(self, indexers=None, method=None, tolerance=None, drop=False, + **indexers_kwargs): """Return a new DataArray whose dataset is given by selecting index labels along the specified dimension(s). @@ -776,8 +780,9 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers): DataArray.isel """ - ds = self._to_temp_dataset().sel(drop=drop, method=method, - tolerance=tolerance, **indexers) + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel') + ds = self._to_temp_dataset().sel( + indexers=indexers, drop=drop, method=method, tolerance=tolerance) return self._from_temp_dataset(ds) def isel_points(self, dim='points', **indexers): @@ -851,12 +856,19 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True): return self.reindex(method=method, tolerance=tolerance, copy=copy, **indexers) - def reindex(self, method=None, tolerance=None, copy=True, **indexers): + def reindex(self, indexers=None, method=None, tolerance=None, copy=True, + **indexers_kwargs): """Conform this object onto a new set of indexes, filling in missing values with NaN. Parameters ---------- + indexers : dict, optional + Dictionary with keys given by dimension names and values given by + arrays of coordinates tick labels. Any mis-matched coordinate + values will be filled in with NaN, and any mis-matched dimension + names will simply be ignored. + One of indexers or indexers_kwargs must be provided. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -874,11 +886,9 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers): Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations most satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - **indexers : dict - Dictionary with keys given by dimension names and values given by - arrays of coordinates tick labels. Any mis-matched coordinate - values will be filled in with NaN, and any mis-matched dimension - names will simply be ignored. + **indexers_kwarg : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. Returns ------- @@ -891,8 +901,10 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers): DataArray.reindex_like align """ + indexers = either_dict_or_kwargs( + indexers, indexers_kwargs, 'reindex') ds = self._to_temp_dataset().reindex( - method=method, tolerance=tolerance, copy=copy, **indexers) + indexers=indexers, method=method, tolerance=tolerance, copy=copy) return self._from_temp_dataset(ds) def rename(self, new_name_or_name_dict): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fff11dedb01..d6a5ac1c172 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -17,8 +17,8 @@ rolling, utils) from .. import conventions from .alignment import align -from .common import (DataWithCoords, ImplementsDatasetReduce, - _contains_datetime_like_objects) +from .common import ( + DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects) from .coordinates import ( DatasetCoordinates, Indexes, LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers) @@ -30,7 +30,7 @@ from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, iteritems, range) from .utils import ( - Frozen, SortedKeysDict, decode_numpy_dict_values, + Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values, ensure_us_time_resolution, hashable, maybe_wrap_array) from .variable import IndexVariable, Variable, as_variable, broadcast_variables @@ -1368,7 +1368,7 @@ def _get_indexers_coordinates(self, indexers): attached_coords[k] = v return attached_coords - def isel(self, drop=False, **indexers): + def isel(self, indexers=None, drop=False, **indexers_kwargs): """Returns a new dataset with each array indexed along the specified dimension(s). @@ -1378,15 +1378,19 @@ def isel(self, drop=False, **indexers): Parameters ---------- - drop : bool, optional - If ``drop=True``, drop coordinates variables indexed by integers - instead of making them scalar. - **indexers : {dim: indexer, ...} - Keyword arguments with names matching dimensions and values given + indexers : dict, optional + A dict with keys matching dimensions and values given by integers, slice objects or arrays. indexer can be a integer, slice, array-like or DataArray. If DataArrays are passed as indexers, xarray-style indexing will be carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + drop : bool, optional + If ``drop=True``, drop coordinates variables indexed by integers + instead of making them scalar. + **indexers_kwarg : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. Returns ------- @@ -1404,12 +1408,15 @@ def isel(self, drop=False, **indexers): Dataset.sel DataArray.isel """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel') + indexers_list = self._validate_indexers(indexers) variables = OrderedDict() for name, var in iteritems(self._variables): var_indexers = {k: v for k, v in indexers_list if k in var.dims} - new_var = var.isel(**var_indexers) + new_var = var.isel(indexers=var_indexers) if not (drop and name in var_indexers): variables[name] = new_var @@ -1425,7 +1432,8 @@ def isel(self, drop=False, **indexers): .union(coord_vars)) return self._replace_vars_and_dims(variables, coord_names=coord_names) - def sel(self, method=None, tolerance=None, drop=False, **indexers): + def sel(self, indexers=None, method=None, tolerance=None, drop=False, + **indexers_kwargs): """Returns a new dataset with each array indexed by tick labels along the specified dimension(s). @@ -1444,6 +1452,14 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers): Parameters ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by scalars, slices or arrays of tick labels. For dimensions with + multi-index, the indexer may also be a dict-like object with keys + matching index level names. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional Method to use for inexact matches (requires pandas>=0.16): @@ -1459,13 +1475,9 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers): drop : bool, optional If ``drop=True``, drop coordinates variables in `indexers` instead of making them scalar. - **indexers : {dim: indexer, ...} - Keyword arguments with names matching dimensions and values given - by scalars, slices or arrays of tick labels. For dimensions with - multi-index, the indexer may also be a dict-like object with keys - matching index level names. - If DataArrays are passed as indexers, xarray-style indexing will be - carried out. See :ref:`indexing` for the details. + **indexers_kwarg : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. Returns ------- @@ -1484,9 +1496,10 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers): Dataset.isel DataArray.sel """ - pos_indexers, new_indexes = remap_label_indexers(self, method, - tolerance, **indexers) - result = self.isel(drop=drop, **pos_indexers) + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel') + pos_indexers, new_indexes = remap_label_indexers( + self, indexers=indexers, method=method, tolerance=tolerance) + result = self.isel(indexers=pos_indexers, drop=drop) return result._replace_indexes(new_indexes) def isel_points(self, dim='points', **indexers): @@ -1734,7 +1747,7 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True): **indexers) def reindex(self, indexers=None, method=None, tolerance=None, copy=True, - **kw_indexers): + **indexers_kwargs): """Conform this object onto a new set of indexes, filling in missing values with NaN. @@ -1745,6 +1758,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, arrays of coordinates tick labels. Any mis-matched coordinate values will be filled in with NaN, and any mis-matched dimension names will simply be ignored. + One of indexers or indexers_kwargs must be provided. method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional Method to use for filling index values in ``indexers`` not found in this dataset: @@ -1763,8 +1777,9 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, a new xarray object is always returned. - **kw_indexers : optional + **indexers_kwarg : {dim: indexer, ...}, optional Keyword arguments in the same form as ``indexers``. + One of indexers or indexers_kwargs must be provided. Returns ------- @@ -1777,7 +1792,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, align pandas.Index.get_indexer """ - indexers = utils.combine_pos_and_kw_args(indexers, kw_indexers, + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, 'reindex') bad_dims = [d for d in indexers if d not in self.dims] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 06bb3ede393..8a5476c9c37 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -183,7 +183,7 @@ def is_full_slice(value): return isinstance(value, slice) and value == slice(None) -def combine_pos_and_kw_args(pos_kwargs, kw_kwargs, func_name): +def either_dict_or_kwargs(pos_kwargs, kw_kwargs, func_name): if pos_kwargs is not None: if not is_dict_like(pos_kwargs): raise ValueError('the first argument to .%s must be a dictionary' diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9dcb99459d4..52d470accfe 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -11,13 +11,13 @@ import xarray as xr # only for Dataset and DataArray from . import ( - arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils,) + arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils) from .indexing import ( BasicIndexer, OuterIndexer, PandasIndexAdapter, VectorizedIndexer, as_indexable) from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) -from .utils import OrderedSet +from .utils import OrderedSet, either_dict_or_kwargs try: import dask.array as da @@ -824,7 +824,7 @@ def chunk(self, chunks=None, name=None, lock=False): return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) - def isel(self, **indexers): + def isel(self, indexers=None, drop=False, **indexers_kwargs): """Return a new array indexed along the specified dimension(s). Parameters @@ -841,6 +841,8 @@ def isel(self, **indexers): unless numpy fancy indexing was triggered by using an array indexer, in which case the data will be a copy. """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'isel') + invalid = [k for k in indexers if k not in self.dims] if invalid: raise ValueError("dimensions %r do not exist" % invalid) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 35e270f0db7..25d8a844eec 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -17,8 +17,8 @@ from xarray.core.pycompat import OrderedDict, iteritems from xarray.tests import ( ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal, - assert_identical, raises_regex, requires_bottleneck, requires_dask, - requires_scipy, source_ndarray, unittest, requires_cftime) + assert_identical, raises_regex, requires_bottleneck, requires_cftime, + requires_dask, requires_scipy, source_ndarray, unittest) class TestDataArray(TestCase): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 76e41c43c6d..38e2dce1633 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1435,7 +1435,7 @@ def test_sel_method(self): with raises_regex(TypeError, '``method``'): # this should not pass silently - data.sel(data) + data.sel(method=data) # cannot pass method if there is no associated coordinate with raises_regex(ValueError, 'cannot supply'): @@ -4181,6 +4181,12 @@ def test_dir_non_string(data_set): result = dir(data_set) assert not (5 in result) + # GH2172 + sample_data = np.random.uniform(size=[2, 2000, 10000]) + x = xr.Dataset({"sample_data": (sample_data.shape, sample_data)}) + x2 = x["sample_data"] + dir(x2) + def test_dir_unicode(data_set): data_set[u'unicode'] = 'uni' diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 0b3b0ee7dd6..e1645f2903c 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -1,17 +1,21 @@ from __future__ import absolute_import, division, print_function +from datetime import datetime + import numpy as np import pandas as pd import pytest -from datetime import datetime from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import duck_array_ops, utils from xarray.core.options import set_options from xarray.core.pycompat import OrderedDict +from xarray.core.utils import either_dict_or_kwargs + +from . import ( + TestCase, assert_array_equal, has_cftime, has_cftime_or_netCDF4, + requires_dask) from .test_coding_times import _all_cftime_date_types -from . import (TestCase, requires_dask, assert_array_equal, - has_cftime_or_netCDF4, has_cftime) class TestAlias(TestCase): @@ -235,3 +239,17 @@ def test_hidden_key_dict(): hkd[hidden_key] with pytest.raises(KeyError): del hkd[hidden_key] + + +def test_either_dict_or_kwargs(): + + result = either_dict_or_kwargs(dict(a=1), None, 'foo') + expected = dict(a=1) + assert result == expected + + result = either_dict_or_kwargs(None, dict(a=1), 'foo') + expected = dict(a=1) + assert result == expected + + with pytest.raises(ValueError, match=r'foo'): + result = either_dict_or_kwargs(dict(a=1), dict(a=1), 'foo')