diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 67d0d548ec5..4e1607f0e42 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,16 @@ What's New v0.11.0 (unreleased) -------------------- +Breaking changes +~~~~~~~~~~~~~~~~ + +- Reduction of :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample` + without dimension argument will change in the next release. + Now we warn a FutureWarning. + By `Keisuke Fujii `_. + +Documentation +~~~~~~~~~~~~~ Enhancements ~~~~~~~~~~~~ diff --git a/xarray/__init__.py b/xarray/__init__.py index e3898f348cc..59a961c6b56 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -34,3 +34,5 @@ from . import tutorial from . import ufuncs from . import testing + +from .core.common import ALL_DIMS diff --git a/xarray/core/common.py b/xarray/core/common.py index 280034a30dd..41e4fec2982 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -10,7 +10,11 @@ from . import duck_array_ops, dtypes, formatting, ops from .arithmetic import SupportsArithmetic from .pycompat import OrderedDict, basestring, dask_array_type, suppress -from .utils import either_dict_or_kwargs, Frozen, SortedKeysDict +from .utils import either_dict_or_kwargs, Frozen, SortedKeysDict, ReprObject + + +# Used as a sentinel value to indicate a all dimensions +ALL_DIMS = ReprObject('') class ImplementsArrayReduce(object): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9cf304858a6..981ad3157ba 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -18,7 +18,8 @@ from .. import conventions from .alignment import align from .common import ( - DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects) + ALL_DIMS, DataWithCoords, ImplementsDatasetReduce, + _contains_datetime_like_objects) from .coordinates import ( DatasetCoordinates, Indexes, LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers) @@ -743,7 +744,7 @@ def copy(self, deep=False, data=None): Shallow copy versus deep copy >>> da = xr.DataArray(np.random.randn(2, 3)) - >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, + >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, coords={'x': ['one', 'two']}) >>> ds.copy() @@ -775,7 +776,7 @@ def copy(self, deep=False, data=None): foo (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 - Changing the data using the ``data`` argument maintains the + Changing the data using the ``data`` argument maintains the structure of the original object, but with the new data. Original object is unaffected. @@ -826,7 +827,7 @@ def copy(self, deep=False, data=None): # skip __init__ to avoid costly validation return self._construct_direct(variables, self._coord_names.copy(), self._dims.copy(), self._attrs_copy(), - encoding=self.encoding) + encoding=self.encoding) def _subset_with_all_valid_coords(self, variables, coord_names, attrs): needed_dims = set() @@ -2893,6 +2894,8 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ + if dim is ALL_DIMS: + dim = None if isinstance(dim, basestring): dims = set([dim]) elif dim is None: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7068f8e6cae..3842c642047 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,14 +1,15 @@ from __future__ import absolute_import, division, print_function import functools +import warnings import numpy as np import pandas as pd -from . import dtypes, duck_array_ops, nputils, ops +from . import dtypes, duck_array_ops, nputils, ops, utils from .arithmetic import SupportsArithmetic from .combine import concat -from .common import ImplementsArrayReduce, ImplementsDatasetReduce +from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce from .pycompat import integer_types, range, zip from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index from .variable import IndexVariable, Variable, as_variable @@ -567,10 +568,39 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, Array with summarized data and the indicated dimension(s) removed. """ + if dim == DEFAULT_DIMS: + dim = ALL_DIMS + # TODO change this to dim = self._group_dim after + # the deprecation process + if self._obj.ndim > 1: + warnings.warn( + "Default reduction dimension will be changed to the " + "grouped dimension after xarray 0.12. To silence this " + "warning, pass dim=xarray.ALL_DIMS explicitly.", + FutureWarning, stacklevel=2) + def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) return self.apply(reduce_array, shortcut=shortcut) + # TODO remove the following class method and DEFAULT_DIMS after the + # deprecation cycle + @classmethod + def _reduce_method(cls, func, include_skipna, numeric_only): + if include_skipna: + def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None, + keep_attrs=False, **kwargs): + return self.reduce(func, dim, axis, keep_attrs=keep_attrs, + skipna=skipna, allow_lazy=True, **kwargs) + else: + def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, + keep_attrs=False, **kwargs): + return self.reduce(func, dim, axis, keep_attrs=keep_attrs, + allow_lazy=True, **kwargs) + return wrapped_func + + +DEFAULT_DIMS = utils.ReprObject('') ops.inject_reduce_methods(DataArrayGroupBy) ops.inject_binary_ops(DataArrayGroupBy) @@ -649,10 +679,40 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ + if dim == DEFAULT_DIMS: + dim = ALL_DIMS + # TODO change this to dim = self._group_dim after + # the deprecation process. Do not forget to remove _reduce_method + warnings.warn( + "Default reduction dimension will be changed to the " + "grouped dimension after xarray 0.12. To silence this " + "warning, pass dim=xarray.ALL_DIMS explicitly.", + FutureWarning, stacklevel=2) + elif dim is None: + dim = self._group_dim + def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) return self.apply(reduce_dataset) + # TODO remove the following class method and DEFAULT_DIMS after the + # deprecation cycle + @classmethod + def _reduce_method(cls, func, include_skipna, numeric_only): + if include_skipna: + def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False, + skipna=None, **kwargs): + return self.reduce(func, dim, keep_attrs, skipna=skipna, + numeric_only=numeric_only, allow_lazy=True, + **kwargs) + else: + def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False, + **kwargs): + return self.reduce(func, dim, keep_attrs, + numeric_only=numeric_only, allow_lazy=True, + **kwargs) + return wrapped_func + def assign(self, **kwargs): """Assign data variables by group. diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 4933a09b257..25c149c51af 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division, print_function from . import ops -from .groupby import DataArrayGroupBy, DatasetGroupBy +from .groupby import DataArrayGroupBy, DatasetGroupBy, DEFAULT_DIMS from .pycompat import OrderedDict, dask_array_type RESAMPLE_DIM = '__resample_dim__' @@ -277,15 +277,14 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. - Note that `dim` is by default here and ignored if passed by the user; - this ensures compatibility with the existing reduce interface. - Parameters ---------- func : function Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. + dim : str or sequence of str, optional + Dimension(s) over which to apply `func`. keep_attrs : bool, optional If True, the datasets's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -299,8 +298,11 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ + if dim == DEFAULT_DIMS: + dim = None + return super(DatasetResample, self).reduce( - func, self._dim, keep_attrs, **kwargs) + func, dim, keep_attrs, **kwargs) def _interpolate(self, kind='linear'): """Apply scipy.interpolate.interp1d along resampling dimension.""" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 86629cc2a28..c003d52aab2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1333,6 +1333,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, Array with summarized data and the indicated dimension(s) removed. """ + if dim is common.ALL_DIMS: + dim = None if dim is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dim' arguments") diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 6ca83ab73ab..43fa35473ce 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -385,8 +385,8 @@ def test_groupby(self): u = self.eager_array v = self.lazy_array - expected = u.groupby('x').mean() - actual = v.groupby('x').mean() + expected = u.groupby('x').mean(xr.ALL_DIMS) + actual = v.groupby('x').mean(xr.ALL_DIMS) self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2b93e696d50..f8b288f4ab0 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2,6 +2,7 @@ import pickle from copy import deepcopy +from distutils.version import LooseVersion from textwrap import dedent import warnings @@ -14,7 +15,7 @@ DataArray, Dataset, IndexVariable, Variable, align, broadcast, set_options) from xarray.convert import from_cdms2 from xarray.coding.times import CFDatetimeCoder, _import_cftime -from xarray.core.common import full_like +from xarray.core.common import full_like, ALL_DIMS from xarray.core.pycompat import OrderedDict, iteritems from xarray.tests import ( ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal, @@ -2000,15 +2001,15 @@ def test_groupby_sum(self): self.x[:, 10:].sum(), self.x[:, 9:10].sum()]).T), 'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo'] - assert_allclose(expected_sum_all, grouped.reduce(np.sum)) - assert_allclose(expected_sum_all, grouped.sum()) + assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=ALL_DIMS)) + assert_allclose(expected_sum_all, grouped.sum(ALL_DIMS)) expected = DataArray([array['y'].values[idx].sum() for idx in [slice(9), slice(10, None), slice(9, 10)]], [['a', 'b', 'c']], ['abc']) actual = array['y'].groupby('abc').apply(np.sum) assert_allclose(expected, actual) - actual = array['y'].groupby('abc').sum() + actual = array['y'].groupby('abc').sum(ALL_DIMS) assert_allclose(expected, actual) expected_sum_axis1 = Dataset( @@ -2019,6 +2020,27 @@ def test_groupby_sum(self): assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, 'y')) assert_allclose(expected_sum_axis1, grouped.sum('y')) + def test_groupby_warning(self): + array = self.make_groupby_example_array() + grouped = array.groupby('y') + with pytest.warns(FutureWarning): + grouped.sum() + + @pytest.mark.skipif(LooseVersion(xr.__version__) < LooseVersion('0.12'), + reason="not to forget the behavior change") + def test_groupby_sum_default(self): + array = self.make_groupby_example_array() + grouped = array.groupby('abc') + + expected_sum_all = Dataset( + {'foo': Variable(['x', 'abc'], + np.array([self.x[:, :9].sum(axis=-1), + self.x[:, 10:].sum(axis=-1), + self.x[:, 9:10].sum(axis=-1)]).T), + 'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo'] + + assert_allclose(expected_sum_all, grouped.sum()) + def test_groupby_count(self): array = DataArray( [0, 0, np.nan, np.nan, 0, 0], @@ -2099,9 +2121,9 @@ def test_groupby_math(self): assert_identical(expected, actual) grouped = array.groupby('abc') - expected_agg = (grouped.mean() - np.arange(3)).rename(None) + expected_agg = (grouped.mean(ALL_DIMS) - np.arange(3)).rename(None) actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])]) - actual_agg = actual.groupby('abc').mean() + actual_agg = actual.groupby('abc').mean(ALL_DIMS) assert_allclose(expected_agg, actual_agg) with raises_regex(TypeError, 'only support binary ops'): @@ -2175,7 +2197,7 @@ def test_groupby_multidim(self): ('lon', DataArray([5, 28, 23], coords=[('lon', [30., 40., 50.])])), ('lat', DataArray([16, 40], coords=[('lat', [10., 20.])]))]: - actual_sum = array.groupby(dim).sum() + actual_sum = array.groupby(dim).sum(ALL_DIMS) assert_identical(expected_sum, actual_sum) def test_groupby_multidim_apply(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f8fb9b98ac3..237dc09d06a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -14,7 +14,7 @@ import xarray as xr from xarray import ( DataArray, Dataset, IndexVariable, MergeError, Variable, align, backends, - broadcast, open_dataset, set_options) + broadcast, open_dataset, set_options, ALL_DIMS) from xarray.core import indexing, npcompat, utils from xarray.core.common import full_like from xarray.core.pycompat import ( @@ -2648,20 +2648,28 @@ def test_groupby_reduce(self): expected = data.mean('y') expected['yonly'] = expected['yonly'].variable.set_dims({'x': 3}) - actual = data.groupby('x').mean() + actual = data.groupby('x').mean(ALL_DIMS) assert_allclose(expected, actual) actual = data.groupby('x').mean('y') assert_allclose(expected, actual) letters = data['letters'] - expected = Dataset({'xy': data['xy'].groupby(letters).mean(), + expected = Dataset({'xy': data['xy'].groupby(letters).mean(ALL_DIMS), 'xonly': (data['xonly'].mean().variable .set_dims({'letters': 2})), 'yonly': data['yonly'].groupby(letters).mean()}) - actual = data.groupby('letters').mean() + actual = data.groupby('letters').mean(ALL_DIMS) assert_allclose(expected, actual) + def test_groupby_warn(self): + data = Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)), + 'xonly': ('x', np.random.randn(3)), + 'yonly': ('y', np.random.randn(4)), + 'letters': ('y', ['a', 'a', 'b', 'b'])}) + with pytest.warns(FutureWarning): + data.groupby('x').mean() + def test_groupby_math(self): def reorder_dims(x): return x.transpose('dim1', 'dim2', 'dim3', 'time') @@ -2716,7 +2724,7 @@ def test_groupby_math_virtual(self): ds = Dataset({'x': ('t', [1, 2, 3])}, {'t': pd.date_range('20100101', periods=3)}) grouped = ds.groupby('t.day') - actual = grouped - grouped.mean() + actual = grouped - grouped.mean(ALL_DIMS) expected = Dataset({'x': ('t', [0, 0, 0])}, ds[['t', 't.day']]) assert_identical(actual, expected) @@ -2725,18 +2733,17 @@ def test_groupby_nan(self): # nan should be excluded from groupby ds = Dataset({'foo': ('x', [1, 2, 3, 4])}, {'bar': ('x', [1, 1, 2, np.nan])}) - actual = ds.groupby('bar').mean() + actual = ds.groupby('bar').mean(ALL_DIMS) expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]}) assert_identical(actual, expected) def test_groupby_order(self): # groupby should preserve variables order - ds = Dataset() for vn in ['a', 'b', 'c']: ds[vn] = DataArray(np.arange(10), dims=['t']) data_vars_ref = list(ds.data_vars.keys()) - ds = ds.groupby('t').mean() + ds = ds.groupby('t').mean(ALL_DIMS) data_vars = list(ds.data_vars.keys()) assert data_vars == data_vars_ref # coords are now at the end of the list, so the test below fails