Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Future warning for default reduction dimension of groupby #2366

Merged
merged 14 commits into from
Sep 28, 2018
2 changes: 2 additions & 0 deletions xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@
from . import tutorial
from . import ufuncs
from . import testing

from .core.common import ALL_DIMS
5 changes: 3 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ..plot.plot import _PlotMethods
from .accessors import DatetimeAccessor
from .alignment import align, reindex_like_indexers
from .common import AbstractArray, DataWithCoords
from .common import AbstractArray, DataWithCoords, ALL_DIMS
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

F401 '.common.ALL_DIMS' imported but unused

from .coordinates import (
DataArrayCoordinates, Indexes, LevelCoordinatesSource,
assert_coordinate_consistent, remap_label_indexers)
Expand Down Expand Up @@ -1460,7 +1460,8 @@ def combine_first(self, other):
"""
return ops.fillna(self, other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=False,
**kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand Down
9 changes: 6 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from .. import conventions
from .alignment import align
from .common import (
DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects)
ALL_DIMS, DataWithCoords, ImplementsDatasetReduce,
_contains_datetime_like_objects)
from .coordinates import (
DatasetCoordinates, Indexes, LevelCoordinatesSource,
assert_coordinate_consistent, remap_label_indexers)
Expand Down Expand Up @@ -2722,8 +2723,8 @@ def combine_first(self, other):
out = ops.fillna(self, other, join="outer", dataset_join="outer")
return out

def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
allow_lazy=False, **kwargs):
def reduce(self, func, dim=None, keep_attrs=False,
numeric_only=False, allow_lazy=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).

Parameters
Expand All @@ -2750,6 +2751,8 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
Dataset with this object's DataArrays replaced with new DataArrays
of summarized data and the indicated dimension(s) removed.
"""
if dim == ALL_DIMS:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I usually prefer using is for comparison to sentinel objects -- it looks a little more idiomatic, and is more similar to what we do for comparison to None.

dim = None
if isinstance(dim, basestring):
dims = set([dim])
elif dim is None:
Expand Down
35 changes: 33 additions & 2 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from __future__ import absolute_import, division, print_function

import functools
import warnings

import numpy as np
import pandas as pd

from . import dtypes, duck_array_ops, nputils, ops
from . import dtypes, duck_array_ops, nputils, ops, utils
from .arithmetic import SupportsArithmetic
from .combine import concat
from .common import ImplementsArrayReduce, ImplementsDatasetReduce
from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce
from .pycompat import integer_types, range, zip
from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index
from .variable import IndexVariable, Variable, as_variable
Expand Down Expand Up @@ -567,10 +568,40 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
Array with summarized data and the indicated dimension(s)
removed.
"""
if dim is DEFAULT_DIMS:
dim = ALL_DIMS
# TODO change this to dim = self._group_dim after
# the deprecation process
if self._obj.ndim > 1:
warnings.warn("Default reduction dimension will be changed to "
"the grouped dimension. To silence warning, set "
"dim=xarray.ALL_DIMS explicitly.",
FutureWarning, stacklevel=2)
if dim is None: # TODO enable this after the deprecation
dim = self._group_dim
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should add this yet -- this means that code that uses groupby.reduce(... dim=None) would change its behavior. There probably isn't much code like this but I don't see a good reason to change it yet.


def reduce_array(ar):
return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
return self.apply(reduce_array, shortcut=shortcut)

# TODO remove the following class method and DEFAULT_DIMS after the
# deprecation cycle
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None,
keep_attrs=False, **kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
skipna=skipna, allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, keep_attrs=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E501 line too long (81 > 79 characters)

**kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
allow_lazy=True, **kwargs)
return wrapped_func


DEFAULT_DIMS = utils.ReprObject('<default-dims>')

ops.inject_reduce_methods(DataArrayGroupBy)
ops.inject_binary_ops(DataArrayGroupBy)
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1258,6 +1258,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
Array with summarized data and the indicated dimension(s)
removed.
"""
if dim == common.ALL_DIMS:
dim = None
if dim is not None and axis is not None:
raise ValueError("cannot supply both 'axis' and 'dim' arguments")

Expand Down
32 changes: 25 additions & 7 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
DataArray, Dataset, IndexVariable, Variable, align, broadcast, set_options)
from xarray.convert import from_cdms2
from xarray.coding.times import CFDatetimeCoder, _import_cftime
from xarray.core.common import full_like
from xarray.core.common import full_like, ALL_DIMS
from xarray.core.pycompat import OrderedDict, iteritems
from xarray.tests import (
ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal,
Expand Down Expand Up @@ -1983,15 +1983,15 @@ def test_groupby_sum(self):
self.x[:, 10:].sum(),
self.x[:, 9:10].sum()]).T),
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
assert_allclose(expected_sum_all, grouped.reduce(np.sum))
assert_allclose(expected_sum_all, grouped.sum())
assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=ALL_DIMS))
assert_allclose(expected_sum_all, grouped.sum(ALL_DIMS))

expected = DataArray([array['y'].values[idx].sum() for idx
in [slice(9), slice(10, None), slice(9, 10)]],
[['a', 'b', 'c']], ['abc'])
actual = array['y'].groupby('abc').apply(np.sum)
assert_allclose(expected, actual)
actual = array['y'].groupby('abc').sum()
actual = array['y'].groupby('abc').sum(ALL_DIMS)
assert_allclose(expected, actual)

expected_sum_axis1 = Dataset(
Expand All @@ -2002,6 +2002,24 @@ def test_groupby_sum(self):
assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, 'y'))
assert_allclose(expected_sum_axis1, grouped.sum('y'))

def test_groupby_warning(self):
array = self.make_groupby_example_array()
grouped = array.groupby('abc')
with pytest.warns(FutureWarning):
grouped.sum()

def test_groupby_sum_default(self):
array = self.make_groupby_example_array()
grouped = array.groupby('abc')

expected_sum_all = Dataset(
{'foo': Variable(['x', 'abc'],
np.array([self.x[:, :9].sum(axis=-1),
self.x[:, 10:].sum(axis=-1),
self.x[:, 9:10].sum(axis=-1)]).T),
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
assert_allclose(expected_sum_all, grouped.sum(dim=None))

def test_groupby_count(self):
array = DataArray(
[0, 0, np.nan, np.nan, 0, 0],
Expand Down Expand Up @@ -2082,9 +2100,9 @@ def test_groupby_math(self):
assert_identical(expected, actual)

grouped = array.groupby('abc')
expected_agg = (grouped.mean() - np.arange(3)).rename(None)
expected_agg = (grouped.mean(ALL_DIMS) - np.arange(3)).rename(None)
actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])])
actual_agg = actual.groupby('abc').mean()
actual_agg = actual.groupby('abc').mean(ALL_DIMS)
assert_allclose(expected_agg, actual_agg)

with raises_regex(TypeError, 'only support binary ops'):
Expand Down Expand Up @@ -2158,7 +2176,7 @@ def test_groupby_multidim(self):
('lon', DataArray([5, 28, 23],
coords=[('lon', [30., 40., 50.])])),
('lat', DataArray([16, 40], coords=[('lat', [10., 20.])]))]:
actual_sum = array.groupby(dim).sum()
actual_sum = array.groupby(dim).sum(ALL_DIMS)
assert_identical(expected_sum, actual_sum)

def test_groupby_multidim_apply(self):
Expand Down