Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coarsen: better keep_attrs #5227

Merged
merged 3 commits into from
Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ Breaking changes
not have the averaged dimensions are not accidentially changed
(:issue:`4885`, :pull:`5207`). By `David Schwörer
<https://github.com/dschwoerer>`_
- :py:attr:`DataArray.coarsen` and :py:attr:`Dataset.coarsen` no longer support passing ``keep_attrs``
via its constructor. Pass ``keep_attrs`` via the applied function, i.e. use
``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``.
Further, coarsen now keeps attributes per default (:pull:`5227`).
By `Mathias Hauser <https://github.com/mathause>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
6 changes: 0 additions & 6 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,10 +954,6 @@ def coarsen(
coord_func : str or mapping of hashable to str, default: "mean"
function (name) that is applied to the coordinates,
or a mapping from coordinate name to function (name).
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.

Returns
-------
Expand Down Expand Up @@ -1001,8 +997,6 @@ def coarsen(
core.rolling.DataArrayCoarsen
core.rolling.DatasetCoarsen
"""
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen")
return self._coarsen_cls(
Expand Down
85 changes: 64 additions & 21 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,16 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
self.windows = windows
self.side = side
self.boundary = boundary

if keep_attrs is not None:
warnings.warn(
"Passing ``keep_attrs`` to ``coarsen`` is deprecated and will raise an"
" error in xarray 0.19. Please pass ``keep_attrs`` directly to the"
" applied function, i.e. use ``ds.coarsen(...).mean(keep_attrs=False)``"
" instead of ``ds.coarsen(..., keep_attrs=False).mean()``"
" Note that keep_attrs is now True per default.",
FutureWarning,
)
self.keep_attrs = keep_attrs

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
Expand All @@ -819,6 +829,19 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
coord_func[c] = duck_array_ops.mean
self.coord_func = coord_func

def _get_keep_attrs(self, keep_attrs):

if keep_attrs is None:
# TODO: uncomment the next line and remove the others after the deprecation
# keep_attrs = _get_keep_attrs(default=True)

if self.keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)
else:
keep_attrs = self.keep_attrs

return keep_attrs

def __repr__(self):
"""provide a nice str repr of our coarsen object"""

Expand Down Expand Up @@ -849,11 +872,13 @@ def _reduce_method(
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
def wrapped_func(self, keep_attrs: bool = None, **kwargs):
from .dataarray import DataArray

keep_attrs = self._get_keep_attrs(keep_attrs)

reduced = self.obj.variable.coarsen(
self.windows, func, self.boundary, self.side, self.keep_attrs, **kwargs
self.windows, func, self.boundary, self.side, keep_attrs, **kwargs
)
coords = {}
for c, v in self.obj.coords.items():
Expand All @@ -866,16 +891,18 @@ def wrapped_func(self, **kwargs):
self.coord_func[c],
self.boundary,
self.side,
self.keep_attrs,
keep_attrs,
**kwargs,
)
else:
coords[c] = v
return DataArray(reduced, dims=self.obj.dims, coords=coords)
return DataArray(
reduced, dims=self.obj.dims, coords=coords, name=self.obj.name
)

return wrapped_func

def reduce(self, func: Callable, **kwargs):
def reduce(self, func: Callable, keep_attrs: bool = None, **kwargs):
"""Reduce the items in this group by applying `func` along some
dimension(s).

Expand All @@ -886,6 +913,10 @@ def reduce(self, func: Callable, **kwargs):
to return the result of collapsing an np.ndarray over the coarsening
dimensions. It must be possible to provide the `axis` argument
with a tuple of integers.
keep_attrs : bool, default: None
If True, the attributes (``attrs``) will be copied from the original
object to the new one. If False, the new object will be returned
without attributes. If None uses the global default.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand All @@ -905,7 +936,7 @@ def reduce(self, func: Callable, **kwargs):
Dimensions without coordinates: a, b
"""
wrapped_func = self._reduce_method(func)
return wrapped_func(self, **kwargs)
return wrapped_func(self, keep_attrs=keep_attrs, **kwargs)


class DatasetCoarsen(Coarsen):
Expand All @@ -925,37 +956,45 @@ def _reduce_method(
if include_skipna:
kwargs["skipna"] = None

def wrapped_func(self, **kwargs):
def wrapped_func(self, keep_attrs: bool = None, **kwargs):
from .dataset import Dataset

if self.keep_attrs:
keep_attrs = self._get_keep_attrs(keep_attrs)

if keep_attrs:
attrs = self.obj.attrs
else:
attrs = {}

reduced = {}
for key, da in self.obj.data_vars.items():
reduced[key] = da.variable.coarsen(
self.windows, func, self.boundary, self.side, **kwargs
self.windows,
func,
self.boundary,
self.side,
keep_attrs=keep_attrs,
**kwargs,
)

coords = {}
for c, v in self.obj.coords.items():
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v.variable
# variable.coarsen returns variables not containing the window dims
# unchanged (maybe removes attrs)
coords[c] = v.variable.coarsen(
self.windows,
self.coord_func[c],
self.boundary,
self.side,
keep_attrs=keep_attrs,
**kwargs,
)

return Dataset(reduced, coords=coords, attrs=attrs)

return wrapped_func

def reduce(self, func: Callable, **kwargs):
def reduce(self, func: Callable, keep_attrs=None, **kwargs):
"""Reduce the items in this group by applying `func` along some
dimension(s).

Expand All @@ -966,6 +1005,10 @@ def reduce(self, func: Callable, **kwargs):
to return the result of collapsing an np.ndarray over the coarsening
dimensions. It must be possible to provide the `axis` argument with
a tuple of integers.
keep_attrs : bool, default: None
If True, the attributes (``attrs``) will be copied from the original
object to the new one. If False, the new object will be returned
without attributes. If None uses the global default.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand All @@ -975,4 +1018,4 @@ def reduce(self, func: Callable, **kwargs):
Arrays with summarized data.
"""
wrapped_func = self._reduce_method(func)
return wrapped_func(self, **kwargs)
return wrapped_func(self, keep_attrs=keep_attrs, **kwargs)
7 changes: 4 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2140,17 +2140,18 @@ def coarsen(
Apply reduction function.
"""
windows = {k: v for k, v in windows.items() if k in self.dims}
if not windows:
return self.copy()

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
keep_attrs = _get_keep_attrs(default=True)

if keep_attrs:
_attrs = self.attrs
else:
_attrs = None

if not windows:
return self._replace(attrs=_attrs)

reshaped, axes = self._coarsen_reshape(windows, boundary, side)
if isinstance(func, str):
name = func
Expand Down
98 changes: 77 additions & 21 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -6502,33 +6502,89 @@ def test_isin(da):
assert_equal(result, expected)


def test_coarsen_keep_attrs():
_attrs = {"units": "test", "long_name": "testing"}

da = xr.DataArray(
np.linspace(0, 364, num=364),
dims="time",
coords={"time": pd.date_range("15/12/1999", periods=364)},
attrs=_attrs,
@pytest.mark.parametrize(
"funcname, argument",
[
("reduce", (np.mean,)),
("mean", ()),
],
)
def test_coarsen_keep_attrs(funcname, argument):
attrs_da = {"da_attr": "test"}
attrs_coords = {"attrs_coords": "test"}

data = np.linspace(10, 15, 100)
coords = np.linspace(1, 10, 100)

da = DataArray(
data,
dims=("coord"),
coords={"coord": ("coord", coords, attrs_coords)},
attrs=attrs_da,
name="name",
)

da2 = da.copy(deep=True)
# attrs are now kept per default
func = getattr(da.coarsen(dim={"coord": 5}), funcname)
result = func(*argument)
assert result.attrs == attrs_da
da.coord.attrs == attrs_coords
assert result.name == "name"

# Test dropped attrs
dat = da.coarsen(time=3, boundary="trim").mean()
assert dat.attrs == {}
# discard attrs
func = getattr(da.coarsen(dim={"coord": 5}), funcname)
result = func(*argument, keep_attrs=False)
assert result.attrs == {}
da.coord.attrs == {}
assert result.name == "name"

# Test kept attrs using dataset keyword
dat = da.coarsen(time=3, boundary="trim", keep_attrs=True).mean()
assert dat.attrs == _attrs
# test discard attrs using global option
func = getattr(da.coarsen(dim={"coord": 5}), funcname)
with set_options(keep_attrs=False):
result = func(*argument)
assert result.attrs == {}
da.coord.attrs == {}
assert result.name == "name"

# Test kept attrs using global option
with xr.set_options(keep_attrs=True):
dat = da.coarsen(time=3, boundary="trim").mean()
assert dat.attrs == _attrs
# keyword takes precedence over global option
func = getattr(da.coarsen(dim={"coord": 5}), funcname)
with set_options(keep_attrs=False):
result = func(*argument, keep_attrs=True)
assert result.attrs == attrs_da
da.coord.attrs == {}
assert result.name == "name"

func = getattr(da.coarsen(dim={"coord": 5}), funcname)
with set_options(keep_attrs=True):
result = func(*argument, keep_attrs=False)
assert result.attrs == {}
da.coord.attrs == {}
assert result.name == "name"


def test_coarsen_keep_attrs_deprecated():
attrs_da = {"da_attr": "test"}

data = np.linspace(10, 15, 100)
coords = np.linspace(1, 10, 100)

# Test kept attrs in original object
xr.testing.assert_identical(da, da2)
da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da)

# deprecated option
with pytest.warns(
FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated"
):
result = da.coarsen(dim={"coord": 5}, keep_attrs=False).mean()

assert result.attrs == {}

# the keep_attrs in the reduction function takes precedence
with pytest.warns(
FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated"
):
result = da.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False)

assert result.attrs == {}


@pytest.mark.parametrize("da", (1, 2), indirect=True)
Expand Down
Loading