diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6acb231bcd5..1af81a7d468 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -104,6 +104,11 @@ Breaking changes not have the averaged dimensions are not accidentially changed (:issue:`4885`, :pull:`5207`). By `David Schwörer `_ +- :py:attr:`DataArray.coarsen` and :py:attr:`Dataset.coarsen` no longer support passing ``keep_attrs`` + via its constructor. Pass ``keep_attrs`` via the applied function, i.e. use + ``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``. + Further, coarsen now keeps attributes per default (:pull:`5227`). + By `Mathias Hauser `_. Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index ce6157e6386..e348600b0bb 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -954,10 +954,6 @@ def coarsen( coord_func : str or mapping of hashable to str, default: "mean" function (name) that is applied to the coordinates, or a mapping from coordinate name to function (name). - keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new - object will be returned without attributes. Returns ------- @@ -1001,8 +997,6 @@ def coarsen( core.rolling.DataArrayCoarsen core.rolling.DatasetCoarsen """ - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen") return self._coarsen_cls( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 6b65c2cd536..f99a7568282 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -805,6 +805,16 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): self.windows = windows self.side = side self.boundary = boundary + + if keep_attrs is not None: + warnings.warn( + "Passing ``keep_attrs`` to ``coarsen`` is deprecated and will raise an" + " error in xarray 0.19. Please pass ``keep_attrs`` directly to the" + " applied function, i.e. use ``ds.coarsen(...).mean(keep_attrs=False)``" + " instead of ``ds.coarsen(..., keep_attrs=False).mean()``" + " Note that keep_attrs is now True per default.", + FutureWarning, + ) self.keep_attrs = keep_attrs absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] @@ -819,6 +829,19 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): coord_func[c] = duck_array_ops.mean self.coord_func = coord_func + def _get_keep_attrs(self, keep_attrs): + + if keep_attrs is None: + # TODO: uncomment the next line and remove the others after the deprecation + # keep_attrs = _get_keep_attrs(default=True) + + if self.keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + else: + keep_attrs = self.keep_attrs + + return keep_attrs + def __repr__(self): """provide a nice str repr of our coarsen object""" @@ -849,11 +872,13 @@ def _reduce_method( if include_skipna: kwargs["skipna"] = None - def wrapped_func(self, **kwargs): + def wrapped_func(self, keep_attrs: bool = None, **kwargs): from .dataarray import DataArray + keep_attrs = self._get_keep_attrs(keep_attrs) + reduced = self.obj.variable.coarsen( - self.windows, func, self.boundary, self.side, self.keep_attrs, **kwargs + self.windows, func, self.boundary, self.side, keep_attrs, **kwargs ) coords = {} for c, v in self.obj.coords.items(): @@ -866,16 +891,18 @@ def wrapped_func(self, **kwargs): self.coord_func[c], self.boundary, self.side, - self.keep_attrs, + keep_attrs, **kwargs, ) else: coords[c] = v - return DataArray(reduced, dims=self.obj.dims, coords=coords) + return DataArray( + reduced, dims=self.obj.dims, coords=coords, name=self.obj.name + ) return wrapped_func - def reduce(self, func: Callable, **kwargs): + def reduce(self, func: Callable, keep_attrs: bool = None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). @@ -886,6 +913,10 @@ def reduce(self, func: Callable, **kwargs): to return the result of collapsing an np.ndarray over the coarsening dimensions. It must be possible to provide the `axis` argument with a tuple of integers. + keep_attrs : bool, default: None + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False, the new object will be returned + without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -905,7 +936,7 @@ def reduce(self, func: Callable, **kwargs): Dimensions without coordinates: a, b """ wrapped_func = self._reduce_method(func) - return wrapped_func(self, **kwargs) + return wrapped_func(self, keep_attrs=keep_attrs, **kwargs) class DatasetCoarsen(Coarsen): @@ -925,10 +956,12 @@ def _reduce_method( if include_skipna: kwargs["skipna"] = None - def wrapped_func(self, **kwargs): + def wrapped_func(self, keep_attrs: bool = None, **kwargs): from .dataset import Dataset - if self.keep_attrs: + keep_attrs = self._get_keep_attrs(keep_attrs) + + if keep_attrs: attrs = self.obj.attrs else: attrs = {} @@ -936,26 +969,32 @@ def wrapped_func(self, **kwargs): reduced = {} for key, da in self.obj.data_vars.items(): reduced[key] = da.variable.coarsen( - self.windows, func, self.boundary, self.side, **kwargs + self.windows, + func, + self.boundary, + self.side, + keep_attrs=keep_attrs, + **kwargs, ) coords = {} for c, v in self.obj.coords.items(): - if any(d in self.windows for d in v.dims): - coords[c] = v.variable.coarsen( - self.windows, - self.coord_func[c], - self.boundary, - self.side, - **kwargs, - ) - else: - coords[c] = v.variable + # variable.coarsen returns variables not containing the window dims + # unchanged (maybe removes attrs) + coords[c] = v.variable.coarsen( + self.windows, + self.coord_func[c], + self.boundary, + self.side, + keep_attrs=keep_attrs, + **kwargs, + ) + return Dataset(reduced, coords=coords, attrs=attrs) return wrapped_func - def reduce(self, func: Callable, **kwargs): + def reduce(self, func: Callable, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). @@ -966,6 +1005,10 @@ def reduce(self, func: Callable, **kwargs): to return the result of collapsing an np.ndarray over the coarsening dimensions. It must be possible to provide the `axis` argument with a tuple of integers. + keep_attrs : bool, default: None + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False, the new object will be returned + without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -975,4 +1018,4 @@ def reduce(self, func: Callable, **kwargs): Arrays with summarized data. """ wrapped_func = self._reduce_method(func) - return wrapped_func(self, **kwargs) + return wrapped_func(self, keep_attrs=keep_attrs, **kwargs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e4813d72cde..d1d6698cea7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2140,17 +2140,18 @@ def coarsen( Apply reduction function. """ windows = {k: v for k, v in windows.items() if k in self.dims} - if not windows: - return self.copy() if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) if keep_attrs: _attrs = self.attrs else: _attrs = None + if not windows: + return self._replace(attrs=_attrs) + reshaped, axes = self._coarsen_reshape(windows, boundary, side) if isinstance(func, str): name = func diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6bb62cf75ab..d62c156b6d3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6502,33 +6502,89 @@ def test_isin(da): assert_equal(result, expected) -def test_coarsen_keep_attrs(): - _attrs = {"units": "test", "long_name": "testing"} - - da = xr.DataArray( - np.linspace(0, 364, num=364), - dims="time", - coords={"time": pd.date_range("15/12/1999", periods=364)}, - attrs=_attrs, +@pytest.mark.parametrize( + "funcname, argument", + [ + ("reduce", (np.mean,)), + ("mean", ()), + ], +) +def test_coarsen_keep_attrs(funcname, argument): + attrs_da = {"da_attr": "test"} + attrs_coords = {"attrs_coords": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + da = DataArray( + data, + dims=("coord"), + coords={"coord": ("coord", coords, attrs_coords)}, + attrs=attrs_da, + name="name", ) - da2 = da.copy(deep=True) + # attrs are now kept per default + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + result = func(*argument) + assert result.attrs == attrs_da + da.coord.attrs == attrs_coords + assert result.name == "name" - # Test dropped attrs - dat = da.coarsen(time=3, boundary="trim").mean() - assert dat.attrs == {} + # discard attrs + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + da.coord.attrs == {} + assert result.name == "name" - # Test kept attrs using dataset keyword - dat = da.coarsen(time=3, boundary="trim", keep_attrs=True).mean() - assert dat.attrs == _attrs + # test discard attrs using global option + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument) + assert result.attrs == {} + da.coord.attrs == {} + assert result.name == "name" - # Test kept attrs using global option - with xr.set_options(keep_attrs=True): - dat = da.coarsen(time=3, boundary="trim").mean() - assert dat.attrs == _attrs + # keyword takes precedence over global option + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument, keep_attrs=True) + assert result.attrs == attrs_da + da.coord.attrs == {} + assert result.name == "name" + + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=True): + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + da.coord.attrs == {} + assert result.name == "name" + + +def test_coarsen_keep_attrs_deprecated(): + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) - # Test kept attrs in original object - xr.testing.assert_identical(da, da2) + da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da) + + # deprecated option + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = da.coarsen(dim={"coord": 5}, keep_attrs=False).mean() + + assert result.attrs == {} + + # the keep_attrs in the reduction function takes precedence + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = da.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) + + assert result.attrs == {} @pytest.mark.parametrize("da", (1, 2), indirect=True) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b93f233fde7..130f65792c5 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6157,36 +6157,118 @@ def test_coarsen_coords_cftime(): np.testing.assert_array_equal(actual.time, expected_times) -def test_coarsen_keep_attrs(): - _attrs = {"units": "test", "long_name": "testing"} +@pytest.mark.parametrize( + "funcname, argument", + [ + ("reduce", (np.mean,)), + ("mean", ()), + ], +) +def test_coarsen_keep_attrs(funcname, argument): + global_attrs = {"units": "test", "long_name": "testing"} + da_attrs = {"da_attr": "test"} + attrs_coords = {"attrs_coords": "test"} + da_not_coarsend_attrs = {"da_not_coarsend_attr": "test"} - var1 = np.linspace(10, 15, 100) - var2 = np.linspace(5, 10, 100) + data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) ds = Dataset( - data_vars={"var1": ("coord", var1), "var2": ("coord", var2)}, - coords={"coord": coords}, - attrs=_attrs, + data_vars={ + "da": ("coord", data, da_attrs), + "da_not_coarsend": ("no_coord", data, da_not_coarsend_attrs), + }, + coords={"coord": ("coord", coords, attrs_coords)}, + attrs=global_attrs, ) - ds2 = ds.copy(deep=True) + # attrs are now kept per default + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + result = func(*argument) + assert result.attrs == global_attrs + assert result.da.attrs == da_attrs + assert result.da_not_coarsend.attrs == da_not_coarsend_attrs + assert result.coord.attrs == attrs_coords + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + # discard attrs + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_coarsend.attrs == {} + assert result.coord.attrs == {} + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + # test discard attrs using global option + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument) - # Test dropped attrs - dat = ds.coarsen(coord=5).mean() - assert dat.attrs == {} + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_coarsend.attrs == {} + assert result.coord.attrs == {} + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" - # Test kept attrs using dataset keyword - dat = ds.coarsen(coord=5, keep_attrs=True).mean() - assert dat.attrs == _attrs + # keyword takes precedence over global option + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument, keep_attrs=True) - # Test kept attrs using global option + assert result.attrs == global_attrs + assert result.da.attrs == da_attrs + assert result.da_not_coarsend.attrs == da_not_coarsend_attrs + assert result.coord.attrs == attrs_coords + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=True): - dat = ds.coarsen(coord=5).mean() - assert dat.attrs == _attrs + result = func(*argument, keep_attrs=False) + + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_coarsend.attrs == {} + assert result.coord.attrs == {} + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + - # Test kept attrs in original object - xr.testing.assert_identical(ds, ds2) +def test_coarsen_keep_attrs_deprecated(): + global_attrs = {"units": "test", "long_name": "testing"} + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"da": ("coord", data)}, + coords={"coord": coords}, + attrs=global_attrs, + ) + ds.da.attrs = attrs_da + + # deprecated option + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = ds.coarsen(dim={"coord": 5}, keep_attrs=False).mean() + + assert result.attrs == {} + assert result.da.attrs == {} + + # the keep_attrs in the reduction function takes precedence + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = ds.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) + + assert result.attrs == {} + assert result.da.attrs == {} @pytest.mark.slow