diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index e5492ec73a4..14d79039a3a 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -47,6 +47,7 @@ core.rolling.DatasetCoarsen.median core.rolling.DatasetCoarsen.min core.rolling.DatasetCoarsen.prod + core.rolling.DatasetCoarsen.reduce core.rolling.DatasetCoarsen.std core.rolling.DatasetCoarsen.sum core.rolling.DatasetCoarsen.var @@ -190,6 +191,7 @@ core.rolling.DataArrayCoarsen.median core.rolling.DataArrayCoarsen.min core.rolling.DataArrayCoarsen.prod + core.rolling.DataArrayCoarsen.reduce core.rolling.DataArrayCoarsen.std core.rolling.DataArrayCoarsen.sum core.rolling.DataArrayCoarsen.var diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 05066f883bf..ea9c5f89bb5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,6 +91,10 @@ New Features (including globs for the latter) for ``engine="zarr"``, and so allow reading from many remote and other file systems (:pull:`4461`) By `Martin Durant `_ +- :py:class:`DataArrayCoarsen` and :py:class:`DatasetCoarsen` now implement a + ``reduce`` method, enabling coarsening operations with custom reduction + functions (:issue:`3741`, :pull:`4939`). By `Spencer Clark + `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 6087fd4c806..dbdd9595069 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -836,7 +836,9 @@ class DataArrayCoarsen(Coarsen): _reduce_extra_args_docstring = """""" @classmethod - def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): + def _reduce_method( + cls, func: Callable, include_skipna: bool = False, numeric_only: bool = False + ): """ Return a wrapped function for injecting reduction methods. see ops.inject_reduce_methods @@ -871,6 +873,38 @@ def wrapped_func(self, **kwargs): return wrapped_func + def reduce(self, func: Callable, **kwargs): + """Reduce the items in this group by applying `func` along some + dimension(s). + + Parameters + ---------- + func : callable + Function which can be called in the form `func(x, axis, **kwargs)` + to return the result of collapsing an np.ndarray over the coarsening + dimensions. It must be possible to provide the `axis` argument + with a tuple of integers. + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : DataArray + Array with summarized data. + + Examples + -------- + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b")) + >>> coarsen = da.coarsen(b=2) + >>> coarsen.reduce(np.sum) + + array([[ 1, 5], + [ 9, 13]]) + Dimensions without coordinates: a, b + """ + wrapped_func = self._reduce_method(func) + return wrapped_func(self, **kwargs) + class DatasetCoarsen(Coarsen): __slots__ = () @@ -878,7 +912,9 @@ class DatasetCoarsen(Coarsen): _reduce_extra_args_docstring = """""" @classmethod - def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): + def _reduce_method( + cls, func: Callable, include_skipna: bool = False, numeric_only: bool = False + ): """ Return a wrapped function for injecting reduction methods. see ops.inject_reduce_methods @@ -917,6 +953,28 @@ def wrapped_func(self, **kwargs): return wrapped_func + def reduce(self, func: Callable, **kwargs): + """Reduce the items in this group by applying `func` along some + dimension(s). + + Parameters + ---------- + func : callable + Function which can be called in the form `func(x, axis, **kwargs)` + to return the result of collapsing an np.ndarray over the coarsening + dimensions. It must be possible to provide the `axis` argument with + a tuple of integers. + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : Dataset + Arrays with summarized data. + """ + wrapped_func = self._reduce_method(func) + return wrapped_func(self, **kwargs) + inject_reduce_methods(DataArrayCoarsen) inject_reduce_methods(DatasetCoarsen) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 68e31cd123a..d1d36dd93b3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6382,6 +6382,22 @@ def test_coarsen_keep_attrs(): xr.testing.assert_identical(da, da2) +@pytest.mark.parametrize("da", (1, 2), indirect=True) +@pytest.mark.parametrize("window", (1, 2, 3, 4)) +@pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) +def test_coarsen_reduce(da, window, name): + if da.isnull().sum() > 1 and window == 1: + pytest.skip("These parameters lead to all-NaN slices") + + # Use boundary="trim" to accomodate all window sizes used in tests + coarsen_obj = da.coarsen(time=window, boundary="trim") + + # add nan prefix to numpy methods to get similar # behavior as bottleneck + actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) + expected = getattr(coarsen_obj, name)() + assert_allclose(actual, expected) + + @pytest.mark.parametrize("da", (1, 2), indirect=True) def test_rolling_iter(da): rolling_obj = da.rolling(time=7) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c4161093837..2118bc8b780 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6055,6 +6055,27 @@ def test_coarsen_keep_attrs(): xr.testing.assert_identical(ds, ds2) +@pytest.mark.slow +@pytest.mark.parametrize("ds", (1, 2), indirect=True) +@pytest.mark.parametrize("window", (1, 2, 3, 4)) +@pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median")) +def test_coarsen_reduce(ds, window, name): + # Use boundary="trim" to accomodate all window sizes used in tests + coarsen_obj = ds.coarsen(time=window, boundary="trim") + + # add nan prefix to numpy methods to get similar behavior as bottleneck + actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) + expected = getattr(coarsen_obj, name)() + assert_allclose(actual, expected) + + # make sure the order of data_var are not changed. + assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) + + # Make sure the dimension order is restored + for key, src_var in ds.data_vars.items(): + assert src_var.dims == actual[key].dims + + @pytest.mark.parametrize( "funcname, argument", [