From 7416cebc67194f91c21fee08228297a539bb68a8 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 29 Oct 2019 14:32:14 -0400 Subject: [PATCH 01/12] rename dataset.apply to dataset.map, deprecating apply --- xarray/core/dataset.py | 20 +++++++++++++++++--- xarray/tests/test_dataset.py | 33 ++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 05d9772cb7a..e6d8cf4119b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4088,14 +4088,14 @@ def reduce( variables, coord_names=coord_names, attrs=attrs, indexes=indexes ) - def apply( + def map( self, func: Callable, keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, ) -> "Dataset": - """Apply a function over the data variables in this dataset. + """Apply a function over each data variable in this dataset. Parameters ---------- @@ -4128,7 +4128,7 @@ def apply( Data variables: foo (dim_0, dim_1) float64 -0.3751 -1.951 -1.945 0.2948 0.711 -0.3948 bar (x) int64 -1 2 - >>> ds.apply(np.fabs) + >>> ds.map(np.fabs) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Dimensions without coordinates: dim_0, dim_1, x @@ -4145,6 +4145,20 @@ def apply( attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) + def apply( + self, + func: Callable, + keep_attrs: bool = None, + args: Iterable[Any] = (), + **kwargs: Any, + ) -> "Dataset": + warnings.warn( + "Dataset.apply is deprecated in favor of Dataset.map and will be changed or removed in a future version of xarray", + DeprecationWarning, + stacklevel=2, + ) + return self.map(func, keep_attrs, args, **kwargs) + def assign( self, variables: Mapping[Hashable, Any] = None, **variables_kwargs: Hashable ) -> "Dataset": diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eab6040e17e..c8cddc154ca 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4494,29 +4494,36 @@ def test_count(self): 
actual = ds.count() assert_identical(expected, actual) - def test_apply(self): + def test_map(self): data = create_test_data() data.attrs["foo"] = "bar" - assert_identical(data.apply(np.mean), data.mean()) + assert_identical(data.map(np.mean), data.mean()) expected = data.mean(keep_attrs=True) - actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True) + actual = data.map(lambda x: x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) - assert_identical(data.apply(lambda x: x, keep_attrs=True), data.drop("time")) + assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop("time")) def scale(x, multiple=1): return multiple * x - actual = data.apply(scale, multiple=2) + actual = data.map(scale, multiple=2) assert_equal(actual["var1"], 2 * data["var1"]) assert_identical(actual["numbers"], data["numbers"]) - actual = data.apply(np.asarray) + actual = data.map(np.asarray) expected = data.drop("time") # time is not used on a data var assert_equal(expected, actual) + def test_apply_deprecated_map(self): + data = create_test_data() + data.attrs["foo"] = "bar" + + with pytest.warns(DeprecationWarning): + assert_identical(data.map(np.mean), data.mean()) + def make_example_math_dataset(self): variables = { "bar": ("x", np.arange(100, 400, 100)), @@ -4543,15 +4550,15 @@ def test_dataset_number_math(self): def test_unary_ops(self): ds = self.make_example_math_dataset() - assert_identical(ds.apply(abs), abs(ds)) - assert_identical(ds.apply(lambda x: x + 4), ds + 4) + assert_identical(ds.map(abs), abs(ds)) + assert_identical(ds.map(lambda x: x + 4), ds + 4) for func in [ lambda x: x.isnull(), lambda x: x.round(), lambda x: x.astype(int), ]: - assert_identical(ds.apply(func), func(ds)) + assert_identical(ds.map(func), func(ds)) assert_identical(ds.isnull(), ~ds.notnull()) @@ -4564,7 +4571,7 @@ def test_unary_ops(self): def test_dataset_array_math(self): ds = self.make_example_math_dataset() - expected = ds.apply(lambda x: x - 
ds["foo"]) + expected = ds.map(lambda x: x - ds["foo"]) assert_identical(expected, ds - ds["foo"]) assert_identical(expected, -ds["foo"] + ds) assert_identical(expected, ds - ds["foo"].variable) @@ -4573,7 +4580,7 @@ def test_dataset_array_math(self): actual -= ds["foo"] assert_identical(expected, actual) - expected = ds.apply(lambda x: x + ds["bar"]) + expected = ds.map(lambda x: x + ds["bar"]) assert_identical(expected, ds + ds["bar"]) actual = ds.copy(deep=True) actual += ds["bar"] @@ -4589,7 +4596,7 @@ def test_dataset_dataset_math(self): assert_identical(ds, ds + 0 * ds) assert_identical(ds, ds + {"foo": 0, "bar": 0}) - expected = ds.apply(lambda x: 2 * x) + expected = ds.map(lambda x: 2 * x) assert_identical(expected, 2 * ds) assert_identical(expected, ds + ds) assert_identical(expected, ds + ds.data_vars) @@ -4686,7 +4693,7 @@ def test_dataset_transpose(self): assert_identical(expected, actual) actual = ds.transpose("x", "y") - expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True)) + expected = ds.map(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) ds = create_test_data() From 1c495d3e48995a75e8e5695945e6e96b8fd22ad4 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 29 Oct 2019 15:21:32 -0400 Subject: [PATCH 02/12] use apply in deprecation test --- xarray/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c8cddc154ca..a184c729ad1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4522,7 +4522,7 @@ def test_apply_deprecated_map(self): data.attrs["foo"] = "bar" with pytest.warns(DeprecationWarning): - assert_identical(data.map(np.mean), data.mean()) + assert_identical(data.apply(np.mean), data.mean()) def make_example_math_dataset(self): variables = { From a44a59b1cbc5bd6f2c2b5c68f938b703fab45d21 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 29 
Oct 2019 19:12:39 -0400 Subject: [PATCH 03/12] adjust docs --- doc/computation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index ae5f4bc5c66..d477cb63d72 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -462,13 +462,13 @@ Datasets support most of the same methods found on data arrays: abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or -alternatively you can use :py:meth:`~xarray.Dataset.apply` to apply a function +alternatively you can use :py:meth:`~xarray.Dataset.map` to map a function to each variable in a dataset: .. ipython:: python np.sin(ds) - ds.apply(np.sin) + ds.map(np.sin) Datasets also use looping over variables for *broadcasting* in binary arithmetic. You can do arithmetic between any ``DataArray`` and a dataset: From fa57466d514509b65136caacdc67996ecbe1865a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 12:51:08 -0500 Subject: [PATCH 04/12] add groupby rename, remove depreciation warnings (to pending) --- xarray/core/dataset.py | 8 ++++---- xarray/core/groupby.py | 25 +++++++++++++++++++++---- xarray/tests/test_dataset.py | 2 +- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b099562326a..f4e8264f816 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4115,7 +4115,7 @@ def map( args: Iterable[Any] = (), **kwargs: Any, ) -> "Dataset": - """Apply a function over each data variable in this dataset. + """Apply a function to each variable in this dataset Parameters ---------- @@ -4135,7 +4135,7 @@ def map( Returns ------- applied : Dataset - Resulting dataset from applying ``func`` over each data variable. + Resulting dataset from applying ``func`` to each data variable. 
Examples -------- @@ -4173,8 +4173,8 @@ def apply( **kwargs: Any, ) -> "Dataset": warnings.warn( - "Dataset.apply is deprecated in favor of Dataset.map and will be changed or removed in a future version of xarray", - DeprecationWarning, + "Dataset.apply may be deprecated in the future. Using Dataset.map is encouraged", + PendingDeprecationWarning, stacklevel=2, ) return self.map(func, keep_attrs, args, **kwargs) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c8906e34737..8cf92f01e7f 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -655,8 +655,9 @@ def lookup_order(dimension): new_order = sorted(stacked.dims, key=lookup_order) return stacked.transpose(*new_order, transpose_coords=self._restore_coord_dims) - def apply(self, func, shortcut=False, args=(), **kwargs): - """Apply a function over each array in the group and concatenate them + def map(self, func, shortcut=False, args=(), **kwargs): + + """Apply a function to each array in the group and concatenate them together into a new array. `func` is called like `func(ar, *args, **kwargs)` for each array `ar` @@ -702,6 +703,14 @@ def apply(self, func, shortcut=False, args=(), **kwargs): applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped) return self._combine(applied, shortcut=shortcut) + def apply(self, func, shortcut=False, args=(), **kwargs): + warnings.warn( + "GroupBy.apply may be deprecated in the future. 
Using GroupBy.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, shortcut=shortcut, args=args, **kwargs) + def _combine(self, applied, restore_coord_dims=False, shortcut=False): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) @@ -828,8 +837,8 @@ def reduce_array(ar): class DatasetGroupBy(GroupBy, ImplementsDatasetReduce): - def apply(self, func, args=(), shortcut=None, **kwargs): - """Apply a function over each Dataset in the group and concatenate them + def map(self, func, args=(), shortcut=None, **kwargs): + """Apply a function to each Dataset in the group and concatenate them together into a new Dataset. `func` is called like `func(ds, *args, **kwargs)` for each dataset `ds` @@ -862,6 +871,14 @@ def apply(self, func, args=(), shortcut=None, **kwargs): applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped()) return self._combine(applied) + def apply(self, func, args=(), shortcut=None, **kwargs): + warnings.warn( + "GroupBy.apply may be deprecated in the future. 
Using GroupBy.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, shortcut=shortcut, args=args, **kwargs) + def _combine(self, applied): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3012993bae6..09f610bc230 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4542,7 +4542,7 @@ def test_apply_deprecated_map(self): data = create_test_data() data.attrs["foo"] = "bar" - with pytest.warns(DeprecationWarning): + with pytest.warns(PendingDeprecationWarning): assert_identical(data.apply(np.mean), data.mean()) def make_example_math_dataset(self): From 5da5fb242fefef7b88adfab2a9eab9470c85e955 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 13:14:42 -0500 Subject: [PATCH 05/12] change internal usages --- xarray/core/groupby.py | 10 +++++----- xarray/core/resample.py | 29 +++++++++++++++++++++++---- xarray/tests/test_dataarray.py | 36 +++++++++++++++++----------------- xarray/tests/test_groupby.py | 14 ++++++------- 4 files changed, 55 insertions(+), 34 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8cf92f01e7f..555f2a0790f 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -608,7 +608,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Dataset.swap_dims """ coords_kwargs = either_dict_or_kwargs(coords, coords_kwargs, "assign_coords") - return self.apply(lambda ds: ds.assign_coords(**coords_kwargs)) + return self.map(lambda ds: ds.assign_coords(**coords_kwargs)) def _maybe_reorder(xarray_obj, dim, positions): @@ -774,7 +774,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): if dim is None: dim = self._group_dim - out = self.apply( + out = self.map( self._obj.__class__.quantile, shortcut=False, q=q, @@ -829,7 +829,7 @@ def reduce_array(ar): check_reduce_dims(dim, self.dims) 
- return self.apply(reduce_array, shortcut=shortcut) + return self.map(reduce_array, shortcut=shortcut) ops.inject_reduce_methods(DataArrayGroupBy) @@ -931,7 +931,7 @@ def reduce_dataset(ds): check_reduce_dims(dim, self.dims) - return self.apply(reduce_dataset) + return self.map(reduce_dataset) def assign(self, **kwargs): """Assign data variables by group. @@ -940,7 +940,7 @@ def assign(self, **kwargs): -------- Dataset.assign """ - return self.apply(lambda ds: ds.assign(**kwargs)) + return self.map(lambda ds: ds.assign(**kwargs)) ops.inject_reduce_methods(DatasetGroupBy) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 2cb1bd55e19..4c03186065d 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -173,8 +173,8 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - def apply(self, func, shortcut=False, args=(), **kwargs): - """Apply a function over each array in the group and concatenate them + def map(self, func, shortcut=False, args=(), **kwargs): + """Apply a function to each array in the group and concatenate them together into a new array. `func` is called like `func(ar, *args, **kwargs)` for each array `ar` @@ -212,7 +212,9 @@ def apply(self, func, shortcut=False, args=(), **kwargs): applied : DataArray or DataArray The result of splitting, applying and combining this array. """ - combined = super().apply(func, shortcut=shortcut, args=args, **kwargs) + # TODO: the argument order for Resample doesn't match that for its parent, + # GroupBy + combined = super().map(func, shortcut=shortcut, args=args, **kwargs) # If the aggregation function didn't drop the original resampling # dimension, then we need to do so before we can rename the proxy @@ -225,6 +227,17 @@ def apply(self, func, shortcut=False, args=(), **kwargs): return combined + def apply(self, func, args=(), shortcut=None, **kwargs): + warnings.warn( + "Resample.apply may be deprecated in the future. 
Using Resample.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func=func, shortcut=shortcut, args=args, **kwargs) + + +import warnings + ops.inject_reduce_methods(DataArrayResample) ops.inject_binary_ops(DataArrayResample) @@ -247,7 +260,7 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - def apply(self, func, args=(), shortcut=None, **kwargs): + def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for resampling and concatenate them together into a new Dataset. @@ -282,6 +295,14 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return combined.rename({self._resample_dim: self._dim}) + def apply(self, func, args=(), shortcut=None, **kwargs): + warnings.warn( + "Resample.apply may be deprecated in the future. Using Resample.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func=func, shortcut=shortcut, args=args, **kwargs) + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index acfe684d220..42fae2c9dd4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2417,7 +2417,7 @@ def test_groupby_properties(self): assert_array_equal(expected_groups[key], grouped.groups[key]) assert 3 == len(grouped) - def test_groupby_apply_identity(self): + def test_groupby_map_identity(self): expected = self.make_groupby_example_array() idx = expected.coords["y"] @@ -2428,7 +2428,7 @@ def identity(x): for shortcut in [False, True]: for squeeze in [False, True]: grouped = expected.groupby(g, squeeze=squeeze) - actual = grouped.apply(identity, shortcut=shortcut) + actual = grouped.map(identity, shortcut=shortcut) assert_identical(expected, actual) def test_groupby_sum(self): @@ -2461,7 +2461,7 @@ def test_groupby_sum(self): [["a", "b", "c"]], ["abc"], ) - actual = array["y"].groupby("abc").apply(np.sum) + actual = array["y"].groupby("abc").map(np.sum) assert_allclose(expected, actual) actual = array["y"].groupby("abc").sum(...) 
assert_allclose(expected, actual) @@ -2532,7 +2532,7 @@ def test_groupby_reduce_attrs(self): expected.attrs["foo"] = "bar" assert_identical(expected, actual) - def test_groupby_apply_center(self): + def test_groupby_map_center(self): def center(x): return x - np.mean(x) @@ -2545,16 +2545,16 @@ def center(x): ) expected_ds["foo"] = (["x", "y"], exp_data) expected_centered = expected_ds["foo"] - assert_allclose(expected_centered, grouped.apply(center)) + assert_allclose(expected_centered, grouped.map(center)) - def test_groupby_apply_ndarray(self): + def test_groupby_map_ndarray(self): # regression test for #326 array = self.make_groupby_example_array() grouped = array.groupby("abc") - actual = grouped.apply(np.asarray) + actual = grouped.map(np.asarray) assert_equal(array, actual) - def test_groupby_apply_changes_metadata(self): + def test_groupby_map_changes_metadata(self): def change_metadata(x): x.coords["x"] = x.coords["x"] * 2 x.attrs["fruit"] = "lemon" @@ -2562,7 +2562,7 @@ def change_metadata(x): array = self.make_groupby_example_array() grouped = array.groupby("abc") - actual = grouped.apply(change_metadata) + actual = grouped.map(change_metadata) expected = array.copy() expected = change_metadata(expected) assert_equal(expected, actual) @@ -2631,7 +2631,7 @@ def test_groupby_restore_dim_order(self): ("a", ("a", "y")), ("b", ("x", "b")), ]: - result = array.groupby(by).apply(lambda x: x.squeeze()) + result = array.groupby(by).map(lambda x: x.squeeze()) assert result.dims == expected_dims def test_groupby_restore_coord_dims(self): @@ -2651,13 +2651,13 @@ def test_groupby_restore_coord_dims(self): ("a", ("a", "y")), ("b", ("x", "b")), ]: - result = array.groupby(by, restore_coord_dims=True).apply( + result = array.groupby(by, restore_coord_dims=True).map( lambda x: x.squeeze() )["c"] assert result.dims == expected_dims with pytest.warns(FutureWarning): - array.groupby("x").apply(lambda x: x.squeeze()) + array.groupby("x").map(lambda x: x.squeeze()) def 
test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims="x") @@ -2699,9 +2699,9 @@ def test_groupby_multidim(self): actual_sum = array.groupby(dim).sum(...) assert_identical(expected_sum, actual_sum) - def test_groupby_multidim_apply(self): + def test_groupby_multidim_map(self): array = self.make_groupby_multidim_example_array() - actual = array.groupby("lon").apply(lambda x: x - x.mean()) + actual = array.groupby("lon").map(lambda x: x - x.mean()) expected = DataArray( [[[-2.5, -6.0], [-5.0, -8.5]], [[2.5, 3.0], [8.0, 8.5]]], coords=array.coords, @@ -2722,7 +2722,7 @@ def test_groupby_bins(self): ) # the problem with this is that it overwrites the dimensions of array! # actual = array.groupby('dim_0', bins=bins).sum() - actual = array.groupby_bins("dim_0", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("dim_0", bins).map(lambda x: x.sum()) assert_identical(expected, actual) # make sure original array dims are unchanged assert len(array.dim_0) == 4 @@ -2744,12 +2744,12 @@ def test_groupby_bins_multidim(self): bins = [0, 15, 20] bin_coords = pd.cut(array["lat"].values.flat, bins).categories expected = DataArray([16, 40], dims="lat_bins", coords={"lat_bins": bin_coords}) - actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) # modify the array coordinates to be non-monotonic after unstacking array["lat"].data = np.array([[10.0, 20.0], [20.0, 10.0]]) expected = DataArray([28, 28], dims="lat_bins", coords={"lat_bins": bin_coords}) - actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) def test_groupby_bins_sort(self): @@ -2784,7 +2784,7 @@ def func(arg1, arg2, arg3=0.0): times = pd.date_range("2000", periods=3, freq="D") da = xr.DataArray([1.0, 1.0, 1.0], coords=[times], dims=["time"]) expected = 
xr.DataArray([3.0, 3.0, 3.0], coords=[times], dims=["time"]) - actual = da.resample(time="D").apply(func, args=(1.0,), arg3=1.0) + actual = da.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(actual, expected) def test_resample_first(self): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e2216547ac8..581affa3471 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -45,14 +45,14 @@ def test_groupby_dims_property(dataset): assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims -def test_multi_index_groupby_apply(dataset): +def test_multi_index_groupby_map(dataset): # regression test for GH873 ds = dataset.isel(z=1, drop=True)[["foo"]] expected = 2 * ds actual = ( ds.stack(space=["x", "y"]) .groupby("space") - .apply(lambda x: 2 * x) + .map(lambda x: 2 * x) .unstack("space") ) assert_equal(expected, actual) @@ -107,23 +107,23 @@ def test_groupby_input_mutation(): assert_identical(array, array_copy) # should not modify inputs -def test_da_groupby_apply_func_args(): +def test_da_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 array = xr.DataArray([1, 1, 1], [("x", [1, 2, 3])]) expected = xr.DataArray([3, 3, 3], [("x", [1, 2, 3])]) - actual = array.groupby("x").apply(func, args=(1,), arg3=1) + actual = array.groupby("x").map(func, args=(1,), arg3=1) assert_identical(expected, actual) -def test_ds_groupby_apply_func_args(): +def test_ds_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) expected = xr.Dataset({"foo": ("x", [3, 3, 3])}, {"x": [1, 2, 3]}) - actual = dataset.groupby("x").apply(func, args=(1,), arg3=1) + actual = dataset.groupby("x").map(func, args=(1,), arg3=1) assert_identical(expected, actual) @@ -285,7 +285,7 @@ def test_groupby_drops_nans(): expected.variable.values[0, 0, :] = np.nan expected.variable.values[-1, -1, :] = np.nan 
expected.variable.values[3, 0, :] = np.nan - actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims) + actual = grouped.map(lambda x: x).transpose(*ds.variable.dims) assert_identical(actual, expected) # reduction along grouped dimension From 5200383c85857f22e9c4fc82c935006d6ba4de6e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 13:17:24 -0500 Subject: [PATCH 06/12] formatting --- xarray/core/groupby.py | 1 - xarray/core/resample.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 555f2a0790f..d27387254fd 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -656,7 +656,6 @@ def lookup_order(dimension): return stacked.transpose(*new_order, transpose_coords=self._restore_coord_dims) def map(self, func, shortcut=False, args=(), **kwargs): - """Apply a function to each array in the group and concatenate them together into a new array. diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 4c03186065d..77df5135546 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,3 +1,5 @@ +import warnings + from . import ops from .groupby import DataArrayGroupBy, DatasetGroupBy @@ -236,9 +238,6 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -import warnings - - ops.inject_reduce_methods(DataArrayResample) ops.inject_binary_ops(DataArrayResample) From 4103380d6af98a5e57f369b563c828514ef7967c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 13:32:39 -0500 Subject: [PATCH 07/12] whatsnew --- doc/whats-new.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 04fe88e9993..b572b1dda67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,13 @@ New Features option for dropping either labels or variables, but using the more specific methods is encouraged. 
(:pull:`3475`) By `Maximilian Roos `_ +- :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` have been added for + mapping / applying a function over each item in the collection, reflecting the widely used + and least surprising name for this operation. + The existing ``apply`` methods remain for backward compatibility, though using the ``map`` + methods is encouraged. + (:pull:`3459`) + By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) to represent all 'other' dimensions. For example, to move one dimension to the front, use `.transpose('x', ...)`. (:pull:`3421`) From 1830d0a2206c00a7a7355bed2d83c77f5e51385c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 13:47:58 -0500 Subject: [PATCH 08/12] docs --- doc/groupby.rst | 15 ++++++++------- doc/howdoi.rst | 2 +- xarray/tests/test_dataarray.py | 2 ++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/groupby.rst b/doc/groupby.rst index 52a27f4f160..f5943703765 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -35,10 +35,11 @@ Let's create a simple example dataset: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 3))}, - coords={'x': [10, 20, 30, 40], - 'letters': ('x', list('abba'))}) - arr = ds['foo'] + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 3))}, + coords={"x": [10, 20, 30, 40], "letters": ("x", list("abba"))}, + ) + arr = ds["foo"] ds If we groupby the name of a variable or coordinate in a dataset (we can also @@ -93,7 +94,7 @@ Apply ~~~~~ To apply a function to each group, you can use the flexible -:py:meth:`~xarray.DatasetGroupBy.apply` method. The resulting objects are automatically +:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: .. 
ipython:: python @@ -101,7 +102,7 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').apply(standardize) + arr.groupby('letters').map(standardize) GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an @@ -202,7 +203,7 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen dims=['ny','nx']) da da.groupby('lon').sum(...) - da.groupby('lon').apply(lambda x: x - x.mean(), shortcut=False) + da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 721d1323e73..91644ba2718 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -44,7 +44,7 @@ How do I ... * - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more) * - apply a function on all data variables in a Dataset - - :py:meth:`Dataset.apply` + - :py:meth:`Dataset.map` * - write xarray objects with complex values to a netCDF file - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True`` * - make xarray objects look like other xarray objects diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 42fae2c9dd4..776dd14b76a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1,4 +1,6 @@ import pickle + + import sys import warnings from copy import deepcopy From 9e1440f92cd2a386ce3400c313e89a8663fc4d0a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 13:53:36 -0500 Subject: [PATCH 09/12] docs --- doc/quick-overview.rst | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 7d84199323d..741b3d1a5fe 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -142,7 +142,7 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') labels data.groupby(labels).mean('y') - data.groupby(labels).apply(lambda x: x - x.min()) + data.groupby(labels).map(lambda x: x - x.min()) Plotting -------- From 0887359b3194fb1d96833a34542b6e3c203ad54e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 13:53:42 -0500 Subject: [PATCH 10/12] internal usages --- xarray/tests/test_dataset.py | 10 +++++----- xarray/tests/test_sparse.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 09f610bc230..d001c43da94 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3310,17 +3310,17 @@ def identity(x): return x for k in ["x", "c", "y"]: - actual = data.groupby(k, squeeze=False).apply(identity) + actual = data.groupby(k, squeeze=False).map(identity) assert_equal(data, actual) def test_groupby_returns_new_type(self): data = Dataset({"z": (["x", "y"], np.random.randn(3, 5))}) - actual = data.groupby("x").apply(lambda ds: ds["z"]) + actual = data.groupby("x").map(lambda ds: ds["z"]) expected = data["z"] assert_identical(expected, actual) - actual = data["z"].groupby("x").apply(lambda x: x.to_dataset()) + actual = data["z"].groupby("x").map(lambda x: x.to_dataset()) expected = data assert_identical(expected, actual) @@ -3639,7 +3639,7 @@ def func(arg1, arg2, arg3=0.0): times = pd.date_range("2000", freq="D", periods=3) ds = xr.Dataset({"foo": ("time", [1.0, 1.0, 1.0]), "time": times}) expected = xr.Dataset({"foo": ("time", [3.0, 3.0, 3.0]), "time": times}) - actual = ds.resample(time="D").apply(func, args=(1.0,), arg3=1.0) + actual = 
ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) def test_to_array(self): @@ -4538,7 +4538,7 @@ def scale(x, multiple=1): expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) - def test_apply_deprecated_map(self): + def test_apply_pending_deprecated_map(self): data = create_test_data() data.attrs["foo"] = "bar" diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 8e2d4b8e064..a31da162487 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -339,7 +339,7 @@ def test_dataarray_property(prop): (do("copy"), True), (do("count"), False), (do("diff", "x"), True), - (do("drop", "x"), True), + (do("drop_vars", "x"), True), (do("expand_dims", {"z": 2}, axis=2), True), (do("get_axis_num", "x"), False), (do("get_index", "x"), False), From 3fc62dd0cb8fde9a4a025ccb399cbf579cd9a576 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 14:34:52 -0500 Subject: [PATCH 11/12] formatting --- xarray/tests/test_dataarray.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 776dd14b76a..42fae2c9dd4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1,6 +1,4 @@ import pickle - - import sys import warnings from copy import deepcopy From 609cb5d31a0091e2913c48465d09a0adc503b74b Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 9 Nov 2019 15:35:00 -0500 Subject: [PATCH 12/12] docstring, see also --- xarray/core/dataarray.py | 11 ++++++++--- xarray/core/dataset.py | 12 ++++++++++++ xarray/core/groupby.py | 15 +++++++++++++++ xarray/core/resample.py | 15 +++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d2d37871ee9..c9ccd88b61e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -919,7 +919,7 @@ def copy(self, deep: bool = True, 
data: Any = None) -> "DataArray": Coordinates: * x (x) >> arr.identical(roundtripped) True - See also + See Also -------- DataArray.stack """ @@ -1922,6 +1922,11 @@ def drop( """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged + + See Also + -------- + DataArray.drop_vars + DataArray.drop_sel """ ds = self._to_temp_dataset().drop(labels, dim, errors=errors) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f4e8264f816..dc5a315e72a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3557,6 +3557,11 @@ def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged + + See Also + -------- + Dataset.drop_vars + Dataset.drop_sel """ if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') @@ -4172,6 +4177,13 @@ def apply( args: Iterable[Any] = (), **kwargs: Any, ) -> "Dataset": + """ + Backward compatible implementation of ``map`` + + See Also + -------- + Dataset.map + """ warnings.warn( "Dataset.apply may be deprecated in the future. Using Dataset.map is encouraged", PendingDeprecationWarning, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index d27387254fd..8ae65d9b9df 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -703,6 +703,13 @@ def map(self, func, shortcut=False, args=(), **kwargs): return self._combine(applied, shortcut=shortcut) def apply(self, func, shortcut=False, args=(), **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DataArrayGroupBy.map + """ warnings.warn( "GroupBy.apply may be deprecated in the future. 
Using GroupBy.map is encouraged", PendingDeprecationWarning, @@ -871,6 +878,14 @@ def map(self, func, args=(), shortcut=None, **kwargs): return self._combine(applied) def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DatasetGroupBy.map + """ + warnings.warn( "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged", PendingDeprecationWarning, diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 77df5135546..fb388490d06 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -230,6 +230,13 @@ def map(self, func, shortcut=False, args=(), **kwargs): return combined def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DataArrayResample.map + """ warnings.warn( "Resample.apply may be deprecated in the future. Using Resample.map is encouraged", PendingDeprecationWarning, @@ -295,6 +302,14 @@ def map(self, func, args=(), shortcut=None, **kwargs): return combined.rename({self._resample_dim: self._dim}) def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DatasetResample.map + """ + warnings.warn( "Resample.apply may be deprecated in the future. Using Resample.map is encouraged", PendingDeprecationWarning,