From 1617485dfe0d4091247b23beea8b62fd3e8cd9ed Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 12 Jan 2021 21:48:26 +0100 Subject: [PATCH 1/3] Closes #4658 - Use get_index(dim) in drop_sel - Add drop_isel --- doc/api.rst | 1 + doc/whats-new.rst | 1 + xarray/core/dataset.py | 57 +++++++++++++++++++++++++++++++++++- xarray/tests/test_dataset.py | 29 ++++++++++++++++-- 4 files changed, 85 insertions(+), 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index ceab7dcc976..844f4bbbb4f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -126,6 +126,7 @@ Indexing Dataset.isel Dataset.sel Dataset.drop_sel + Dataset.drop_isel Dataset.head Dataset.tail Dataset.thin diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 88994a5bfc0..6a5f5f0ff81 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -80,6 +80,7 @@ Bug fixes - Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. +- Add :py:meth:`Dataset.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7edc2fab067..689e412826f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4053,13 +4053,68 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): labels_for_dim = [labels_for_dim] labels_for_dim = np.asarray(labels_for_dim) try: - index = self.indexes[dim] + index = self.get_index(dim) except KeyError: raise ValueError("dimension %r does not have coordinate labels" % dim) new_index = index.drop(labels_for_dim, errors=errors) ds = ds.loc[{dim: new_index}] return ds + def drop_isel(self, indexers=None, **indexers_kwargs): + """Drop index positions from this dataset. + + Parameters + ---------- + indexers : mapping of hashable to Any + Index locations to drop + **indexers_kwargs : {dim: position, ...}, optional + The keyword arguments form of ``dim`` and ``positions`` + + Returns + ------- + dropped : Dataset + + Raises + ------ + IndexError + + Examples + -------- + >>> data = np.random.randn(2, 3) + >>> labels = ["a", "b", "c"] + >>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels}) + >>> ds.drop_isel(y=[0, 2]) + + Dimensions: (x: 2, y: 1) + Coordinates: + * y (y) >> ds.drop_isel(y=1) + + Dimensions: (x: 2, y: 2) + Coordinates: + * y (y) "Dataset": diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index bd1938455b1..8f6247ec475 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2371,8 +2371,12 @@ def test_drop_index_labels(self): data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore") assert_identical(expected, actual) - with raises_regex(ValueError, "does not have coordinate labels"): - data.drop_sel(y=1) + actual = data.drop_sel(y=[1]) + expected = data.isel(y=[0, 2]) + assert_identical(expected, actual) + + with raises_regex(KeyError, "not found in axis"): + data.drop_sel(x=0) def test_drop_labels_by_keyword(self): data = Dataset( @@ -2410,6 +2414,27 @@ def test_drop_labels_by_keyword(self): with pytest.raises(ValueError): data.drop(dim="x", x="a") + def test_drop_labels_by_position(self): + data = Dataset( + {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} + ) + # Basic functionality. + assert len(data.coords["x"]) == 2 + + ds2 = data.drop_isel(x=0) + ds3 = data.drop_isel(x=[0]) + ds4 = data.drop_isel(x=[0, 1]) + ds5 = data.drop_isel(x=[0, 1], y=range(0, 6, 2)) + + assert_array_equal(ds2.coords["x"], ["b"]) + assert_array_equal(ds3.coords["x"], ["b"]) + assert ds4.coords["x"].size == 0 + assert ds5.coords["x"].size == 0 + assert_array_equal(ds5.coords["y"], [1, 3, 5]) + + with pytest.raises(KeyError): + data.drop_isel(z=1) + def test_drop_dims(self): data = xr.Dataset( { From a183f3c15ddf7988abb9acbc64dd851d710496e2 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Mon, 18 Jan 2021 21:48:13 +0100 Subject: [PATCH 2/3] address issues in PR --- doc/api.rst | 1 + doc/whats-new.rst | 2 +- xarray/core/dataarray.py | 22 ++++++++++++++++++++++ xarray/core/dataset.py | 18 +++++++++++++----- xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 28 +++++++++++++++++++--------- 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 844f4bbbb4f..9cb02441d37 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -308,6 +308,7 @@ Indexing DataArray.isel DataArray.sel DataArray.drop_sel + DataArray.drop_isel DataArray.head DataArray.tail DataArray.thin diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a5f5f0ff81..717a40f37ec 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -80,7 +80,7 @@ Bug fixes - Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. -- Add :py:meth:`Dataset.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. +- Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6fdda8fc418..ed645c06e27 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2247,6 +2247,28 @@ def drop_sel( ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) + def drop_isel(self, indexers=None, **indexers_kwargs): + """Drop index positions from this DataArray. + + Parameters + ---------- + indexers : mapping of hashable to Any + Index locations to drop + **indexers_kwargs : {dim: position, ...}, optional + The keyword arguments form of ``dim`` and ``positions`` + + Returns + ------- + dropped : DataArray + + Raises + ------ + IndexError + """ + dataset = self._to_temp_dataset() + dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) + return self._from_temp_dataset(dataset) + def dropna( self, dim: Hashable, how: str = "any", thresh: int = None ) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 689e412826f..6b08abcc815 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4061,7 +4061,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): return ds def drop_isel(self, indexers=None, **indexers_kwargs): - """Drop index positions from this dataset. + """Drop index positions from this Dataset. Parameters ---------- @@ -4080,9 +4080,17 @@ def drop_isel(self, indexers=None, **indexers_kwargs): Examples -------- - >>> data = np.random.randn(2, 3) + >>> data = np.arange(6).reshape(2, 3) >>> labels = ["a", "b", "c"] >>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels}) + >>> ds + + Dimensions: (x: 2, y: 3) + Coordinates: + * y (y) >> ds.drop_isel(y=[0, 2]) Dimensions: (x: 2, y: 1) @@ -4090,7 +4098,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs): * y (y) >> ds.drop_isel(y=1) Dimensions: (x: 2, y: 2) @@ -4098,7 +4106,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs): * y (y) Date: Tue, 19 Jan 2021 00:31:45 +0100 Subject: [PATCH 3/3] extract dict creation out of the loop --- xarray/core/dataset.py | 4 +++- xarray/tests/test_dataset.py | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6b08abcc815..65f38a7e26a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4112,6 +4112,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs): indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "drop") ds = self + dimension_index = {} for dim, pos_for_dim in indexers.items(): # Don't cast to set, as it would harm performance when labels # is a large numpy array @@ -4120,7 +4121,8 @@ def drop_isel(self, indexers=None, **indexers_kwargs): pos_for_dim = np.asarray(pos_for_dim) index = self.get_index(dim) new_index = index.delete(pos_for_dim) - ds = ds.loc[{dim: new_index}] + dimension_index[dim] = new_index + ds = ds.loc[dimension_index] return ds def drop_dims( diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b7e4f668f48..f71b8ec7741 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2423,13 +2423,11 @@ def test_drop_labels_by_position(self): actual = data.drop_isel(x=0) expected = data.drop_sel(x="a") - assert_array_equal(actual.coords["x"], ["b"]) assert_identical(expected, actual) actual = data.drop_isel(x=[0]) expected = data.drop_sel(x=["a"]) assert_identical(expected, actual) - assert_array_equal(actual.coords["x"], ["b"]) actual = data.drop_isel(x=[0, 1]) expected = data.drop_sel(x=["a", "b"]) @@ -2440,7 +2438,6 @@ def test_drop_labels_by_position(self): expected = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) assert_identical(expected, actual) assert actual.coords["x"].size == 0 - assert_array_equal(actual.coords["y"], [1, 3, 5]) with pytest.raises(KeyError): data.drop_isel(z=1)