Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add drop_isel #4819

Merged
merged 3 commits into from
Jan 18, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ Indexing
Dataset.isel
Dataset.sel
Dataset.drop_sel
Dataset.drop_isel
Dataset.head
Dataset.tail
Dataset.thin
Expand Down Expand Up @@ -307,6 +308,7 @@ Indexing
DataArray.isel
DataArray.sel
DataArray.drop_sel
DataArray.drop_isel
DataArray.head
DataArray.tail
DataArray.thin
Expand Down
1 change: 1 addition & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ Bug fixes
- Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and
:py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`).
By `Julien Seguinot <https://github.com/juseg>`_.
- Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo <https://github.com/mesejo>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
22 changes: 22 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2247,6 +2247,28 @@ def drop_sel(
ds = self._to_temp_dataset().drop_sel(labels, errors=errors)
return self._from_temp_dataset(ds)

def drop_isel(self, indexers=None, **indexers_kwargs):
"""Drop index positions from this DataArray.

Thin wrapper: the array is converted to a temporary Dataset, the
positions are dropped with ``Dataset.drop_isel``, and the result is
converted back to a DataArray.

Parameters
----------
indexers : mapping of hashable to Any
Mapping from dimension name to the integer position(s) to drop
along that dimension.
**indexers_kwargs : {dim: position, ...}, optional
The keyword arguments form of ``indexers``.

Returns
-------
dropped : DataArray
The result of ``Dataset.drop_isel`` converted back to a DataArray.

Raises
------
IndexError
Presumably when a position is out of bounds for its dimension
(raised by the underlying ``Dataset.drop_isel``) -- TODO confirm.
"""
# Delegate to the Dataset implementation so that DataArray and Dataset
# share a single code path for positional drops.
dataset = self._to_temp_dataset()
dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs)
return self._from_temp_dataset(dataset)

def dropna(
self, dim: Hashable, how: str = "any", thresh: int = None
) -> "DataArray":
Expand Down
65 changes: 64 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4053,13 +4053,76 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs):
labels_for_dim = [labels_for_dim]
labels_for_dim = np.asarray(labels_for_dim)
try:
index = self.indexes[dim]
index = self.get_index(dim)
keewis marked this conversation as resolved.
Show resolved Hide resolved
except KeyError:
raise ValueError("dimension %r does not have coordinate labels" % dim)
new_index = index.drop(labels_for_dim, errors=errors)
ds = ds.loc[{dim: new_index}]
return ds

def drop_isel(self, indexers=None, **indexers_kwargs):
"""Drop index positions from this Dataset.

Parameters
----------
indexers : mapping of hashable to Any
Index locations to drop
**indexers_kwargs : {dim: position, ...}, optional
The keyword arguments form of ``dim`` and ``positions``

Returns
-------
dropped : Dataset

Raises
------
IndexError

Examples
--------
>>> data = np.arange(6).reshape(2, 3)
>>> labels = ["a", "b", "c"]
>>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels})
mesejo marked this conversation as resolved.
Show resolved Hide resolved
>>> ds
<xarray.Dataset>
Dimensions: (x: 2, y: 3)
Coordinates:
* y (y) <U1 'a' 'b' 'c'
Dimensions without coordinates: x
Data variables:
A (x, y) int64 0 1 2 3 4 5
>>> ds.drop_isel(y=[0, 2])
<xarray.Dataset>
Dimensions: (x: 2, y: 1)
Coordinates:
* y (y) <U1 'b'
Dimensions without coordinates: x
Data variables:
A (x, y) int64 1 4
>>> ds.drop_isel(y=1)
<xarray.Dataset>
Dimensions: (x: 2, y: 2)
Coordinates:
* y (y) <U1 'a' 'c'
Dimensions without coordinates: x
Data variables:
A (x, y) int64 0 2 3 5
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "drop")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "drop")
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "drop_isel")

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one we should change — @mesejo would you be up for putting the one-line PR in?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I can do it. No problem, but just to be sure, drop_sel also has:
labels = either_dict_or_kwargs(labels, labels_kwargs, "drop")

Should I change it as well? BTW, drop_sel also uses np.random.randn in its docstrings; should I change that too?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good spot — that's a mistake; it should be "drop_sel".

We're trying to move away from random values in the docstrings, but it's not urgent — if you can that would be appreciated, but correcting this & #4819 (comment) is fine to do alone.

Thank you!


ds = self
for dim, pos_for_dim in indexers.items():
# Don't cast to set, as it would harm performance when labels
# is a large numpy array
if utils.is_scalar(pos_for_dim):
pos_for_dim = [pos_for_dim]
pos_for_dim = np.asarray(pos_for_dim)
index = self.get_index(dim)
new_index = index.delete(pos_for_dim)
ds = ds.loc[{dim: new_index}]
mesejo marked this conversation as resolved.
Show resolved Hide resolved
return ds

def drop_dims(
self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
) -> "Dataset":
Expand Down
6 changes: 6 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2327,6 +2327,12 @@ def test_drop_index_labels(self):
with pytest.warns(DeprecationWarning):
arr.drop([0, 1, 3], dim="y", errors="ignore")

def test_drop_index_positions(self):
arr = DataArray(np.random.randn(2, 3), dims=["x", "y"])
actual = arr.drop_sel(y=[0, 1])
Copy link
Collaborator

@keewis keewis Jan 18, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it intentional that you use drop_sel here? This succeeds because of the change to self.get_index(dim) in drop_sel, but it really should fail. Edit: as it turns out, drop_sel is consistent with sel now, so I guess it makes sense that this does not fail. However, since the test is called test_drop_index_positions, I still think this is a typo:

Suggested change
actual = arr.drop_sel(y=[0, 1])
actual = arr.drop_isel(y=[0, 1])

expected = arr[:, 2:]
assert_identical(actual, expected)

def test_dropna(self):
x = np.random.randn(4, 4)
x[::2, 0] = np.nan
Expand Down
39 changes: 37 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2371,8 +2371,12 @@ def test_drop_index_labels(self):
data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore")
assert_identical(expected, actual)

with raises_regex(ValueError, "does not have coordinate labels"):
data.drop_sel(y=1)
actual = data.drop_sel(y=[1])
expected = data.isel(y=[0, 2])
assert_identical(expected, actual)
keewis marked this conversation as resolved.
Show resolved Hide resolved

with raises_regex(KeyError, "not found in axis"):
data.drop_sel(x=0)

def test_drop_labels_by_keyword(self):
data = Dataset(
Expand Down Expand Up @@ -2410,6 +2414,37 @@ def test_drop_labels_by_keyword(self):
with pytest.raises(ValueError):
data.drop(dim="x", x="a")

def test_drop_labels_by_position(self):
mesejo marked this conversation as resolved.
Show resolved Hide resolved
data = Dataset(
{"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
)
# Basic functionality.
assert len(data.coords["x"]) == 2

actual = data.drop_isel(x=0)
expected = data.drop_sel(x="a")
assert_array_equal(actual.coords["x"], ["b"])
mesejo marked this conversation as resolved.
Show resolved Hide resolved
assert_identical(expected, actual)

actual = data.drop_isel(x=[0])
expected = data.drop_sel(x=["a"])
assert_identical(expected, actual)
assert_array_equal(actual.coords["x"], ["b"])

actual = data.drop_isel(x=[0, 1])
expected = data.drop_sel(x=["a", "b"])
assert_identical(expected, actual)
assert actual.coords["x"].size == 0

actual = data.drop_isel(x=[0, 1], y=range(0, 6, 2))
expected = data.drop_sel(x=["a", "b"], y=range(0, 6, 2))
assert_identical(expected, actual)
assert actual.coords["x"].size == 0
assert_array_equal(actual.coords["y"], [1, 3, 5])

with pytest.raises(KeyError):
data.drop_isel(z=1)

def test_drop_dims(self):
data = xr.Dataset(
{
Expand Down