Skip to content

Commit

Permalink
Add "errors" keyword argument to drop() and drop_dims() (#2994) (#3028)
Browse files Browse the repository at this point in the history
* Add "errors" keyword argument (GH2994)

Adds an errors keyword to Dataset.drop(), Dataset.drop_dims(), and DataArray.drop() (GH2994). Consistent with pandas, the value can be either "raise" or "ignore".

* Fix quotes

* Different pandas versions raise different errors

* Error messages also vary

* Correct doc for DataArray.drop; array, not dataset

* Require errors argument to be passed with a keyword
  • Loading branch information
andrew-c-ross authored and shoyer committed Jun 20, 2019
1 parent 145f25f commit 9c0bbf7
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 14 deletions.
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ Enhancements
formatted datetimes. By `Alan Brammer <https://github.com/abrammer>`_.
- Add ``.str`` accessor to DataArrays for string related manipulations.
By `0x0L <https://github.com/0x0L>`_.
- Add ``errors`` keyword argument to :py:meth:`Dataset.drop`, :py:meth:`Dataset.drop_dims`
and :py:meth:`DataArray.drop` that allows ignoring errors if a passed label or
dimension is not in the dataset or array (:issue:`2994`).
By `Andrew Ross <https://github.com/andrew-c-ross>`_.


Bug fixes
~~~~~~~~~
Expand Down
10 changes: 7 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1462,7 +1462,7 @@ def transpose(self, *dims, transpose_coords=None) -> 'DataArray':
def T(self) -> 'DataArray':
return self.transpose()

def drop(self, labels, dim=None):
def drop(self, labels, dim=None, *, errors='raise'):
"""Drop coordinates or index labels from this DataArray.
Parameters
Expand All @@ -1472,14 +1472,18 @@ def drop(self, labels, dim=None):
dim : str, optional
Dimension along which to drop index labels. By default (if
``dim is None``), drops coordinates rather than index labels.
errors: {'raise', 'ignore'}, optional
If 'raise' (default), raises a ValueError if
any of the coordinates or index labels passed are not
in the array. If 'ignore', any given labels that are in the
array are dropped and no error is raised.
Returns
-------
dropped : DataArray
"""
if utils.is_scalar(labels):
labels = [labels]
ds = self._to_temp_dataset().drop(labels, dim)
ds = self._to_temp_dataset().drop(labels, dim, errors=errors)
return self._from_temp_dataset(ds)

def dropna(self, dim, how='any', thresh=None):
Expand Down
37 changes: 27 additions & 10 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2826,7 +2826,7 @@ def _assert_all_in_dataset(self, names, virtual_okay=False):
raise ValueError('One or more of the specified variables '
'cannot be found in this dataset')

def drop(self, labels, dim=None):
def drop(self, labels, dim=None, *, errors='raise'):
"""Drop variables or index labels from this dataset.
Parameters
Expand All @@ -2836,33 +2836,41 @@ def drop(self, labels, dim=None):
dim : None or str, optional
Dimension along which to drop index labels. By default (if
``dim is None``), drops variables rather than index labels.
errors: {'raise', 'ignore'}, optional
If 'raise' (default), raises a ValueError if
any of the variable or index labels passed are not
in the dataset. If 'ignore', any given labels that are in the
dataset are dropped and no error is raised.
Returns
-------
dropped : Dataset
"""
if errors not in ['raise', 'ignore']:
raise ValueError('errors must be either "raise" or "ignore"')
if utils.is_scalar(labels):
labels = [labels]
if dim is None:
return self._drop_vars(labels)
return self._drop_vars(labels, errors=errors)
else:
try:
index = self.indexes[dim]
except KeyError:
raise ValueError(
'dimension %r does not have coordinate labels' % dim)
new_index = index.drop(labels)
new_index = index.drop(labels, errors=errors)
return self.loc[{dim: new_index}]

def _drop_vars(self, names):
self._assert_all_in_dataset(names)
def _drop_vars(self, names, errors='raise'):
if errors == 'raise':
self._assert_all_in_dataset(names)
drop = set(names)
variables = OrderedDict((k, v) for k, v in self._variables.items()
if k not in drop)
coord_names = set(k for k in self._coord_names if k in variables)
return self._replace_vars_and_dims(variables, coord_names)

def drop_dims(self, drop_dims):
def drop_dims(self, drop_dims, *, errors='raise'):
"""Drop dimensions and associated variables from this dataset.
Parameters
Expand All @@ -2875,14 +2883,23 @@ def drop_dims(self, drop_dims):
obj : Dataset
The dataset without the given dimensions (or any variables
containing those dimensions)
errors: {'raise', 'ignore'}, optional
If 'raise' (default), raises a ValueError if
any of the dimensions passed are not
in the dataset. If 'ignore', any given dimensions that are in the
dataset are dropped and no error is raised.
"""
if errors not in ['raise', 'ignore']:
raise ValueError('errors must be either "raise" or "ignore"')

if utils.is_scalar(drop_dims):
drop_dims = [drop_dims]

missing_dimensions = [d for d in drop_dims if d not in self.dims]
if missing_dimensions:
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)
if errors == 'raise':
missing_dimensions = [d for d in drop_dims if d not in self.dims]
if missing_dimensions:
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)

drop_vars = set(k for k, v in self._variables.items()
for d in v.dims if d in drop_dims)
Expand Down
17 changes: 16 additions & 1 deletion xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,19 +1859,34 @@ def test_drop_coordinates(self):
with pytest.raises(ValueError):
arr.drop('not found')

actual = expected.drop('not found', errors='ignore')
assert_identical(actual, expected)

with raises_regex(ValueError, 'cannot be found'):
arr.drop(None)

actual = expected.drop(None, errors='ignore')
assert_identical(actual, expected)

renamed = arr.rename('foo')
with raises_regex(ValueError, 'cannot be found'):
renamed.drop('foo')

actual = renamed.drop('foo', errors='ignore')
assert_identical(actual, renamed)

def test_drop_index_labels(self):
arr = DataArray(np.random.randn(2, 3), coords={'y': [0, 1, 2]},
dims=['x', 'y'])
actual = arr.drop([0, 1], dim='y')
expected = arr[:, 2:]
assert_identical(expected, actual)
assert_identical(actual, expected)

with raises_regex((KeyError, ValueError), 'not .* in axis'):
actual = arr.drop([0, 1, 3], dim='y')

actual = arr.drop([0, 1, 3], dim='y', errors='ignore')
assert_identical(actual, expected)

def test_dropna(self):
x = np.random.randn(4, 4)
Expand Down
35 changes: 35 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,15 @@ def test_drop_variables(self):
with raises_regex(ValueError, 'cannot be found'):
data.drop('not_found_here')

actual = data.drop('not_found_here', errors='ignore')
assert_identical(data, actual)

actual = data.drop(['not_found_here'], errors='ignore')
assert_identical(data, actual)

actual = data.drop(['time', 'not_found_here'], errors='ignore')
assert_identical(expected, actual)

def test_drop_index_labels(self):
data = Dataset({'A': (['x', 'y'], np.random.randn(2, 3)),
'x': ['a', 'b']})
Expand All @@ -1907,6 +1916,16 @@ def test_drop_index_labels(self):
# not contained in axis
data.drop(['c'], dim='x')

actual = data.drop(['c'], dim='x', errors='ignore')
assert_identical(data, actual)

with pytest.raises(ValueError):
data.drop(['c'], dim='x', errors='wrong_value')

actual = data.drop(['a', 'b', 'c'], 'x', errors='ignore')
expected = data.isel(x=slice(0, 0))
assert_identical(expected, actual)

with raises_regex(
ValueError, 'does not have coordinate labels'):
data.drop(1, 'y')
Expand All @@ -1931,6 +1950,22 @@ def test_drop_dims(self):
with pytest.raises((ValueError, KeyError)):
data.drop_dims('z') # not a dimension

with pytest.raises((ValueError, KeyError)):
data.drop_dims(None)

actual = data.drop_dims('z', errors='ignore')
assert_identical(data, actual)

actual = data.drop_dims(None, errors='ignore')
assert_identical(data, actual)

with pytest.raises(ValueError):
actual = data.drop_dims('z', errors='wrong_value')

actual = data.drop_dims(['x', 'y', 'z'], errors='ignore')
expected = data.drop(['A', 'B', 'x'])
assert_identical(expected, actual)

def test_copy(self):
data = create_test_data()
data.attrs['Test'] = [1, 2, 3]
Expand Down

0 comments on commit 9c0bbf7

Please sign in to comment.