-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add drop duplicates #5089
Add drop duplicates #5089
Changes from 8 commits
d84dae7
1494966
adfafc0
322ad9a
81d4002
28aa96a
4b1dab7
f9ee3fe
daa6e42
cc94bbe
f7dcdd4
915dcf5
344a7d8
596ec7a
1698990
e307041
d33586e
8c27afb
8a168ce
a1ce19d
e1e24bc
d7cf3c4
1c8a4ae
c2cc15f
966a420
d9fde90
b9ee4ca
3b9b7e3
61352f9
25949b0
5c4fc82
a77f78d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7074,5 +7074,42 @@ def query( | |
# apply the selection | ||
return self.isel(indexers, missing_dims=missing_dims) | ||
|
||
def drop_duplicates(
    self,
    dims: Union[Hashable, Iterable[Hashable]] = None,
    keep: Union[str, bool] = "first",
):
    """Return a new dataset with duplicate dimension values removed.

    Parameters
    ----------
    dims : dimension label or sequence of labels, optional
        Only consider certain dimensions for identifying duplicates, by
        default use all dimensions.
    keep : {"first", "last", False}, default: "first"
        Determines which duplicates (if any) to keep.

        - ``"first"`` : Drop duplicates except for the first occurrence.
        - ``"last"`` : Drop duplicates except for the last occurrence.
        - False : Drop all duplicates.

    Returns
    -------
    Dataset

    Raises
    ------
    ValueError
        If any requested name is not one of this object's dimensions.
    """
    if dims is None:
        # Default to the dimensions themselves rather than ``self.coords``:
        # a coordinate that is not a dimension would fail the membership
        # check below and make the no-argument call raise.
        dims = list(self.dims)
    elif isinstance(dims, str) or not isinstance(dims, Iterable):
        # A lone label (strings are iterable, so test for them first).
        dims = [dims]
    else:
        dims = list(dims)

    indexes = {}
    for dim in dims:
        if dim not in self.dims:
            raise ValueError(f"'{dim}' not found in dimensions")
        # ``duplicated`` flags the entries to drop; invert it to get a
        # boolean "keep" mask usable as a positional indexer.
        indexes[dim] = ~self.get_index(dim).duplicated(keep=keep)

    return self.isel(indexes)
|
||
|
||
ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
...I think?
And we should add a test for this, please — an array with a non-dimension coordinate.