From 4cb67dca15d3a70d4f72e2093141efae9059875e Mon Sep 17 00:00:00 2001 From: ahuang11 Date: Fri, 30 Apr 2021 22:22:10 -0500 Subject: [PATCH 01/12] Add drop_duplicates for dims --- doc/api.rst | 2 ++ doc/whats-new.rst | 3 ++ xarray/core/dataarray.py | 26 +++++++++++++++++ xarray/core/dataset.py | 37 ++++++++++++++++++++++++ xarray/tests/test_dataarray.py | 51 ++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 53 ++++++++++++++++++++++++++++++++++ 6 files changed, 172 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 85a0d75f56a..495e2a446c8 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -103,6 +103,7 @@ Dataset contents Dataset.expand_dims Dataset.drop_vars Dataset.drop_dims + Dataset.drop_duplicates Dataset.set_coords Dataset.reset_coords @@ -292,6 +293,7 @@ DataArray contents DataArray.swap_dims DataArray.expand_dims DataArray.drop_vars + DataArray.drop_duplicates DataArray.reset_coords DataArray.copy diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ebbfc87b4d3..b3bf8bdb042 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -99,6 +99,9 @@ New Features :py:func:`xarray.open_mfdataset` when `combine='by_coords'` is specified. Fixes (:issue:`5230`). By `Tom Nicholas `_. +- Implement :py:meth:`Dataset.drop_duplicates` and :py:meth:`DataArray.drop_duplicates` + to remove duplicate dimension values (:pull:`5231`). + By `Andrew Huang `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c622f50335d..572433d32bf 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4480,6 +4480,32 @@ def curvefit( kwargs=kwargs, ) + def drop_duplicates( + self, + dims: Union[Hashable, Iterable[Hashable]] = None, + keep: Union[ + str, + bool, + ] = "first", + ): + """Returns a new DataArray with duplicate dimension values removed. + Parameters + ---------- + dims : dimension label or sequence of labels, optional + Only consider certain dimensions for identifying duplicates, by + default use all dimensions. + keep : {"first", "last", False}, default: "first" + Determines which duplicates (if any) to keep. + - ``"first"`` : Drop duplicates except for the first occurrence. + - ``"last"`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + Returns + ------- + DataArray + """ + ds = self._to_temp_dataset().drop_duplicates(dims=dims, keep=keep) + return self._from_temp_dataset(ds) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = utils.UncachedAccessor(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ca8d8a28e3a..07b09e28193 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7316,3 +7316,40 @@ def _wrapper(Y, *coords_, **kwargs): result.attrs = self.attrs.copy() return result + + def drop_duplicates( + self, + dims: Union[Hashable, Iterable[Hashable]] = None, + keep: Union[str, bool] = "first", + ): + """Returns a new dataset with duplicate dimension values removed. + + Parameters + ---------- + dims : dimension label or sequence of labels, optional + Only consider certain dimensions for identifying duplicates, by + default use all dimensions. + keep : {"first", "last", False}, default: "first" + Determines which duplicates (if any) to keep. + - ``"first"`` : Drop duplicates except for the first occurrence. + - ``"last"`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + + Returns + ------- + Dataset + """ + if dims is None: + dims = list(self.dims) + elif isinstance(dims, str) or not isinstance(dims, Iterable): + dims = [dims] + else: + dims = list(dims) + + indexes = {} + for dim in dims: + if dim not in self.dims: + raise ValueError(f"'{dim}' not found in dimensions") + indexes[dim] = ~self.get_index(dim).duplicated(keep=keep) + + return self.isel(indexes) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d62c156b6d3..0b5ed651960 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7416,3 +7416,54 @@ def test_clip(da): # Unclear whether we want this work, OK to adjust the test when we have decided. with pytest.raises(ValueError, match="arguments without labels along dimension"): result = da.clip(min=da.mean("x"), max=da.mean("a").isel(x=[0, 1])) + + +pytest.mark.parametrize("keep", ["first", "last", False]) +def test_drop_duplicates(keep): + ds = xr.DataArray( + [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" + ) + + if keep == "first": + data = [0, 6, 7] + time = [0, 1, 2] + elif keep == "last": + data = [5, 6, 7] + time = [0, 1, 2] + else: + data = [6, 7] + time = [1, 2] + + expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") + result = ds.drop_duplicates("time", keep=keep) + assert_equal(expected, result) + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_drop_duplicates_multi_dim(keep): + base_data = np.stack([np.arange(0, 5) * i for i in np.arange(0, 5)]) + ds = xr.DataArray( + base_data, + coords={"lat": [0, 1, 2, 2, 3], "lon": [0, 1, 3, 3, 4]}, + dims=["lat", "lon"], + name="test", + ) + + if keep == "first": + data = base_data[[0, 1, 2, 4]][:, [0, 1, 2, 4]] + lat = [0, 1, 2, 3] + lon = [0, 1, 3, 4] + elif keep == "last": + data = base_data[[0, 1, 3, 4]][:, [0, 1, 3, 4]] + lat = [0, 1, 2, 3] + lon = [0, 1, 3, 4] + else: + data = base_data[[0, 1, 4]][:, [0, 1, 4]] + lat = [0, 1, 3] + lon = [0, 1, 4] + + expected = xr.DataArray( + data, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, name="test" + ) + result = ds.drop_duplicates(["lat", "lon"], keep=keep) + assert_equal(expected, result) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 130f65792c5..dd23f9db6fa 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6947,3 +6947,56 @@ def test_clip(ds): result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) assert result.dims == ds.dims + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_drop_duplicates(keep): + ds = xr.DataArray( + [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" + ).to_dataset() + + if keep == "first": + data = [0, 6, 7] + time = [0, 1, 2] + elif keep == "last": + data = [5, 6, 7] + time = [0, 1, 2] + else: + data = [6, 7] + time = [1, 2] + + expected = xr.DataArray( + data, dims="time", coords={"time": time}, name="test" + ).to_dataset() + result = ds.drop_duplicates("time", keep=keep) + assert_equal(expected, result) + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_drop_duplicates_multi_dim(keep): + base_data = np.stack([np.arange(0, 5) * i for i in np.arange(0, 5)]) + ds = xr.DataArray( + base_data, + coords={"lat": [0, 1, 2, 2, 3], "lon": [0, 1, 3, 3, 4]}, + dims=["lat", "lon"], + name="test", + ).to_dataset() + + if keep == "first": + data = base_data[[0, 1, 2, 4]][:, [0, 1, 2, 4]] + lat = [0, 1, 2, 3] + lon = [0, 1, 3, 4] + elif keep == "last": + data = base_data[[0, 1, 3, 4]][:, [0, 1, 3, 4]] + lat = [0, 1, 2, 3] + lon = [0, 1, 3, 4] + else: + data = base_data[[0, 1, 4]][:, [0, 1, 4]] + lat = [0, 1, 3] + lon = [0, 1, 4] + + expected = xr.DataArray( + data, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, name="test" + ).to_dataset() + result = ds.drop_duplicates(["lat", "lon"], keep=keep) + assert_equal(expected, result) From 4d8952fb708d9ea180094c1f7c7f114ba812ed73 Mon Sep 17 00:00:00 2001 From: ahuang11 Date: Fri, 30 Apr 2021 22:25:12 -0500 Subject: [PATCH 02/12] Update PR # and fix lint --- doc/whats-new.rst | 2 +- xarray/tests/test_dataarray.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b3bf8bdb042..f7c71b65dbc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -100,7 +100,7 @@ New Features Fixes (:issue:`5230`). By `Tom Nicholas `_. - Implement :py:meth:`Dataset.drop_duplicates` and :py:meth:`DataArray.drop_duplicates` - to remove duplicate dimension values (:pull:`5231`). + to remove duplicate dimension values (:pull:`5239`). By `Andrew Huang `_. Breaking changes diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0b5ed651960..6f63caac63f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7418,7 +7418,7 @@ def test_clip(da): result = da.clip(min=da.mean("x"), max=da.mean("a").isel(x=[0, 1])) -pytest.mark.parametrize("keep", ["first", "last", False]) +@pytest.mark.parametrize("keep", ["first", "last", False]) def test_drop_duplicates(keep): ds = xr.DataArray( [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" From 3370f6999567a4b7f114fcf64d2ced86326eac49 Mon Sep 17 00:00:00 2001 From: ahuang11 Date: Sat, 1 May 2021 13:38:46 -0500 Subject: [PATCH 03/12] Remove dataset --- xarray/core/dataarray.py | 16 +++++++++-- xarray/core/dataset.py | 37 ------------------------- xarray/tests/test_dataset.py | 53 ------------------------------------ 3 files changed, 14 insertions(+), 92 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 572433d32bf..8afedfa8716 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4503,8 +4503,20 @@ def drop_duplicates( ------- DataArray """ - ds = self._to_temp_dataset().drop_duplicates(dims=dims, keep=keep) - return self._from_temp_dataset(ds) + if dims is None: + dims = list(self.dims) + elif isinstance(dims, str) or not isinstance(dims, Iterable): + dims = [dims] + else: + dims = list(dims) + + indexes = {} + for dim in dims: + if dim not in self.dims: + raise ValueError(f"'{dim}' not found in dimensions") + indexes[dim] = ~self.get_index(dim).duplicated(keep=keep) + + return self.isel(indexes) # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 07b09e28193..ca8d8a28e3a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7316,40 +7316,3 @@ def _wrapper(Y, *coords_, **kwargs): result.attrs = self.attrs.copy() return result - - def drop_duplicates( - self, - dims: Union[Hashable, Iterable[Hashable]] = None, - keep: Union[str, bool] = "first", - ): - """Returns a new dataset with duplicate dimension values removed. - - Parameters - ---------- - dims : dimension label or sequence of labels, optional - Only consider certain dimensions for identifying duplicates, by - default use all dimensions. - keep : {"first", "last", False}, default: "first" - Determines which duplicates (if any) to keep. - - ``"first"`` : Drop duplicates except for the first occurrence. - - ``"last"`` : Drop duplicates except for the last occurrence. - - False : Drop all duplicates. - - Returns - ------- - Dataset - """ - if dims is None: - dims = list(self.dims) - elif isinstance(dims, str) or not isinstance(dims, Iterable): - dims = [dims] - else: - dims = list(dims) - - indexes = {} - for dim in dims: - if dim not in self.dims: - raise ValueError(f"'{dim}' not found in dimensions") - indexes[dim] = ~self.get_index(dim).duplicated(keep=keep) - - return self.isel(indexes) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index dd23f9db6fa..130f65792c5 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6947,56 +6947,3 @@ def test_clip(ds): result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) assert result.dims == ds.dims - - -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_drop_duplicates(keep): - ds = xr.DataArray( - [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" - ).to_dataset() - - if keep == "first": - data = [0, 6, 7] - time = [0, 1, 2] - elif keep == "last": - data = [5, 6, 7] - time = [0, 1, 2] - else: - data = [6, 7] - time = [1, 2] - - expected = xr.DataArray( - data, dims="time", coords={"time": time}, name="test" - ).to_dataset() - result = ds.drop_duplicates("time", keep=keep) - assert_equal(expected, result) - - -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_drop_duplicates_multi_dim(keep): - base_data = np.stack([np.arange(0, 5) * i for i in np.arange(0, 5)]) - ds = xr.DataArray( - base_data, - coords={"lat": [0, 1, 2, 2, 3], "lon": [0, 1, 3, 3, 4]}, - dims=["lat", "lon"], - name="test", - ).to_dataset() - - if keep == "first": - data = base_data[[0, 1, 2, 4]][:, [0, 1, 2, 4]] - lat = [0, 1, 2, 3] - lon = [0, 1, 3, 4] - elif keep == "last": - data = base_data[[0, 1, 3, 4]][:, [0, 1, 3, 4]] - lat = [0, 1, 2, 3] - lon = [0, 1, 3, 4] - else: - data = base_data[[0, 1, 4]][:, [0, 1, 4]] - lat = [0, 1, 3] - lon = [0, 1, 4] - - expected = xr.DataArray( - data, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, name="test" - ).to_dataset() - result = ds.drop_duplicates(["lat", "lon"], keep=keep) - assert_equal(expected, result) From 4f20db5afdd5d0b6f294dd0f5a181f7dbb814aba Mon Sep 17 00:00:00 2001 From: ahuang11 Date: Sat, 1 May 2021 13:54:54 -0500 Subject: [PATCH 04/12] Remove references to ds --- doc/api.rst | 1 - doc/whats-new.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 495e2a446c8..746cf1a874c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -103,7 +103,6 @@ Dataset contents Dataset.expand_dims Dataset.drop_vars Dataset.drop_dims - Dataset.drop_duplicates Dataset.set_coords Dataset.reset_coords diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f7c71b65dbc..87995d48798 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -99,7 +99,7 @@ New Features :py:func:`xarray.open_mfdataset` when `combine='by_coords'` is specified. Fixes (:issue:`5230`). By `Tom Nicholas `_. -- Implement :py:meth:`Dataset.drop_duplicates` and :py:meth:`DataArray.drop_duplicates` +- Implement :py:meth:`DataArray.drop_duplicates` to remove duplicate dimension values (:pull:`5239`). By `Andrew Huang `_. From 2852662fd0eea13a908e8a45b68602a79513dd3c Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Sat, 1 May 2021 14:49:01 -0500 Subject: [PATCH 05/12] Update dataarray.py --- xarray/core/dataarray.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8afedfa8716..f5bf57c05df 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4483,10 +4483,7 @@ def curvefit( def drop_duplicates( self, dims: Union[Hashable, Iterable[Hashable]] = None, - keep: Union[ - str, - bool, - ] = "first", + keep: Union[str, bool] = "first", ): """Returns a new DataArray with duplicate dimension values removed. Parameters From d8fba5f30ab74f2c946fb82fc7009ce2f2726eb5 Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Sat, 1 May 2021 14:52:08 -0500 Subject: [PATCH 06/12] Update xarray/core/dataarray.py Co-authored-by: keewis --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f5bf57c05df..65101255e54 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4488,7 +4488,7 @@ def drop_duplicates( """Returns a new DataArray with duplicate dimension values removed. Parameters ---------- - dims : dimension label or sequence of labels, optional + dims : hashable or sequence of hashable, optional Only consider certain dimensions for identifying duplicates, by default use all dimensions. keep : {"first", "last", False}, default: "first" From 14ccd4bfec9a14f8739385b4a29b75f5ffbb79fc Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Sat, 1 May 2021 14:54:51 -0500 Subject: [PATCH 07/12] Update dataarray.py --- xarray/core/dataarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 65101255e54..fda3bbea26e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4496,6 +4496,7 @@ def drop_duplicates( - ``"first"`` : Drop duplicates except for the first occurrence. - ``"last"`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. + Returns ------- DataArray From 4467586062bdc02499316c00f3f8b8bc81796f88 Mon Sep 17 00:00:00 2001 From: ahuang11 Date: Wed, 12 May 2021 22:11:11 -0500 Subject: [PATCH 08/12] Single dim --- xarray/core/dataarray.py | 21 +++++---------------- xarray/tests/test_dataarray.py | 30 ------------------------------ 2 files changed, 5 insertions(+), 46 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8afedfa8716..0cd6e8c32eb 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4482,7 +4482,7 @@ def curvefit( def drop_duplicates( self, - dims: Union[Hashable, Iterable[Hashable]] = None, + dim: str, keep: Union[ str, bool, @@ -4491,9 +4491,7 @@ def drop_duplicates( """Returns a new DataArray with duplicate dimension values removed. Parameters ---------- - dims : dimension label or sequence of labels, optional - Only consider certain dimensions for identifying duplicates, by - default use all dimensions. + dims : dimension label, optional keep : {"first", "last", False}, default: "first" Determines which duplicates (if any) to keep. - ``"first"`` : Drop duplicates except for the first occurrence. @@ -4503,19 +4501,10 @@ def drop_duplicates( ------- DataArray """ - if dims is None: - dims = list(self.dims) - elif isinstance(dims, str) or not isinstance(dims, Iterable): - dims = [dims] - else: - dims = list(dims) - indexes = {} - for dim in dims: - if dim not in self.dims: - raise ValueError(f"'{dim}' not found in dimensions") - indexes[dim] = ~self.get_index(dim).duplicated(keep=keep) - + if dim not in self.dims: + raise ValueError(f"'{dim}' not found in dimensions") + indexes[dim] = ~self.get_index(dim).duplicated(keep=keep) return self.isel(indexes) # this needs to be at the end, or mypy will confuse with `str` diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6f63caac63f..2ad96c3d093 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -7437,33 +7437,3 @@ def test_drop_duplicates(keep): expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) - - -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_drop_duplicates_multi_dim(keep): - base_data = np.stack([np.arange(0, 5) * i for i in np.arange(0, 5)]) - ds = xr.DataArray( - base_data, - coords={"lat": [0, 1, 2, 2, 3], "lon": [0, 1, 3, 3, 4]}, - dims=["lat", "lon"], - name="test", - ) - - if keep == "first": - data = base_data[[0, 1, 2, 4]][:, [0, 1, 2, 4]] - lat = [0, 1, 2, 3] - lon = [0, 1, 3, 4] - elif keep == "last": - data = base_data[[0, 1, 3, 4]][:, [0, 1, 3, 4]] - lat = [0, 1, 2, 3] - lon = [0, 1, 3, 4] - else: - data = base_data[[0, 1, 4]][:, [0, 1, 4]] - lat = [0, 1, 3] - lon = [0, 1, 4] - - expected = xr.DataArray( - data, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, name="test" - ) - result = ds.drop_duplicates(["lat", "lon"], keep=keep) - assert_equal(expected, result) From bc804101290d820d1b883d6cbaee6e87bcd65f90 Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Wed, 12 May 2021 23:06:49 -0500 Subject: [PATCH 09/12] Update xarray/core/dataarray.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6101d82d668..b0c3b1439cd 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4578,7 +4578,7 @@ def curvefit( def drop_duplicates( self, - dim: str, + dim: Hashable, keep: Union[ str, bool, From 75a9c91b3d66c1bd24f9215c448d5c338bb14499 Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Wed, 12 May 2021 23:06:54 -0500 Subject: [PATCH 10/12] Update xarray/core/dataarray.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b0c3b1439cd..b0b1249851e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4601,7 +4601,7 @@ def drop_duplicates( indexes = {} if dim not in self.dims: raise ValueError(f"'{dim}' not found in dimensions") - indexes[dim] = ~self.get_index(dim).duplicated(keep=keep) + indexes = {dim: ~self.get_index(dim).duplicated(keep=keep)} return self.isel(indexes) # this needs to be at the end, or mypy will confuse with `str` From fd92d25355a2bdef681198ebf2ad532679b34938 Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Wed, 12 May 2021 23:07:01 -0500 Subject: [PATCH 11/12] Update xarray/core/dataarray.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataarray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b0b1249851e..ea6753e445c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4598,7 +4598,6 @@ def drop_duplicates( ------- DataArray """ - indexes = {} if dim not in self.dims: raise ValueError(f"'{dim}' not found in dimensions") indexes = {dim: ~self.get_index(dim).duplicated(keep=keep)} From d4a0462812eb817374342147c3520db0dc4297e7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 13 May 2021 11:47:12 -0600 Subject: [PATCH 12/12] [skip-ci]