From 3442d8fe6d607e5910d522c7e0d7533b5b281c79 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 16 Jul 2022 15:11:53 -0600 Subject: [PATCH 1/8] Drop multi-indexes when assigning to a multi-indexed variable Closes #6505 --- xarray/core/coordinates.py | 27 ++++++++++++++++++++++++++- xarray/core/dataset.py | 1 + xarray/tests/test_dataarray.py | 7 +++++++ xarray/tests/test_dataset.py | 12 ++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 65949a24369..db7de1c2346 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Hashable, Iterator, Mapping, Sequence, cast @@ -7,7 +8,7 @@ import pandas as pd from . import formatting -from .indexes import Index, Indexes, assert_no_index_corrupted +from .indexes import Index, Indexes, PandasMultiIndex, assert_no_index_corrupted from .merge import merge_coordinates_without_align, merge_coords from .utils import Frozen, ReprObject from .variable import Variable, calculate_dimensions @@ -152,8 +153,32 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: return pd.MultiIndex(level_list, code_list, names=names) + def _drop_multiindexes(self, keys): + from .dataset import Dataset + + for key in keys: + maybe_midx = self._data._indexes.get(key, None) + idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) + if isinstance(maybe_midx, PandasMultiIndex): + warnings.warn( + f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " + f"other variables: {list(maybe_midx.index.names)!r}. " + f"This will raise an error in the future. Use `.drop_vars({idx_coord_names!r})` before " + "assigning new coordinate values.", + DeprecationWarning, + stacklevel=3, + ) + for k in idx_coord_names: + if isinstance(self._data, Dataset): + del self._data._variables[k] + del self._data._indexes[k] + else: + del self._data._coords[k] + del self._data._indexes[k] + def update(self, other: Mapping[Any, Any]) -> None: other_vars = getattr(other, "variables", other) + self._drop_multiindexes(other_vars.keys()) coords, indexes = merge_coords( [self.variables, other_vars], priority_arg=1, indexes=self.xindexes ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dc147fa921d..1a6a527eba0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5764,6 +5764,7 @@ def assign( data = self.copy() # do all calculations first... results: CoercibleMapping = data._calc_assign_results(variables) + data.coords._drop_multiindexes(results.keys()) # ... and then assign data.update(results) return data diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f26fc1c9d5e..b53db972a76 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1499,6 +1499,13 @@ def test_assign_coords(self) -> None: with pytest.raises(ValueError): da.coords["x"] = ("y", [1, 2, 3]) # no new dimension to a DataArray + def test_assign_coords_existing_multiindex(self) -> None: + data = self.mda + with pytest.warns( + DeprecationWarning, match=r"Updating MultiIndexed coordinate" + ): + data.assign_coords(x=range(4)) + def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 459acfd87fa..9ea47163d05 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3967,6 +3967,18 @@ def test_assign_multiindex_level(self) -> None: data.assign(level_1=range(4)) data.assign_coords(level_1=range(4)) + def test_assign_coords_existing_multiindex(self) -> None: + data = create_test_multiindex() + with pytest.warns( + DeprecationWarning, match=r"Updating MultiIndexed coordinate" + ): + data.assign_coords(x=range(4)) + + with pytest.warns( + DeprecationWarning, match=r"Updating MultiIndexed coordinate" + ): + data.assign(x=range(4)) + def test_assign_all_multiindex_coords(self) -> None: data = create_test_multiindex() actual = data.assign(x=range(4), level_1=range(4), level_2=range(4)) From 3a586b755f4e0a60512899fbac3b076be45f57f1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 16 Jul 2022 15:19:24 -0600 Subject: [PATCH 2/8] fix. --- xarray/core/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index db7de1c2346..98301c8a96e 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -156,8 +156,8 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: def _drop_multiindexes(self, keys): from .dataset import Dataset - for key in keys: - maybe_midx = self._data._indexes.get(key, None) + for key in set(keys) & set(self._data._indexes): + maybe_midx = self._data._indexes[key] idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) if isinstance(maybe_midx, PandasMultiIndex): warnings.warn( From 44c94fac5009643823485912f31e256aebb13a44 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 16 Jul 2022 15:20:01 -0600 Subject: [PATCH 3/8] Update xarray/core/coordinates.py Co-authored-by: Anderson Banihirwe --- xarray/core/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 98301c8a96e..2352d005b03 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -171,10 +171,10 @@ def _drop_multiindexes(self, keys): for k in idx_coord_names: if isinstance(self._data, Dataset): del self._data._variables[k] - del self._data._indexes[k] else: del self._data._coords[k] - del self._data._indexes[k] + + del self._data._indexes[k] def update(self, other: Mapping[Any, Any]) -> None: other_vars = getattr(other, "variables", other) From 23865b374dafe6139a79a1647782b66b69162a25 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 19 Jul 2022 09:12:49 -0600 Subject: [PATCH 4/8] Add Data*Coordinates._drop_coords --- xarray/core/coordinates.py | 60 +++++++++++++++++++++++--------------- xarray/core/dataset.py | 2 +- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 2352d005b03..759721915c5 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -153,32 +153,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: return pd.MultiIndex(level_list, code_list, names=names) - def _drop_multiindexes(self, keys): - from .dataset import Dataset - - for key in set(keys) & set(self._data._indexes): - maybe_midx = self._data._indexes[key] - idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) - if isinstance(maybe_midx, PandasMultiIndex): - warnings.warn( - f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " - f"other variables: {list(maybe_midx.index.names)!r}. " - f"This will raise an error in the future. Use `.drop_vars({idx_coord_names!r})` before " - "assigning new coordinate values.", - DeprecationWarning, - stacklevel=3, - ) - for k in idx_coord_names: - if isinstance(self._data, Dataset): - del self._data._variables[k] - else: - del self._data._coords[k] - - del self._data._indexes[k] - def update(self, other: Mapping[Any, Any]) -> None: other_vars = getattr(other, "variables", other) - self._drop_multiindexes(other_vars.keys()) + self._drop_coords(other_vars) coords, indexes = merge_coords( [self.variables, other_vars], priority_arg=1, indexes=self.xindexes ) @@ -329,6 +306,14 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes + def _drop_coords(self, coords: dict[Hashable, Variable]) -> None: + """Drops variables in coords, and any associated variables as well.""" + variables = self._data._variables.copy() + indexes = dict(self._data.xindexes) + drop_coords(coords, variables, indexes) + self._data._variables = variables + self._data._indexes = indexes + def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] @@ -397,6 +382,14 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes + def _drop_coords(self, coords: dict[Hashable, Variable]) -> None: + """Drops variables in coords, and any associated variables as well.""" + variables = self._data._coords.copy() + indexes = dict(self._data.xindexes) + drop_coords(coords, variables, indexes) + self._data._coords = variables + self._data._indexes = indexes + @property def variables(self): return Frozen(self._data._coords) @@ -422,6 +415,25 @@ def _ipython_key_completions_(self): return self._data._ipython_key_completions_() +def drop_coords(coords_to_drop, variables, indexes): + """Drop index variables associated with variables in coords_to_drop.""" + for key in set(coords_to_drop) & set(indexes): + maybe_midx = indexes[key] + idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) + if isinstance(maybe_midx, PandasMultiIndex): + warnings.warn( + f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " + f"other variables: {list(maybe_midx.index.names)!r}. " + f"This will raise an error in the future. Use `.drop_vars({idx_coord_names!r})` before " + "assigning new coordinate values.", + DeprecationWarning, + stacklevel=4, + ) + for k in idx_coord_names: + del variables[k] + del indexes[k] + + def assert_coordinate_consistent( obj: DataArray | Dataset, coords: Mapping[Any, Variable] ) -> None: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1a6a527eba0..492eda38f21 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5764,7 +5764,7 @@ def assign( data = self.copy() # do all calculations first... results: CoercibleMapping = data._calc_assign_results(variables) - data.coords._drop_multiindexes(results.keys()) + data.coords._drop_coords(results) # ... and then assign data.update(results) return data From 1b88ac408f6bc2d9fc6d5354f9199a823fdcde77 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 19 Jul 2022 12:13:07 -0600 Subject: [PATCH 5/8] Restore error when assigning to a MultiIndex level. --- xarray/core/coordinates.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 759721915c5..ad1cd68bf62 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -417,10 +417,17 @@ def _ipython_key_completions_(self): def drop_coords(coords_to_drop, variables, indexes): """Drop index variables associated with variables in coords_to_drop.""" + # Only warn when we're dropping the dimension with the multi-indexed coordinate + # If asked to drop a subset of the levels in a multi-index, we raise an error + # later but skip the warning here. for key in set(coords_to_drop) & set(indexes): maybe_midx = indexes[key] idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) - if isinstance(maybe_midx, PandasMultiIndex): + if ( + isinstance(maybe_midx, PandasMultiIndex) + and key == maybe_midx.dim + and (idx_coord_names - coords_to_drop) + ): warnings.warn( f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " f"other variables: {list(maybe_midx.index.names)!r}. " From b3a32f1cc17e25741e9b8fefee7295d2c14f338e Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 19 Jul 2022 12:44:24 -0600 Subject: [PATCH 6/8] fix --- xarray/core/coordinates.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index ad1cd68bf62..4071e8349d1 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -420,13 +420,14 @@ def drop_coords(coords_to_drop, variables, indexes): # Only warn when we're dropping the dimension with the multi-indexed coordinate # If asked to drop a subset of the levels in a multi-index, we raise an error # later but skip the warning here. - for key in set(coords_to_drop) & set(indexes): + names = set(coords_to_drop) + for key in names & set(indexes): maybe_midx = indexes[key] idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) if ( isinstance(maybe_midx, PandasMultiIndex) and key == maybe_midx.dim - and (idx_coord_names - coords_to_drop) + and (idx_coord_names - names) ): warnings.warn( f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " From f1bdf8bf9d541b06f617b00b6d75ce9a9a85a72a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 20 Jul 2022 08:32:33 -0600 Subject: [PATCH 7/8] Update xarray/core/coordinates.py Co-authored-by: Benoit Bovy --- xarray/core/coordinates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 4071e8349d1..1d158e158e0 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -423,7 +423,7 @@ def drop_coords(coords_to_drop, variables, indexes): names = set(coords_to_drop) for key in names & set(indexes): maybe_midx = indexes[key] - idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) + idx_coord_names = set(indexes.get_all_coords(key)) if ( isinstance(maybe_midx, PandasMultiIndex) and key == maybe_midx.dim From e5762ec247750ea66e76d884e570c9322d5c94a9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 20 Jul 2022 08:53:48 -0600 Subject: [PATCH 8/8] Address review comments. --- xarray/core/coordinates.py | 38 +++++++++++++++++++++++--------------- xarray/core/dataset.py | 2 +- xarray/core/indexes.py | 3 +++ 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 1d158e158e0..42cc8130810 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -58,6 +58,9 @@ def variables(self): def _update_coords(self, coords, indexes): raise NotImplementedError() + def _maybe_drop_multiindex_coords(self, coords): + raise NotImplementedError() + def __iter__(self) -> Iterator[Hashable]: # needs to be in the same order as the dataset variables for k in self.variables: @@ -155,7 +158,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: def update(self, other: Mapping[Any, Any]) -> None: other_vars = getattr(other, "variables", other) - self._drop_coords(other_vars) + self._maybe_drop_multiindex_coords(set(other_vars)) coords, indexes = merge_coords( [self.variables, other_vars], priority_arg=1, indexes=self.xindexes ) @@ -306,11 +309,12 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes - def _drop_coords(self, coords: dict[Hashable, Variable]) -> None: + def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: """Drops variables in coords, and any associated variables as well.""" - variables = self._data._variables.copy() - indexes = dict(self._data.xindexes) - drop_coords(coords, variables, indexes) + assert self._data.xindexes is not None + variables, indexes = drop_coords( + coords, self._data._variables, self._data.xindexes + ) self._data._variables = variables self._data._indexes = indexes @@ -382,11 +386,11 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes - def _drop_coords(self, coords: dict[Hashable, Variable]) -> None: + def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: """Drops variables in coords, and any associated variables as well.""" - variables = self._data._coords.copy() - indexes = dict(self._data.xindexes) - drop_coords(coords, variables, indexes) + variables, indexes = drop_coords( + coords, self._data._coords, self._data.xindexes + ) self._data._coords = variables self._data._indexes = indexes @@ -415,19 +419,22 @@ def _ipython_key_completions_(self): return self._data._ipython_key_completions_() -def drop_coords(coords_to_drop, variables, indexes): +def drop_coords( + coords_to_drop: set[Hashable], variables, indexes: Indexes +) -> tuple[dict, dict]: """Drop index variables associated with variables in coords_to_drop.""" # Only warn when we're dropping the dimension with the multi-indexed coordinate # If asked to drop a subset of the levels in a multi-index, we raise an error # later but skip the warning here. - names = set(coords_to_drop) - for key in names & set(indexes): + new_variables = dict(variables.copy()) + new_indexes = dict(indexes.copy()) + for key in coords_to_drop & set(indexes): maybe_midx = indexes[key] idx_coord_names = set(indexes.get_all_coords(key)) if ( isinstance(maybe_midx, PandasMultiIndex) and key == maybe_midx.dim - and (idx_coord_names - names) + and (idx_coord_names - coords_to_drop) ): warnings.warn( f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " @@ -438,8 +445,9 @@ def drop_coords(coords_to_drop, variables, indexes): stacklevel=4, ) for k in idx_coord_names: - del variables[k] - del indexes[k] + del new_variables[k] + del new_indexes[k] + return new_variables, new_indexes def assert_coordinate_consistent( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 492eda38f21..559950b8fbd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5764,7 +5764,7 @@ def assign( data = self.copy() # do all calculations first... results: CoercibleMapping = data._calc_assign_results(variables) - data.coords._drop_coords(results) + data.coords._maybe_drop_multiindex_coords(set(results.keys())) # ... and then assign data.update(results) return data diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index d7133683d83..8ff0d40ff07 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1085,6 +1085,9 @@ def dims(self) -> Mapping[Hashable, int]: return Frozen(self._dims) + def copy(self): + return type(self)(dict(self._indexes), dict(self._variables)) + def get_unique(self) -> list[T_PandasOrXarrayIndex]: """Return a list of unique indexes, preserving order."""