Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop multi-indexes when assigning to a multi-indexed variable #6798

Merged
merged 9 commits into from
Jul 21, 2022
27 changes: 26 additions & 1 deletion xarray/core/coordinates.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from __future__ import annotations

import warnings
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Hashable, Iterator, Mapping, Sequence, cast

import numpy as np
import pandas as pd

from . import formatting
from .indexes import Index, Indexes, assert_no_index_corrupted
from .indexes import Index, Indexes, PandasMultiIndex, assert_no_index_corrupted
from .merge import merge_coordinates_without_align, merge_coords
from .utils import Frozen, ReprObject
from .variable import Variable, calculate_dimensions
Expand Down Expand Up @@ -152,8 +153,32 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:

return pd.MultiIndex(level_list, code_list, names=names)

def _drop_multiindexes(self, keys):
from .dataset import Dataset

for key in keys:
maybe_midx = self._data._indexes.get(key, None)
idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim])
if isinstance(maybe_midx, PandasMultiIndex):
warnings.warn(
f"Updating MultiIndexed coordinate {key!r} would corrupt indices for "
f"other variables: {list(maybe_midx.index.names)!r}. "
f"This will raise an error in the future. Use `.drop_vars({idx_coord_names!r})` before "
"assigning new coordinate values.",
DeprecationWarning,
stacklevel=3,
)
for k in idx_coord_names:
if isinstance(self._data, Dataset):
del self._data._variables[k]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a helper function to drop "associated variables" on DataArrays and Datasets?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like for _update_coords(), you could implement DatasetCoordinates._drop_coords() and DataArrayCoordinates._drop_coords() and call self._drop_coords() from here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like it!

del self._data._indexes[k]
else:
del self._data._coords[k]
del self._data._indexes[k]
dcherian marked this conversation as resolved.
Show resolved Hide resolved

dcherian marked this conversation as resolved.
Show resolved Hide resolved
def update(self, other: Mapping[Any, Any]) -> None:
other_vars = getattr(other, "variables", other)
self._drop_multiindexes(other_vars.keys())
coords, indexes = merge_coords(
[self.variables, other_vars], priority_arg=1, indexes=self.xindexes
)
Expand Down
1 change: 1 addition & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5764,6 +5764,7 @@ def assign(
data = self.copy()
# do all calculations first...
results: CoercibleMapping = data._calc_assign_results(variables)
data.coords._drop_multiindexes(results.keys())
# ... and then assign
data.update(results)
return data
Expand Down
7 changes: 7 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,13 @@ def test_assign_coords(self) -> None:
with pytest.raises(ValueError):
da.coords["x"] = ("y", [1, 2, 3]) # no new dimension to a DataArray

def test_assign_coords_existing_multiindex(self) -> None:
data = self.mda
with pytest.warns(
DeprecationWarning, match=r"Updating MultiIndexed coordinate"
):
data.assign_coords(x=range(4))

def test_coords_alignment(self) -> None:
lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])])
rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])])
Expand Down
12 changes: 12 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3967,6 +3967,18 @@ def test_assign_multiindex_level(self) -> None:
data.assign(level_1=range(4))
data.assign_coords(level_1=range(4))

def test_assign_coords_existing_multiindex(self) -> None:
data = create_test_multiindex()
with pytest.warns(
DeprecationWarning, match=r"Updating MultiIndexed coordinate"
):
data.assign_coords(x=range(4))

with pytest.warns(
DeprecationWarning, match=r"Updating MultiIndexed coordinate"
):
data.assign(x=range(4))

def test_assign_all_multiindex_coords(self) -> None:
data = create_test_multiindex()
actual = data.assign(x=range(4), level_1=range(4), level_2=range(4))
Expand Down