From 42d42bab5811702e56c638b9489665d3c505a0c1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 22 Aug 2023 14:46:29 -0600 Subject: [PATCH 01/14] [skip-ci] dev whats-new (#8098) --- doc/whats-new.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8e119361ba1..4cd24a54fc8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,36 @@ What's New np.random.seed(123456) + +.. _whats-new.2023.08.1: + +v2023.08.1 (unreleased) +----------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + .. _whats-new.2023.08.0: v2023.08.0 (Aug 18, 2023) From 1fedfd86604f87538d1953b01d6990c2c89fcbf3 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 29 Aug 2023 16:23:28 +0200 Subject: [PATCH 02/14] Refactor update coordinates to better handle multi-coordinate indexes (#8094) * generic warning implicitly wrap a pd.MultiIndex * refactor update_coords (assign) Fix more cases with multi-coordinate indexes: - do not try to align existing indexed coordinates with the new coordinates that will fully replace them - raise early if the new coordinates would corrupt the existing indexed coordinates - isolate PandasMultiIndex special cases so that it will be easier to drop support for it later (and warn now about deprecation) * fix alignment of updated coordinates when DataArray objects are passed as new coordinate objects * refactor Dataset.assign Need to update (replace) coordinates and data variables separately to ensure it goes through all (indexed) coordinate update checks. * fix and update tests * nit * fix mypy? 
* update what's new * fix import error * fix performance regression * nit * use warning util func and improve messages --- doc/whats-new.rst | 11 +- xarray/core/coordinates.py | 193 ++++++++++++++++++++------------- xarray/core/dataset.py | 22 +++- xarray/tests/test_dataarray.py | 10 +- xarray/tests/test_dataset.py | 71 +++++++++--- 5 files changed, 205 insertions(+), 102 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4cd24a54fc8..3eacbce2895 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,7 +14,6 @@ What's New np.random.seed(123456) - .. _whats-new.2023.08.1: v2023.08.1 (unreleased) @@ -31,10 +30,19 @@ Breaking changes Deprecations ~~~~~~~~~~~~ +- Deprecate passing a :py:class:`pandas.MultiIndex` object directly to the + :py:class:`Dataset` and :py:class:`DataArray` constructors as well as to + :py:meth:`Dataset.assign` and :py:meth:`Dataset.assign_coords`. + A new Xarray :py:class:`Coordinates` object has to be created first using + :py:meth:`Coordinates.from_pandas_multiindex` (:pull:`8094`). + By `Benoît Bovy `_. Bug fixes ~~~~~~~~~ +- Improved handling of multi-coordinate indexes when updating coordinates, including bug fixes + (and improved warnings for deprecated features) for pandas multi-indexes (:pull:`8094`). + By `Benoît Bovy `_. 
Documentation ~~~~~~~~~~~~~ @@ -120,7 +128,6 @@ Breaking changes numbagg 0.1 0.2.1 ===================== ========= ======== - Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index f03d98f781a..7c14a8c3d0a 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from collections.abc import Hashable, Iterator, Mapping, Sequence from contextlib import contextmanager from typing import ( @@ -24,7 +23,7 @@ ) from xarray.core.merge import merge_coordinates_without_align, merge_coords from xarray.core.types import Self, T_DataArray -from xarray.core.utils import Frozen, ReprObject +from xarray.core.utils import Frozen, ReprObject, emit_user_level_warning from xarray.core.variable import Variable, as_variable, calculate_dimensions if TYPE_CHECKING: @@ -83,7 +82,7 @@ def variables(self): def _update_coords(self, coords, indexes): raise NotImplementedError() - def _maybe_drop_multiindex_coords(self, coords): + def _drop_coords(self, coord_names): raise NotImplementedError() def __iter__(self) -> Iterator[Hashable]: @@ -379,9 +378,9 @@ def _update_coords( # redirect to DatasetCoordinates._update_coords self._data.coords._update_coords(coords, indexes) - def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: - # redirect to DatasetCoordinates._maybe_drop_multiindex_coords - self._data.coords._maybe_drop_multiindex_coords(coords) + def _drop_coords(self, coord_names): + # redirect to DatasetCoordinates._drop_coords + self._data.coords._drop_coords(coord_names) def _merge_raw(self, other, reflexive): """For use with binary arithmetic.""" @@ -454,22 +453,40 @@ def __setitem__(self, key: Hashable, value: Any) -> None: def update(self, other: Mapping[Any, Any]) -> None: """Update this Coordinates variables with other coordinate variables.""" - other_obj: Coordinates | Mapping[Hashable, Variable] + + if not len(other): + return + + 
other_coords: Coordinates if isinstance(other, Coordinates): - # special case: default indexes won't be created - other_obj = other + # Coordinates object: just pass it (default indexes won't be created) + other_coords = other else: - other_obj = getattr(other, "variables", other) + other_coords = create_coords_with_default_indexes( + getattr(other, "variables", other) + ) - self._maybe_drop_multiindex_coords(set(other_obj)) + # Discarding the original indexed coordinates prior to the merge allows us to: + # - fail early if the new coordinates don't preserve the integrity of existing + # multi-coordinate indexes + # - drop & replace coordinates without alignment (note: we must keep indexed + # coordinates extracted from the DataArray objects passed as values to + # `other` - if any - as those are still used for aligning the old/new coordinates) + coords_to_align = drop_indexed_coords(set(other_coords) & set(other), self) coords, indexes = merge_coords( - [self.variables, other_obj], + [coords_to_align, other_coords], priority_arg=1, - indexes=self.xindexes, + indexes=coords_to_align.xindexes, ) + # special case for PandasMultiIndex: updating only its dimension coordinate + # is still allowed but deprecated. + # It is the only case where we need to actually drop coordinates here (multi-index levels) + # TODO: remove when removing PandasMultiIndex's dimension coordinate. 
+ self._drop_coords(self._names - coords_to_align._names) + self._update_coords(coords, indexes) def _overwrite_indexes( @@ -610,15 +627,20 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes - def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: - """Drops variables in coords, and any associated variables as well.""" + def _drop_coords(self, coord_names): + # should drop indexed coordinates only + for name in coord_names: + del self._data._variables[name] + del self._data._indexes[name] + self._data._coord_names.difference_update(coord_names) + + def _drop_indexed_coords(self, coords_to_drop: set[Hashable]) -> None: assert self._data.xindexes is not None - variables, indexes = drop_coords( - coords, self._data._variables, self._data.xindexes - ) - self._data._coord_names.intersection_update(variables) - self._data._variables = variables - self._data._indexes = indexes + new_coords = drop_indexed_coords(coords_to_drop, self) + for name in self._data._coord_names - new_coords._names: + del self._data._variables[name] + self._data._indexes = dict(new_coords.xindexes) + self._data._coord_names.intersection_update(new_coords._names) def __delitem__(self, key: Hashable) -> None: if key in self: @@ -691,13 +713,11 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes - def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: - """Drops variables in coords, and any associated variables as well.""" - variables, indexes = drop_coords( - coords, self._data._coords, self._data.xindexes - ) - self._data._coords = variables - self._data._indexes = indexes + def _drop_coords(self, coord_names): + # should drop indexed coordinates only + for name in coord_names: + del self._data._coords[name] + del self._data._indexes[name] @property def variables(self): @@ -724,35 +744,48 @@ def _ipython_key_completions_(self): return self._data._ipython_key_completions_() -def 
drop_coords( - coords_to_drop: set[Hashable], variables, indexes: Indexes -) -> tuple[dict, dict]: - """Drop index variables associated with variables in coords_to_drop.""" - # Only warn when we're dropping the dimension with the multi-indexed coordinate - # If asked to drop a subset of the levels in a multi-index, we raise an error - # later but skip the warning here. - new_variables = dict(variables.copy()) - new_indexes = dict(indexes.copy()) - for key in coords_to_drop & set(indexes): - maybe_midx = indexes[key] - idx_coord_names = set(indexes.get_all_coords(key)) - if ( - isinstance(maybe_midx, PandasMultiIndex) - and key == maybe_midx.dim - and (idx_coord_names - coords_to_drop) - ): - warnings.warn( - f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " - f"other variables: {list(maybe_midx.index.names)!r}. " - f"This will raise an error in the future. Use `.drop_vars({idx_coord_names!r})` before " +def drop_indexed_coords( + coords_to_drop: set[Hashable], coords: Coordinates +) -> Coordinates: + """Drop indexed coordinates associated with coordinates in coords_to_drop. + + This will raise an error in case it corrupts any passed index and its + coordinate variables. + + """ + new_variables = dict(coords.variables) + new_indexes = dict(coords.xindexes) + + for idx, idx_coords in coords.xindexes.group_by_index(): + idx_drop_coords = set(idx_coords) & coords_to_drop + + # special case for pandas multi-index: still allow but deprecate + # dropping only its dimension coordinate. + # TODO: remove when removing PandasMultiIndex's dimension coordinate. + if isinstance(idx, PandasMultiIndex) and idx_drop_coords == {idx.dim}: + idx_drop_coords.update(idx.index.names) + emit_user_level_warning( + f"updating coordinate {idx.dim!r} with a PandasMultiIndex would leave " + f"the multi-index level coordinates {list(idx.index.names)!r} in an inconsistent state. " + f"This will raise an error in the future. 
Use `.drop_vars({list(idx_coords)!r})` before " "assigning new coordinate values.", FutureWarning, - stacklevel=4, ) - for k in idx_coord_names: - del new_variables[k] - del new_indexes[k] - return new_variables, new_indexes + + elif idx_drop_coords and len(idx_drop_coords) != len(idx_coords): + idx_drop_coords_str = ", ".join(f"{k!r}" for k in idx_drop_coords) + idx_coords_str = ", ".join(f"{k!r}" for k in idx_coords) + raise ValueError( + f"cannot drop or update coordinate(s) {idx_drop_coords_str}, which would corrupt " + f"the following index built from coordinates {idx_coords_str}:\n" + f"{idx}" + ) + + for k in idx_drop_coords: + del new_variables[k] + del new_indexes[k] + + return Coordinates._construct_direct(coords=new_variables, indexes=new_indexes) def assert_coordinate_consistent( @@ -773,11 +806,15 @@ def assert_coordinate_consistent( def create_coords_with_default_indexes( - coords: Mapping[Any, Any], data_vars: Mapping[Any, Variable] | None = None + coords: Mapping[Any, Any], data_vars: Mapping[Any, Any] | None = None ) -> Coordinates: - """Maybe create default indexes from a mapping of coordinates.""" + """Returns a Coordinates object from a mapping of coordinates (arbitrary objects). + + Create default (pandas) indexes for each of the input dimension coordinates. + Extract coordinates from each input DataArray. - # Note: data_vars are needed here only because a pd.MultiIndex object + """ + # Note: data_vars is needed here only because a pd.MultiIndex object # can be promoted as coordinates. # TODO: It won't be relevant anymore when this behavior will be dropped # in favor of the more explicit ``Coordinates.from_pandas_multiindex()``. 
@@ -791,34 +828,34 @@ def create_coords_with_default_indexes( indexes: dict[Hashable, Index] = {} variables: dict[Hashable, Variable] = {} - maybe_index_vars: dict[Hashable, Variable] = {} - mindex_data_vars: list[Hashable] = [] + # promote any pandas multi-index in data_vars as coordinates + coords_promoted: dict[Hashable, Any] = {} + pd_mindex_keys: list[Hashable] = [] for k, v in all_variables.items(): - if k in coords: - maybe_index_vars[k] = v - elif isinstance(v, pd.MultiIndex): - # TODO: eventually stop promoting multi-index passed via data variables - mindex_data_vars.append(k) - maybe_index_vars[k] = v - - if mindex_data_vars: - warnings.warn( - f"passing one or more `pandas.MultiIndex` via data variable(s) {mindex_data_vars} " - "will no longer create indexed coordinates in the future. " - "If you want to keep this behavior, pass it as coordinates instead.", + if isinstance(v, pd.MultiIndex): + coords_promoted[k] = v + pd_mindex_keys.append(k) + elif k in coords: + coords_promoted[k] = v + + if pd_mindex_keys: + pd_mindex_keys_fmt = ",".join([f"'{k}'" for k in pd_mindex_keys]) + emit_user_level_warning( + f"the `pandas.MultiIndex` object(s) passed as {pd_mindex_keys_fmt} coordinate(s) or " + "data variable(s) will no longer be implicitly promoted and wrapped into " + "multiple indexed coordinates in the future " + "(i.e., one coordinate for each multi-index level + one dimension coordinate). 
" + "If you want to keep this behavior, you need to first wrap it explicitly using " + "`mindex_coords = xarray.Coordinates.from_pandas_multiindex(mindex_obj, 'dim')` " + "and pass it as coordinates, e.g., `xarray.Dataset(coords=mindex_coords)`, " + "`dataset.assign_coords(mindex_coords)` or `dataarray.assign_coords(mindex_coords)`.", FutureWarning, ) - maybe_index_vars = { - k: v - for k, v in all_variables.items() - if k in coords or isinstance(v, pd.MultiIndex) - } - dataarray_coords: list[DataArrayCoordinates] = [] - for name, obj in maybe_index_vars.items(): + for name, obj in coords_promoted.items(): if isinstance(obj, DataArray): dataarray_coords.append(obj.coords) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bdf2d8babe1..9a53f24c67c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -120,7 +120,7 @@ from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes from xarray.core.dataarray import DataArray from xarray.core.groupby import DatasetGroupBy - from xarray.core.merge import CoercibleMapping + from xarray.core.merge import CoercibleMapping, CoercibleValue from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.resample import DatasetResample from xarray.core.rolling import DatasetCoarsen, DatasetRolling @@ -6879,6 +6879,10 @@ def assign( possible, but you cannot reference other variables created within the same ``assign`` call. + The new assigned variables that replace existing coordinates in the + original dataset are still listed as coordinates in the returned + Dataset. + See Also -------- pandas.DataFrame.assign @@ -6934,11 +6938,23 @@ def assign( """ variables = either_dict_or_kwargs(variables, variables_kwargs, "assign") data = self.copy() + # do all calculations first... results: CoercibleMapping = data._calc_assign_results(variables) - data.coords._maybe_drop_multiindex_coords(set(results.keys())) + + # split data variables to add/replace vs. 
coordinates to replace + results_data_vars: dict[Hashable, CoercibleValue] = {} + results_coords: dict[Hashable, CoercibleValue] = {} + for k, v in results.items(): + if k in data._coord_names: + results_coords[k] = v + else: + results_data_vars[k] = v + # ... and then assign - data.update(results) + data.coords.update(results_coords) + data.update(results_data_vars) + return data def to_array( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 183c0ad7371..f615ae70b6b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1435,7 +1435,7 @@ def test_coords(self) -> None: assert_identical(da, expected) with pytest.raises( - ValueError, match=r"cannot set or update variable.*corrupt.*index " + ValueError, match=r"cannot drop or update coordinate.*corrupt.*index " ): self.mda["level_1"] = ("x", np.arange(4)) self.mda.coords["level_1"] = ("x", np.arange(4)) @@ -1555,7 +1555,7 @@ def test_assign_coords(self) -> None: assert_identical(actual, expected) with pytest.raises( - ValueError, match=r"cannot set or update variable.*corrupt.*index " + ValueError, match=r"cannot drop or update coordinate.*corrupt.*index " ): self.mda.assign_coords(level_1=("x", range(4))) @@ -1570,7 +1570,9 @@ def test_assign_coords(self) -> None: def test_assign_coords_existing_multiindex(self) -> None: data = self.mda - with pytest.warns(FutureWarning, match=r"Updating MultiIndexed coordinate"): + with pytest.warns( + FutureWarning, match=r"updating coordinate.*MultiIndex.*inconsistent" + ): data.assign_coords(x=range(4)) def test_assign_coords_custom_index(self) -> None: @@ -1608,7 +1610,7 @@ def test_set_coords_update_index(self) -> None: def test_set_coords_multiindex_level(self) -> None: with pytest.raises( - ValueError, match=r"cannot set or update variable.*corrupt.*index " + ValueError, match=r"cannot drop or update coordinate.*corrupt.*index " ): self.mda["level_1"] = range(4) diff --git a/xarray/tests/test_dataset.py 
b/xarray/tests/test_dataset.py index 5304c54971a..3110c4e2882 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -199,7 +199,7 @@ def create_test_multiindex() -> Dataset: mindex = pd.MultiIndex.from_product( [["a", "b"], [1, 2]], names=("level_1", "level_2") ) - return Dataset({}, {"x": mindex}) + return Dataset({}, Coordinates.from_pandas_multiindex(mindex, "x")) def create_test_stacked_array() -> tuple[DataArray, DataArray]: @@ -648,10 +648,17 @@ def test_constructor_multiindex(self) -> None: assert_identical(ds, coords.to_dataset()) with pytest.warns( - FutureWarning, match=".*`pandas.MultiIndex` via data variable.*" + FutureWarning, + match=".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", ): Dataset(data_vars={"x": midx}) + with pytest.warns( + FutureWarning, + match=".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", + ): + Dataset(coords={"x": midx}) + def test_constructor_custom_index(self) -> None: class CustomIndex(Index): ... @@ -872,7 +879,7 @@ def test_coords_modify(self) -> None: assert_array_equal(actual["z"], ["a", "b"]) actual = data.copy(deep=True) - with pytest.raises(ValueError, match=r"conflicting sizes"): + with pytest.raises(ValueError, match=r"conflicting dimension sizes"): actual.coords["x"] = ("x", [-1]) assert_identical(actual, data) # should not be modified @@ -909,9 +916,7 @@ def test_coords_setitem_with_new_dimension(self) -> None: def test_coords_setitem_multiindex(self) -> None: data = create_test_multiindex() - with pytest.raises( - ValueError, match=r"cannot set or update variable.*corrupt.*index " - ): + with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "): data.coords["level_1"] = range(4) def test_coords_set(self) -> None: @@ -4244,22 +4249,58 @@ def test_assign_attrs(self) -> None: def test_assign_multiindex_level(self) -> None: data = create_test_multiindex() - with pytest.raises( - ValueError, match=r"cannot set or update variable.*corrupt.*index " - 
): + with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "): data.assign(level_1=range(4)) data.assign_coords(level_1=range(4)) + def test_assign_new_multiindex(self) -> None: + midx = pd.MultiIndex.from_arrays([["a", "a", "b", "b"], [0, 1, 0, 1]]) + midx_coords = Coordinates.from_pandas_multiindex(midx, "x") + + ds = Dataset(coords={"x": [1, 2]}) + expected = Dataset(coords=midx_coords) + + with pytest.warns( + FutureWarning, + match=".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", + ): + actual = ds.assign(x=midx) + assert_identical(actual, expected) + + @pytest.mark.parametrize("orig_coords", [{}, {"x": range(4)}]) + def test_assign_coords_new_multiindex(self, orig_coords) -> None: + ds = Dataset(coords=orig_coords) + midx = pd.MultiIndex.from_arrays( + [["a", "a", "b", "b"], [0, 1, 0, 1]], names=("one", "two") + ) + midx_coords = Coordinates.from_pandas_multiindex(midx, "x") + + expected = Dataset(coords=midx_coords) + + with pytest.warns( + FutureWarning, + match=".*`pandas.MultiIndex`.*no longer be implicitly promoted.*", + ): + actual = ds.assign_coords({"x": midx}) + assert_identical(actual, expected) + + actual = ds.assign_coords(midx_coords) + assert_identical(actual, expected) + def test_assign_coords_existing_multiindex(self) -> None: data = create_test_multiindex() - with pytest.warns(FutureWarning, match=r"Updating MultiIndexed coordinate"): - data.assign_coords(x=range(4)) - - with pytest.warns(FutureWarning, match=r"Updating MultiIndexed coordinate"): - data.assign(x=range(4)) + with pytest.warns( + FutureWarning, match=r"updating coordinate.*MultiIndex.*inconsistent" + ): + updated = data.assign_coords(x=range(4)) + # https://github.com/pydata/xarray/issues/7097 (coord names updated) + assert len(updated.coords) == 1 + with pytest.warns( + FutureWarning, match=r"updating coordinate.*MultiIndex.*inconsistent" + ): + updated = data.assign(x=range(4)) # https://github.com/pydata/xarray/issues/7097 (coord names 
updated) - updated = data.assign_coords(x=range(4)) assert len(updated.coords) == 1 def test_assign_all_multiindex_coords(self) -> None: From e5a38f6837ae9b9aa28a4bd063620a1cd802e093 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 30 Aug 2023 09:13:15 +0200 Subject: [PATCH 03/14] better error message set index from scalar coord (#8109) --- xarray/core/dataset.py | 4 +++- xarray/core/indexes.py | 9 ++++++++- xarray/tests/test_dataset.py | 6 ++++++ xarray/tests/test_indexes.py | 5 +++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9a53f24c67c..388fea92d51 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4698,7 +4698,9 @@ def set_index( if len(var_names) == 1 and (not append or dim not in self._indexes): var_name = var_names[0] var = self._variables[var_name] - if var.dims != (dim,): + # an error with a better message will be raised for scalar variables + # when creating the PandasIndex + if var.ndim > 0 and var.dims != (dim,): raise ValueError( f"dimension mismatch: try setting an index for dimension {dim!r} with " f"variable {var_name!r} that has dimensions {var.dims}" diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 7de290f4e14..b5e396963a1 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -616,7 +616,14 @@ def from_variables( name, var = next(iter(variables.items())) - if var.ndim != 1: + if var.ndim == 0: + raise ValueError( + f"cannot set a PandasIndex from the scalar variable {name!r}, " + "only 1-dimensional variables are supported. " + f"Note: you might want to use `obj.expand_dims({name!r})` to create a " + f"new dimension and turn {name!r} as an indexed dimension coordinate." 
+ ) + elif var.ndim != 1: raise ValueError( "PandasIndex only accepts a 1-dimensional variable, " f"variable {name!r} has {var.ndim} dimensions" diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3110c4e2882..a3e93414c0f 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3397,6 +3397,12 @@ def test_set_index(self) -> None: with pytest.raises(ValueError, match=r"dimension mismatch.*"): ds.set_index(y="x_var") + ds = Dataset(coords={"x": 1}) + with pytest.raises( + ValueError, match=r".*cannot set a PandasIndex.*scalar variable.*" + ): + ds.set_index(x="x") + def test_set_index_deindexed_coords(self) -> None: # test de-indexed coordinates are converted to base variable # https://github.com/pydata/xarray/issues/6969 diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index ebe9f3fb932..05d748541ed 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -145,6 +145,11 @@ def test_from_variables(self) -> None: with pytest.raises(ValueError, match=r".*only accepts one variable.*"): PandasIndex.from_variables({"x": var, "foo": var2}, options={}) + with pytest.raises( + ValueError, match=r".*cannot set a PandasIndex.*scalar variable.*" + ): + PandasIndex.from_variables({"foo": xr.Variable((), 1)}, options={}) + with pytest.raises( ValueError, match=r".*only accepts a 1-dimensional variable.*" ): From b136fcb679e9e70fd44b60688d96e75d4e3f8dcb Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 30 Aug 2023 09:57:35 +0200 Subject: [PATCH 04/14] Fix merge with compat=minimal (coord names) (#8104) * fix coord names after merge / compat minimal * update what's new * add assert in len(data_vars) --- doc/whats-new.rst | 4 ++++ xarray/core/dataset.py | 4 +++- xarray/core/merge.py | 3 +++ xarray/tests/test_dataset.py | 11 +++++++++++ xarray/tests/test_merge.py | 10 ++++++++++ 5 files changed, 31 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 
3eacbce2895..3d20af80b56 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,10 @@ Bug fixes - Improved handling of multi-coordinate indexes when updating coordinates, including bug fixes (and improved warnings for deprecated features) for pandas multi-indexes (:pull:`8094`). By `Benoît Bovy `_. +- Fixed a bug in :py:func:`merge` with ``compat='minimal'`` where the coordinate + names were not updated properly internally (:issue:`7405`, :issue:`7588`, + :pull:`8104`). + By `Benoît Bovy `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 388fea92d51..f1a0cb9dc34 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -438,7 +438,9 @@ def __iter__(self) -> Iterator[Hashable]: ) def __len__(self) -> int: - return len(self._dataset._variables) - len(self._dataset._coord_names) + length = len(self._dataset._variables) - len(self._dataset._coord_names) + assert length >= 0, "something is wrong with Dataset._coord_names" + return length def __contains__(self, key: Hashable) -> bool: return key in self._dataset._variables and key not in self._dataset._coord_names diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 24b6ed0ba43..3475db4a010 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -723,6 +723,9 @@ def merge_core( dims = calculate_dimensions(variables) coord_names, noncoord_names = determine_coords(coerced) + if compat == "minimal": + # coordinates may be dropped in merged results + coord_names.intersection_update(variables) if explicit_coords is not None: assert_valid_explicit_coords(variables, dims, explicit_coords) coord_names.update(explicit_coords) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a3e93414c0f..c31f3821e79 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1046,6 +1046,17 @@ def test_data_vars_properties(self) -> None: "bar": np.dtype("float64"), } + # len + ds.coords["x"] = [1] + assert 
len(ds.data_vars) == 2 + + # https://github.com/pydata/xarray/issues/7588 + with pytest.raises( + AssertionError, match="something is wrong with Dataset._coord_names" + ): + ds._coord_names = {"w", "x", "y", "z"} + len(ds.data_vars) + def test_equals_and_identical(self) -> None: data = create_test_data(seed=42) assert data.equals(data) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 63449708a79..c6597d5abb0 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -382,6 +382,16 @@ def test_merge_compat(self): assert ds1.identical(ds1.merge(ds2, compat="override")) + def test_merge_compat_minimal(self) -> None: + # https://github.com/pydata/xarray/issues/7405 + # https://github.com/pydata/xarray/issues/7588 + ds1 = xr.Dataset(coords={"foo": [1, 2, 3], "bar": 4}) + ds2 = xr.Dataset(coords={"foo": [1, 2, 3], "bar": 5}) + + actual = xr.merge([ds1, ds2], compat="minimal") + expected = xr.Dataset(coords={"foo": [1, 2, 3]}) + assert_identical(actual, expected) + def test_merge_auto_align(self): ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]}) From afda88e660f870e04ae609e0845746a26bff281a Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Wed, 30 Aug 2023 20:47:33 +0200 Subject: [PATCH 05/14] Fix Codecov (#7142) Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Co-authored-by: Anderson Banihirwe --- .codecov.yml | 34 +++++++++++++++++++++++----- .github/workflows/ci-additional.yaml | 4 ++-- README.md | 2 +- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/.codecov.yml b/.codecov.yml index f3a055c09d4..d0bec9539f8 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,16 +1,38 @@ codecov: - ci: - # by default, codecov doesn't recognize azure as a CI provider - - dev.azure.com - require_ci_to_pass: yes + require_ci_to_pass: true coverage: status: project: default: # Require 1% coverage, i.e., always succeed - target: 1 + target: 1% + 
flags: + - unittests + paths: + - "!xarray/tests/" + unittests: + target: 90% + flags: + - unittests + paths: + - "!xarray/tests/" + mypy: + target: 20% + flags: + - mypy patch: false changes: false -comment: off +comment: false + +flags: + unittests: + paths: + - "xarray" + - "!xarray/tests" + carryforward: false + mypy: + paths: + - "xarray" + carryforward: false diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 99ebefd9338..e3c406a981b 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -123,7 +123,7 @@ jobs: - name: Run mypy run: | - python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report + python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/ - name: Upload mypy coverage to Codecov uses: codecov/codecov-action@v3.1.4 @@ -177,7 +177,7 @@ jobs: - name: Run mypy run: | - python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report + python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/ - name: Upload mypy coverage to Codecov uses: codecov/codecov-action@v3.1.4 diff --git a/README.md b/README.md index 8035c9b901f..432d535d1b1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # xarray: N-D labeled arrays and datasets [![CI](https://github.com/pydata/xarray/workflows/CI/badge.svg?branch=main)](https://github.com/pydata/xarray/actions?query=workflow%3ACI) -[![Code coverage](https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg)](https://codecov.io/gh/pydata/xarray) +[![Code coverage](https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg?flag=unittests)](https://codecov.io/gh/pydata/xarray) [![Docs](https://readthedocs.org/projects/xray/badge/?version=latest)](https://docs.xarray.dev/) [![Benchmarked with asv](https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat)](https://pandas.pydata.org/speed/xarray/) 
[![Available on pypi](https://img.shields.io/pypi/v/xarray.svg)](https://pypi.python.org/pypi/xarray/) From 0f9f790c7e887bbfd13f4026fd1d37e4cd599ff1 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 31 Aug 2023 09:35:46 +0200 Subject: [PATCH 06/14] Better default behavior of the Coordinates constructor (#8107) * ``Coordinates.__init__`` create default indexes ... for any input dimension coordinate, if ``indexes=None``. Also, if another ``Coordinates`` object is passed, extract its indexes and raise if ``indexes`` is not None (no align/merge supported here). * add docstring examples * fix doctests * fix tests * update what's new --- doc/whats-new.rst | 6 ++ xarray/core/coordinates.py | 103 ++++++++++++++++++++++++++----- xarray/tests/test_coordinates.py | 57 ++++++++++------- xarray/tests/test_dataarray.py | 4 +- xarray/tests/test_dataset.py | 4 +- 5 files changed, 131 insertions(+), 43 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3d20af80b56..79dc2150b0c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,12 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- The :py:class:`Coordinates` constructor now creates a (pandas) index by + default for each dimension coordinate. To keep the previous behavior (no index + created), pass an empty dictionary to ``indexes``. The constructor now also + extracts and adds the indexes from another :py:class:`Coordinates` object + passed via ``coords`` (:pull:`8107`). + By `Benoît Bovy `_. 
Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 7c14a8c3d0a..c539536a294 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -17,6 +17,7 @@ from xarray.core.indexes import ( Index, Indexes, + PandasIndex, PandasMultiIndex, assert_no_index_corrupted, create_default_index_implicit, @@ -192,22 +193,69 @@ class Coordinates(AbstractCoordinates): Coordinates are either: - returned via the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords` - properties. - - built from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`). - - built directly from coordinate data and index objects (beware that no consistency - check is done on those inputs). - - In the latter case, no default (pandas) index is created. + properties + - built from Pandas or other index objects + (e.g., :py:meth:`Coordinates.from_pandas_multiindex`) + - built directly from coordinate data and Xarray ``Index`` objects (beware that + no consistency check is done on those inputs) Parameters ---------- - coords: dict-like - Mapping where keys are coordinate names and values are objects that - can be converted into a :py:class:`~xarray.Variable` object - (see :py:func:`~xarray.as_variable`). - indexes: dict-like - Mapping of where keys are coordinate names and values are - :py:class:`~xarray.indexes.Index` objects. + coords: dict-like, optional + Mapping where keys are coordinate names and values are objects that + can be converted into a :py:class:`~xarray.Variable` object + (see :py:func:`~xarray.as_variable`). If another + :py:class:`~xarray.Coordinates` object is passed, its indexes + will be added to the newly created object. + indexes: dict-like, optional + Mapping where keys are coordinate names and values are + :py:class:`~xarray.indexes.Index` objects. If None (default), + pandas indexes will be created for each dimension coordinate. + Passing an empty dictionary will skip this default behavior. 
+ + Examples + -------- + Create a dimension coordinate with a default (pandas) index: + + >>> xr.Coordinates({"x": [1, 2]}) + Coordinates: + * x (x) int64 1 2 + + Create a dimension coordinate with no index: + + >>> xr.Coordinates(coords={"x": [1, 2]}, indexes={}) + Coordinates: + x (x) int64 1 2 + + Create a new Coordinates object from existing dataset coordinates + (indexes are passed): + + >>> ds = xr.Dataset(coords={"x": [1, 2]}) + >>> xr.Coordinates(ds.coords) + Coordinates: + * x (x) int64 1 2 + + Create indexed coordinates from a ``pandas.MultiIndex`` object: + + >>> midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]]) + >>> xr.Coordinates.from_pandas_multiindex(midx, "x") + Coordinates: + * x (x) object MultiIndex + * x_level_0 (x) object 'a' 'a' 'b' 'b' + * x_level_1 (x) int64 0 1 0 1 + + Create a new Dataset object by passing a Coordinates object: + + >>> midx_coords = xr.Coordinates.from_pandas_multiindex(midx, "x") + >>> xr.Dataset(coords=midx_coords) + + Dimensions: (x: 4) + Coordinates: + * x (x) object MultiIndex + * x_level_0 (x) object 'a' 'a' 'b' 'b' + * x_level_1 (x) int64 0 1 0 1 + Data variables: + *empty* """ @@ -227,17 +275,40 @@ def __init__( from xarray.core.dataset import Dataset if coords is None: - variables = {} - elif isinstance(coords, Coordinates): + coords = {} + + variables: dict[Hashable, Variable] + default_indexes: dict[Hashable, PandasIndex] = {} + coords_obj_indexes: dict[Hashable, Index] = {} + + if isinstance(coords, Coordinates): + if indexes is not None: + raise ValueError( + "passing both a ``Coordinates`` object and a mapping of indexes " + "to ``Coordinates.__init__`` is not allowed " + "(this constructor does not support merging them)" + ) variables = {k: v.copy() for k, v in coords.variables.items()} + coords_obj_indexes = dict(coords.xindexes) else: - variables = {k: as_variable(v) for k, v in coords.items()} + variables = {} + for name, data in coords.items(): + var = as_variable(data, name=name) + if var.dims 
== (name,) and indexes is None: + index, index_vars = create_default_index_implicit(var, list(coords)) + default_indexes.update({k: index for k in index_vars}) + variables.update(index_vars) + else: + variables[name] = var if indexes is None: indexes = {} else: indexes = dict(indexes) + indexes.update(default_indexes) + indexes.update(coords_obj_indexes) + no_coord_index = set(indexes) - set(variables) if no_coord_index: raise ValueError( diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index bf68a5c1838..96eb9b045d2 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -17,6 +17,17 @@ def test_init_noindex(self) -> None: expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) assert_identical(coords.to_dataset(), expected) + def test_init_default_index(self) -> None: + coords = Coordinates(coords={"x": [1, 2]}) + expected = Dataset(coords={"x": [1, 2]}) + assert_identical(coords.to_dataset(), expected) + assert "x" in coords.xindexes + + def test_init_no_default_index(self) -> None: + # dimension coordinate with no default index (explicit) + coords = Coordinates(coords={"x": [1, 2]}, indexes={}) + assert "x" not in coords.xindexes + def test_init_from_coords(self) -> None: expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) coords = Coordinates(coords=expected.coords) @@ -25,10 +36,19 @@ def test_init_from_coords(self) -> None: # test variables copied assert coords.variables["foo"] is not expected.variables["foo"] - # default index - expected = Dataset(coords={"x": ("x", [0, 1, 2])}) - coords = Coordinates(coords=expected.coords, indexes=expected.xindexes) + # test indexes are extracted + expected = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=expected.coords) assert_identical(coords.to_dataset(), expected) + assert expected.xindexes == coords.xindexes + + # coords + indexes not supported + with pytest.raises( + ValueError, match="passing both.*Coordinates.*indexes.*not allowed" + ): 
+ coords = Coordinates( + coords=expected.coords, indexes={"x": PandasIndex([0, 1, 2], "x")} + ) def test_init_empty(self) -> None: coords = Coordinates() @@ -60,37 +80,31 @@ def test_from_pandas_multiindex(self) -> None: assert_identical(expected[name], coords.variables[name]) def test_dims(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.dims == {"x": 3} def test_sizes(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.sizes == {"x": 3} def test_dtypes(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.dtypes == {"x": int} def test_getitem(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert_identical( coords["x"], DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"), ) def test_delitem(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) del coords["x"] assert "x" not in coords def test_update(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) coords.update({"y": ("y", [4, 5, 6])}) assert "y" in coords @@ -99,18 +113,16 @@ def test_update(self) -> None: assert_identical(coords["y"], expected) def test_equals(self): - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.equals(coords) - assert not 
coords.equals("no_a_coords") + assert not coords.equals("not_a_coords") def test_identical(self): - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) assert coords.identical(coords) - assert not coords.identical("no_a_coords") + assert not coords.identical("not_a_coords") def test_copy(self) -> None: no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) @@ -129,8 +141,7 @@ def test_copy(self) -> None: assert source_ndarray(v0.data) is not source_ndarray(v1.data) def test_align(self) -> None: - _ds = Dataset(coords={"x": [0, 1, 2]}) - coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords = Coordinates(coords={"x": [0, 1, 2]}) left = coords diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f615ae70b6b..b4efe4ab2a7 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -489,7 +489,7 @@ def test_constructor_dask_coords(self) -> None: def test_constructor_no_default_index(self) -> None: # explicitly passing a Coordinates object skips the creation of default index - da = DataArray(range(3), coords=Coordinates({"x": ("x", [1, 2, 3])})) + da = DataArray(range(3), coords=Coordinates({"x": [1, 2, 3]}, indexes={})) assert "x" in da.coords assert "x" not in da.xindexes @@ -1587,7 +1587,7 @@ class CustomIndex(Index): assert isinstance(actual.xindexes["x"], CustomIndex) def test_assign_coords_no_default_index(self) -> None: - coords = Coordinates({"y": ("y", [1, 2, 3])}) + coords = Coordinates({"y": [1, 2, 3]}, indexes={}) da = DataArray([1, 2, 3], dims="y") actual = da.assign_coords(coords) assert_identical(actual.coords, coords, check_default_indexes=False) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c31f3821e79..e119cfe9bc6 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -636,7 +636,7 @@ def test_constructor_with_coords(self) -> 
None: def test_constructor_no_default_index(self) -> None: # explicitly passing a Coordinates object skips the creation of default index - ds = Dataset(coords=Coordinates({"x": ("x", [1, 2, 3])})) + ds = Dataset(coords=Coordinates({"x": [1, 2, 3]}, indexes={})) assert "x" in ds assert "x" not in ds.xindexes @@ -4356,7 +4356,7 @@ class CustomIndex(Index): assert isinstance(actual.xindexes["x"], CustomIndex) def test_assign_coords_no_default_index(self) -> None: - coords = Coordinates({"y": ("y", [1, 2, 3])}) + coords = Coordinates({"y": [1, 2, 3]}, indexes={}) ds = Dataset() actual = ds.assign_coords(coords) expected = coords.to_dataset() From 1043a9e13574e859ec08d19425341b2e359d2802 Mon Sep 17 00:00:00 2001 From: JR Date: Thu, 31 Aug 2023 14:41:06 +0200 Subject: [PATCH 07/14] Document drop_variables in open_mfdataset (#8083) * Document drop_variables in open_mfdataset * Document open_mfdataset drop_variables option in example * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Spelling --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tom Nicholas --- xarray/backends/api.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e35d85a1e2f..7be7541a79b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -930,7 +930,9 @@ def open_mfdataset( If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. **kwargs : optional - Additional arguments passed on to :py:func:`xarray.open_dataset`. + Additional arguments passed on to :py:func:`xarray.open_dataset`. For an + overview of some of the possible options, see the documentation of + :py:func:`xarray.open_dataset` Returns ------- @@ -965,6 +967,13 @@ def open_mfdataset( ... "file_*.nc", concat_dim="time", preprocess=partial_func ... 
) # doctest: +SKIP + It is also possible to use any argument to ``open_dataset`` together + with ``open_mfdataset``, such as for example ``drop_variables``: + + >>> ds = xr.open_mfdataset( + ... "file.nc", drop_variables=["varname_1", "varname_2"] # any list of vars + ... ) # doctest: +SKIP + References ---------- From dd09bddc62d701721565bbed3731e9586ea306d0 Mon Sep 17 00:00:00 2001 From: Alexander Fischer <42172336+afisc@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:37:14 +0200 Subject: [PATCH 08/14] adapted the docstring of xarray.DataArray.differentiate (#8127) Co-authored-by: afisc --- xarray/core/dataarray.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index df57ad898e4..3fab6e8cf0a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5193,9 +5193,10 @@ def differentiate( The coordinate to be used to compute the gradient. edge_order : {1, 2}, default: 1 N-th order accurate differences at the boundaries. - datetime_unit : {"Y", "M", "W", "D", "h", "m", "s", "ms", \ + datetime_unit : {"W", "D", "h", "m", "s", "ms", \ "us", "ns", "ps", "fs", "as", None}, optional - Unit to compute gradient. Only valid for datetime coordinate. + Unit to compute gradient. Only valid for datetime coordinate. "Y" and "M" are not available as + datetime_unit. 
Returns ------- From 71177d481eb0c3547cb850a4b3e866af6d4fded7 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 1 Sep 2023 15:28:16 +0200 Subject: [PATCH 09/14] Add `Coordinates.assign()` method (#8102) * add Coordinates.assign method * update what's new * test typing Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * add docstring examples * fix doctests * fix doctests (spaces) * doctests typo again * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix assign test after merging main --------- Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/api-hidden.rst | 5 ++- doc/whats-new.rst | 4 +++ xarray/core/coordinates.py | 54 +++++++++++++++++++++++++++++++- xarray/tests/test_coordinates.py | 10 ++++++ 4 files changed, 71 insertions(+), 2 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 527bdcdede2..d97c4010528 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -22,6 +22,7 @@ Coordinates.to_dataset Coordinates.to_index Coordinates.update + Coordinates.assign Coordinates.merge Coordinates.copy Coordinates.equals @@ -39,8 +40,9 @@ core.coordinates.DatasetCoordinates.to_dataset core.coordinates.DatasetCoordinates.to_index core.coordinates.DatasetCoordinates.update + core.coordinates.DatasetCoordinates.assign core.coordinates.DatasetCoordinates.merge - core.coordinates.DataArrayCoordinates.copy + core.coordinates.DatasetCoordinates.copy core.coordinates.DatasetCoordinates.equals core.coordinates.DatasetCoordinates.identical @@ -79,6 +81,7 @@ core.coordinates.DataArrayCoordinates.to_dataset core.coordinates.DataArrayCoordinates.to_index core.coordinates.DataArrayCoordinates.update + core.coordinates.DataArrayCoordinates.assign core.coordinates.DataArrayCoordinates.merge core.coordinates.DataArrayCoordinates.copy 
core.coordinates.DataArrayCoordinates.equals diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 79dc2150b0c..157795f08d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,10 @@ v2023.08.1 (unreleased) New Features ~~~~~~~~~~~~ +- Added the :py:meth:`Coordinates.assign` method that can be used to combine + different collections of coordinates prior to assigning them to a Dataset or + DataArray at once (:pull:`8102`). + By `Benoît Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index c539536a294..bebf9362532 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -24,7 +24,12 @@ ) from xarray.core.merge import merge_coordinates_without_align, merge_coords from xarray.core.types import Self, T_DataArray -from xarray.core.utils import Frozen, ReprObject, emit_user_level_warning +from xarray.core.utils import ( + Frozen, + ReprObject, + either_dict_or_kwargs, + emit_user_level_warning, +) from xarray.core.variable import Variable, as_variable, calculate_dimensions if TYPE_CHECKING: @@ -560,6 +565,53 @@ def update(self, other: Mapping[Any, Any]) -> None: self._update_coords(coords, indexes) + def assign( + self, coords: Mapping | None = None, **coords_kwargs: Any + ) -> Coordinates: + """Assign new coordinates (and indexes) to a Coordinates object, returning + a new object with all the original coordinates in addition to the new ones. + + Parameters + ---------- + coords : :class:`Coordinates` or mapping of hashable to Any + Mapping from coordinate names to the new values. If a ``Coordinates`` + object is passed, its indexes are assigned in the returned object. + Otherwise, a default (pandas) index is created for each dimension + coordinate found in the mapping. + **coords_kwargs + The keyword arguments form of ``coords``. + One of ``coords`` or ``coords_kwargs`` must be provided. 
+ + Returns + ------- + new_coords : Coordinates + A new Coordinates object with the new coordinates (and indexes) + in addition to all the existing coordinates. + + Examples + -------- + >>> coords = xr.Coordinates() + >>> coords + Coordinates: + *empty* + + >>> coords.assign(x=[1, 2]) + Coordinates: + * x (x) int64 1 2 + + >>> midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]]) + >>> coords.assign(xr.Coordinates.from_pandas_multiindex(midx, "y")) + Coordinates: + * y (y) object MultiIndex + * y_level_0 (y) object 'a' 'a' 'b' 'b' + * y_level_1 (y) int64 0 1 0 1 + + """ + coords = either_dict_or_kwargs(coords, coords_kwargs, "assign") + new_coords = self.copy() + new_coords.update(coords) + return new_coords + def _overwrite_indexes( self, indexes: Mapping[Any, Index], diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 96eb9b045d2..27abc6c0ae2 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -124,6 +124,16 @@ def test_identical(self): assert coords.identical(coords) assert not coords.identical("not_a_coords") + def test_assign(self) -> None: + coords = Coordinates(coords={"x": [0, 1, 2]}) + expected = Coordinates(coords={"x": [0, 1, 2], "y": [3, 4]}) + + actual = coords.assign(y=[3, 4]) + assert_identical(actual, expected) + + actual = coords.assign({"y": [3, 4]}) + assert_identical(actual, expected) + def test_copy(self) -> None: no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) copied = no_index_coords.copy() From e9c1962f31a7b5fd7a98ee4c2adf2ac147aabbcf Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 4 Sep 2023 11:05:13 -0700 Subject: [PATCH 10/14] Fix pandas interpolate(fill_value=) error (#8139) --- xarray/tests/test_missing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index a6b6b1f80ce..c303659116b 100644 --- 
a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -104,7 +104,8 @@ def test_interpolate_pd_compat(): for dim in ["time", "x"]: actual = da.interpolate_na(method=method, dim=dim, fill_value=np.nan) expected = df.interpolate( - method=method, axis=da.get_axis_num(dim), fill_value=(np.nan, np.nan) + method=method, + axis=da.get_axis_num(dim), ) # Note, Pandas does some odd things with the left/right fill_value # for the linear methods. This next line inforces the xarray @@ -140,7 +141,8 @@ def test_interpolate_pd_compat_non_uniform_index(): method="linear", dim=dim, use_coordinate=True, fill_value=np.nan ) expected = df.interpolate( - method=method, axis=da.get_axis_num(dim), fill_value=np.nan + method=method, + axis=da.get_axis_num(dim), ) # Note, Pandas does some odd things with the left/right fill_value From f13da94db8ab4b564938a5e67435ac709698f1c9 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 5 Sep 2023 10:35:36 +0200 Subject: [PATCH 11/14] fix doctests: pandas 2.1 MultiIndex repr with nan (#8141) Co-authored-by: Mathias Hauser --- xarray/core/dataarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3fab6e8cf0a..5a68fc7ffac 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2904,9 +2904,9 @@ def to_unstacked_dataset(self, dim: Hashable, level: int | Hashable = 0) -> Data b (x) int64 0 3 >>> stacked = data.to_stacked_array("z", ["x"]) >>> stacked.indexes["z"] - MultiIndex([('a', 0.0), - ('a', 1.0), - ('a', 2.0), + MultiIndex([('a', 0), + ('a', 1), + ('a', 2), ('b', nan)], name='z') >>> roundtripped = stacked.to_unstacked_dataset(dim="z") From 92f52783683eb9b1665a08d11f4c233ccef7f0f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 20:03:48 -0700 Subject: [PATCH 12/14] [pre-commit.ci] pre-commit autoupdate (#8145) MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.282 → v0.0.287](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.282...v0.0.287) - [github.com/pre-commit/mirrors-mypy: v1.4.1 → v1.5.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.4.1...v1.5.1) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Maximilian Roos --- .pre-commit-config.yaml | 4 ++-- xarray/backends/api.py | 4 ++-- xarray/coding/cftime_offsets.py | 8 +++---- xarray/coding/cftimeindex.py | 11 +++++----- xarray/coding/strings.py | 6 +++--- xarray/coding/times.py | 14 ++++++------- xarray/coding/variables.py | 4 ++-- xarray/conventions.py | 18 +++++++--------- xarray/core/alignment.py | 2 +- xarray/core/arithmetic.py | 4 ++-- xarray/core/combine.py | 12 +++++------ xarray/core/computation.py | 6 +++--- xarray/core/coordinates.py | 2 +- xarray/core/dataarray.py | 8 +++---- xarray/core/dataset.py | 37 +++++++++++++++------------------ xarray/core/formatting_html.py | 6 +++--- xarray/core/indexing.py | 2 +- xarray/core/variable.py | 18 +++++++--------- xarray/plot/dataarray_plot.py | 4 ++-- xarray/plot/utils.py | 2 +- xarray/testing.py | 2 +- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_variable.py | 2 +- 23 files changed, 82 insertions(+), 96 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e02b7d0bd08..c2586a12aa2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: files: ^xarray/ - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: 'v0.0.282' + rev: 'v0.0.287' hooks: - id: ruff args: ["--fix"] @@ -35,7 +35,7 @@ repos: additional_dependencies: ["black==23.7.0"] - id: blackdoc-autoupdate-black - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.4.1 + rev: v1.5.1 hooks: - id: mypy # Copied from setup.cfg diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7be7541a79b..58a05aeddce 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1056,8 +1056,8 @@ def open_mfdataset( ) else: raise ValueError( - "{} is an invalid option for the keyword argument" - " ``combine``".format(combine) + f"{combine} is an invalid option for the keyword argument" + " ``combine``" ) except ValueError: for ds in datasets: diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a746163c3fd..0b469ae26fc 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -105,7 +105,7 @@ def __init__(self, n: int = 1): if not isinstance(n, int): raise TypeError( "The provided multiple 'n' must be an integer. " - "Instead a value of type {!r} was provided.".format(type(n)) + f"Instead a value of type {type(n)!r} was provided." ) self.n = n @@ -353,13 +353,13 @@ def _validate_month(month, default_month): raise TypeError( "'self.month' must be an integer value between 1 " "and 12. Instead, it was set to a value of " - "{!r}".format(result_month) + f"{result_month!r}" ) elif not (1 <= result_month <= 12): raise ValueError( "'self.month' must be an integer value between 1 " "and 12. Instead, it was set to a value of " - "{!r}".format(result_month) + f"{result_month!r}" ) return result_month @@ -771,7 +771,7 @@ def to_cftime_datetime(date_str_or_date, calendar=None): raise TypeError( "date_str_or_date must be a string or a " "subclass of cftime.datetime. Instead got " - "{!r}.".format(date_str_or_date) + f"{date_str_or_date!r}." 
) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8f3472dce19..a0800db445a 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -228,12 +228,12 @@ def assert_all_valid_date_type(data): if not isinstance(sample, cftime.datetime): raise TypeError( "CFTimeIndex requires cftime.datetime " - "objects. Got object of {}.".format(date_type) + f"objects. Got object of {date_type}." ) if not all(isinstance(value, date_type) for value in data): raise TypeError( "CFTimeIndex requires using datetime " - "objects of all the same type. Got\n{}.".format(data) + f"objects of all the same type. Got\n{data}." ) @@ -553,8 +553,7 @@ def shift(self, n: int | float, freq: str | timedelta): return self + n * to_offset(freq) else: raise TypeError( - "'freq' must be of type " - "str or datetime.timedelta, got {}.".format(freq) + "'freq' must be of type " f"str or datetime.timedelta, got {freq}." ) def __add__(self, other): @@ -636,10 +635,10 @@ def to_datetimeindex(self, unsafe=False): if calendar not in _STANDARD_CALENDARS and not unsafe: warnings.warn( "Converting a CFTimeIndex with dates from a non-standard " - "calendar, {!r}, to a pandas.DatetimeIndex, which uses dates " + f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which uses dates " "from the standard calendar. 
This may lead to subtle errors " "in operations that depend on the length of time between " - "dates.".format(calendar), + "dates.", RuntimeWarning, stacklevel=2, ) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index d0bfb1a7a63..d10af65c44a 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -59,9 +59,9 @@ def encode(self, variable, name=None): if contains_unicode and (encode_as_char or not self.allows_unicode): if "_FillValue" in attrs: raise NotImplementedError( - "variable {!r} has a _FillValue specified, but " + f"variable {name!r} has a _FillValue specified, but " "_FillValue is not yet supported on unicode strings: " - "https://github.com/pydata/xarray/issues/1647".format(name) + "https://github.com/pydata/xarray/issues/1647" ) string_encoding = encoding.pop("_Encoding", "utf-8") @@ -176,7 +176,7 @@ def char_to_bytes(arr): if len(arr.chunks[-1]) > 1: raise ValueError( "cannot stacked dask character array with " - "multiple chunks in the last dimension: {}".format(arr) + f"multiple chunks in the last dimension: {arr}" ) dtype = np.dtype("S" + str(arr.shape[-1])) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 3745d61acc0..4291d95979c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -218,8 +218,8 @@ def _decode_datetime_with_pandas( ) -> np.ndarray: if not _is_standard_calendar(calendar): raise OutOfBoundsDatetime( - "Cannot decode times from a non-standard calendar, {!r}, using " - "pandas.".format(calendar) + f"Cannot decode times from a non-standard calendar, {calendar!r}, using " + "pandas." ) delta, ref_date = _unpack_netcdf_time_units(units) @@ -452,8 +452,8 @@ def cftime_to_nptime(times, raise_on_invalid: bool = True) -> np.ndarray: except ValueError as e: if raise_on_invalid: raise ValueError( - "Cannot convert date {} to a date in the " - "standard calendar. Reason: {}.".format(t, e) + f"Cannot convert date {t} to a date in the " + f"standard calendar. Reason: {e}." 
) else: dt = "NaT" @@ -485,10 +485,8 @@ def convert_times(times, date_type, raise_on_invalid: bool = True) -> np.ndarray except ValueError as e: if raise_on_invalid: raise ValueError( - "Cannot convert date {} to a date in the " - "{} calendar. Reason: {}.".format( - t, date_type(2000, 1, 1).calendar, e - ) + f"Cannot convert date {t} to a date in the " + f"{date_type(2000, 1, 1).calendar} calendar. Reason: {e}." ) else: dt = np.NaN diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 8ba7dcbb0e2..58c4739b810 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -179,10 +179,10 @@ def safe_setitem(dest, key: Hashable, value, name: T_Name = None): if key in dest: var_str = f" on variable {name!r}" if name else "" raise ValueError( - "failed to prevent overwriting existing key {} in attrs{}. " + f"failed to prevent overwriting existing key {key} in attrs{var_str}. " "This is probably an encoding field used by xarray to describe " "how a variable is serialized. To proceed, remove this key from " - "the variable's attributes manually.".format(key, var_str) + "the variable's attributes manually." ) dest[key] = value diff --git a/xarray/conventions.py b/xarray/conventions.py index 5a6675d60c1..596831e270a 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -75,20 +75,18 @@ def _infer_dtype(array, name: T_Name = None) -> np.dtype: return dtype raise ValueError( - "unable to infer dtype on variable {!r}; xarray " - "cannot serialize arbitrary Python objects".format(name) + f"unable to infer dtype on variable {name!r}; xarray " + "cannot serialize arbitrary Python objects" ) def ensure_not_multiindex(var: Variable, name: T_Name = None) -> None: if isinstance(var, IndexVariable) and isinstance(var.to_index(), pd.MultiIndex): raise NotImplementedError( - "variable {!r} is a MultiIndex, which cannot yet be " + f"variable {name!r} is a MultiIndex, which cannot yet be " "serialized to netCDF files. 
Instead, either use reset_index() " "to convert MultiIndex levels into coordinate variables instead " - "or use https://cf-xarray.readthedocs.io/en/latest/coding.html.".format( - name - ) + "or use https://cf-xarray.readthedocs.io/en/latest/coding.html." ) @@ -114,11 +112,11 @@ def ensure_dtype_not_object(var: Variable, name: T_Name = None) -> Variable: if is_duck_dask_array(data): warnings.warn( - "variable {} has data in the form of a dask array with " + f"variable {name} has data in the form of a dask array with " "dtype=object, which means it is being loaded into memory " "to determine a data type that can be safely stored on disk. " "To avoid this, coerce this variable to a fixed-size dtype " - "with astype() before saving it.".format(name), + "with astype() before saving it.", SerializationWarning, ) data = data.compute() @@ -635,9 +633,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): for name in list(non_dim_coord_names): if isinstance(name, str) and " " in name: warnings.warn( - "coordinate {!r} has a space in its name, which means it " + f"coordinate {name!r} has a space in its name, which means it " "cannot be marked as a coordinate on disk and will be " - "saved as a data variable instead".format(name), + "saved as a data variable instead", SerializationWarning, stacklevel=6, ) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 39ff878b56d..d2bbc459d83 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -839,7 +839,7 @@ def is_alignable(obj): elif raise_on_invalid: raise ValueError( "object to align is neither an xarray.Dataset, " - "an xarray.DataArray nor a dictionary: {!r}".format(variables) + f"an xarray.DataArray nor a dictionary: {variables!r}" ) else: out.append(variables) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 5b2cf38ee2e..5cdbc732741 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -56,10 +56,10 @@ def __array_ufunc__(self, 
ufunc, method, *inputs, **kwargs): if ufunc.signature is not None: raise NotImplementedError( - "{} not supported: xarray objects do not directly implement " + f"{ufunc} not supported: xarray objects do not directly implement " "generalized ufuncs. Instead, use xarray.apply_ufunc or " "explicitly convert to xarray objects to NumPy arrays " - "(e.g., with `.values`).".format(ufunc) + "(e.g., with `.values`)." ) if method != "__call__": diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1599fb60ddc..eecd01d011e 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -109,9 +109,9 @@ def _infer_concat_order_from_coords(datasets): ascending = False else: raise ValueError( - "Coordinate variable {} is neither " + f"Coordinate variable {dim} is neither " "monotonically increasing nor " - "monotonically decreasing on all datasets".format(dim) + "monotonically decreasing on all datasets" ) # Assume that any two datasets whose coord along dim starts @@ -221,10 +221,8 @@ def _combine_nd( n_dims = len(example_tile_id) if len(concat_dims) != n_dims: raise ValueError( - "concat_dims has length {} but the datasets " - "passed are nested in a {}-dimensional structure".format( - len(concat_dims), n_dims - ) + f"concat_dims has length {len(concat_dims)} but the datasets " + f"passed are nested in a {n_dims}-dimensional structure" ) # Each iteration of this loop reduces the length of the tile_ids tuples @@ -646,7 +644,7 @@ def _combine_single_variable_hypercube( if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing): raise ValueError( "Resulting object does not have monotonic" - " global indexes along dimension {}".format(dim) + f" global indexes along dimension {dim}" ) return concatenated diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 685307fc8c3..fe89672e392 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1286,7 +1286,7 @@ def cov( if any(not isinstance(arr, DataArray) for 
arr in [da_a, da_b]): raise TypeError( "Only xr.DataArray is supported." - "Given {}.".format([type(arr) for arr in [da_a, da_b]]) + f"Given {[type(arr) for arr in [da_a, da_b]]}." ) return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") @@ -1364,7 +1364,7 @@ def corr(da_a: T_DataArray, da_b: T_DataArray, dim: Dims = None) -> T_DataArray: if any(not isinstance(arr, DataArray) for arr in [da_a, da_b]): raise TypeError( "Only xr.DataArray is supported." - "Given {}.".format([type(arr) for arr in [da_a, da_b]]) + f"Given {[type(arr) for arr in [da_a, da_b]]}." ) return _cov_corr(da_a, da_b, dim=dim, method="corr") @@ -1707,7 +1707,7 @@ def dot( if any(not isinstance(arr, (Variable, DataArray)) for arr in arrays): raise TypeError( "Only xr.DataArray and xr.Variable are supported." - "Given {}.".format([type(arr) for arr in arrays]) + f"Given {[type(arr) for arr in arrays]}." ) if len(arrays) == 0: diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index bebf9362532..489b6f0d04e 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -130,7 +130,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index: elif set(ordered_dims) != set(self.dims): raise ValueError( "ordered_dims must match dims, but does not: " - "{} vs {}".format(ordered_dims, self.dims) + f"{ordered_dims} vs {self.dims}" ) if len(ordered_dims) == 0: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5a68fc7ffac..dc0b2032a37 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2234,8 +2234,7 @@ def interp( """ if self.dtype.kind not in "uifc": raise TypeError( - "interp only works for a numeric type array. " - "Given {}.".format(self.dtype) + "interp only works for a numeric type array. " f"Given {self.dtype}." ) ds = self._to_temp_dataset().interp( coords, @@ -2362,8 +2361,7 @@ def interp_like( """ if self.dtype.kind not in "uifc": raise TypeError( - "interp only works for a numeric type array. 
" - "Given {}.".format(self.dtype) + "interp only works for a numeric type array. " f"Given {self.dtype}." ) ds = self._to_temp_dataset().interp_like( other, method=method, kwargs=kwargs, assume_sorted=assume_sorted @@ -4325,7 +4323,7 @@ def from_dict(cls: type[T_DataArray], d: Mapping[str, Any]) -> T_DataArray: except KeyError as e: raise ValueError( "cannot convert dict when coords are missing the key " - "'{dims_data}'".format(dims_data=str(e.args[0])) + f"'{str(e.args[0])}'" ) try: data = d["data"] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f1a0cb9dc34..76c9a2359d9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1334,13 +1334,13 @@ def _copy( if keys_not_in_vars: raise ValueError( "Data must only contain variables in original " - "dataset. Extra variables: {}".format(keys_not_in_vars) + f"dataset. Extra variables: {keys_not_in_vars}" ) keys_missing_from_data = var_keys - data_keys if keys_missing_from_data: raise ValueError( "Data must contain all variables in original " - "dataset. Data is missing {}".format(keys_missing_from_data) + f"dataset. Data is missing {keys_missing_from_data}" ) indexes, index_vars = self.xindexes.copy_indexes(deep=deep) @@ -2697,7 +2697,7 @@ def _validate_indexers( if v.ndim > 1: raise IndexError( "Unlabeled multi-dimensional array cannot be " - "used for indexing: {}".format(k) + f"used for indexing: {k}" ) yield k, v @@ -2737,9 +2737,9 @@ def _get_indexers_coords_and_indexes(self, indexers): if v.dtype.kind == "b": if v.ndim != 1: # we only support 1-d boolean array raise ValueError( - "{:d}d-boolean array is used for indexing along " - "dimension {!r}, but only 1d boolean arrays are " - "supported.".format(v.ndim, k) + f"{v.ndim:d}d-boolean array is used for indexing along " + f"dimension {k!r}, but only 1d boolean arrays are " + "supported." ) # Make sure in case of boolean DataArray, its # coordinate also should be indexed. 
@@ -3881,7 +3881,7 @@ def _validate_interp_indexer(x, new_x): "coordinate, the coordinates to " "interpolate to must be either datetime " "strings or datetimes. " - "Instead got\n{}".format(new_x) + f"Instead got\n{new_x}" ) return x, new_x @@ -4531,8 +4531,7 @@ def expand_dims( raise ValueError(f"Dimension {d} already exists.") if d in self._variables and not utils.is_scalar(self._variables[d]): raise ValueError( - "{dim} already exists as coordinate or" - " variable name.".format(dim=d) + f"{d} already exists as coordinate or" " variable name." ) variables: dict[Hashable, Variable] = {} @@ -4555,8 +4554,7 @@ def expand_dims( pass # Do nothing if the dimensions value is just an int else: raise TypeError( - "The value of new dimension {k} must be " - "an iterable or an int".format(k=k) + f"The value of new dimension {k} must be " "an iterable or an int" ) for k, v in self._variables.items(): @@ -5282,7 +5280,7 @@ def to_stacked_array( if not dims_include_sample_dims: raise ValueError( "All variables in the dataset must contain the " - "dimensions {}.".format(dims) + f"dimensions {dims}." 
) def ensure_stackable(val): @@ -7020,8 +7018,8 @@ def _normalize_dim_order( dim_order = list(self.dims) elif set(dim_order) != set(self.dims): raise ValueError( - "dim_order {} does not match the set of dimensions of this " - "Dataset: {}".format(dim_order, list(self.dims)) + f"dim_order {dim_order} does not match the set of dimensions of this " + f"Dataset: {list(self.dims)}" ) ordered_dims = {k: self.dims[k] for k in dim_order} @@ -7452,8 +7450,7 @@ def from_dict(cls: type[T_Dataset], d: Mapping[Any, Any]) -> T_Dataset: } except KeyError as e: raise ValueError( - "cannot convert dict without the key " - "'{dims_data}'".format(dims_data=str(e.args[0])) + "cannot convert dict without the key " f"'{str(e.args[0])}'" ) obj = cls(variable_dict) @@ -8169,8 +8166,8 @@ def differentiate( coord_var = self[coord].variable if coord_var.ndim != 1: raise ValueError( - "Coordinate {} must be 1 dimensional but is {}" - " dimensional".format(coord, coord_var.ndim) + f"Coordinate {coord} must be 1 dimensional but is {coord_var.ndim}" + " dimensional" ) dim = coord_var.dims[0] @@ -8271,8 +8268,8 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): coord_var = self[coord].variable if coord_var.ndim != 1: raise ValueError( - "Coordinate {} must be 1 dimensional but is {}" - " dimensional".format(coord, coord_var.ndim) + f"Coordinate {coord} must be 1 dimensional but is {coord_var.ndim}" + " dimensional" ) dim = coord_var.dims[0] diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 60bb901c31a..d949cbdfbd1 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -65,10 +65,10 @@ def summarize_attrs(attrs): def _icon(icon_name): # icon_name should be defined in xarray/static/html/icon-svg-inline.html return ( - "" - "" + f"" + f"" "" - "".format(icon_name) + "" ) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index acab9ccc60b..7969ded3102 100644 --- a/xarray/core/indexing.py +++ 
b/xarray/core/indexing.py @@ -1303,7 +1303,7 @@ def __init__(self, array): if not isinstance(array, np.ndarray): raise TypeError( "NumpyIndexingAdapter only wraps np.ndarray. " - "Trying to wrap {}".format(type(array)) + f"Trying to wrap {type(array)}" ) self.array = array diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c89545c43ae..05f9930aacd 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -759,18 +759,18 @@ def _validate_indexers(self, key): if k.ndim > 1: raise IndexError( "Unlabeled multi-dimensional array cannot be " - "used for indexing: {}".format(k) + f"used for indexing: {k}" ) if k.dtype.kind == "b": if self.shape[self.get_axis_num(dim)] != len(k): raise IndexError( - "Boolean array size {:d} is used to index array " - "with shape {:s}.".format(len(k), str(self.shape)) + f"Boolean array size {len(k):d} is used to index array " + f"with shape {str(self.shape):s}." ) if k.ndim > 1: raise IndexError( - "{}-dimensional boolean indexing is " - "not supported. ".format(k.ndim) + f"{k.ndim}-dimensional boolean indexing is " + "not supported. " ) if is_duck_dask_array(k.data): raise KeyError( @@ -783,9 +783,7 @@ def _validate_indexers(self, key): raise IndexError( "Boolean indexer should be unlabeled or on the " "same dimension to the indexed array. Indexer is " - "on {:s} but the target dimension is {:s}.".format( - str(k.dims), dim - ) + f"on {str(k.dims):s} but the target dimension is {dim:s}." ) def _broadcast_indexes_outer(self, key): @@ -2550,8 +2548,8 @@ def coarsen_reshape(self, windows, boundary, side): variable = variable.pad(pad_width, mode="constant") else: raise TypeError( - "{} is invalid for boundary. Valid option is 'exact', " - "'trim' and 'pad'".format(boundary[d]) + f"{boundary[d]} is invalid for boundary. 
Valid option is 'exact', " + "'trim' and 'pad'" ) shape = [] diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index d2c0a8e2af6..3f7b1568e64 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -486,8 +486,8 @@ def line( if ndims > 2: raise ValueError( "Line plots are for 1- or 2-dimensional DataArrays. " - "Passed DataArray has {ndims} " - "dimensions".format(ndims=ndims) + f"Passed DataArray has {ndims} " + "dimensions" ) # The allargs dict passed to _easy_facetgrid above contains args diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 2c58fe83cef..70e8bd3fdb9 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -1131,7 +1131,7 @@ def _get_color_and_size(value): # Labels are not numerical so modifying label_values is not # possible, instead filter the array with nicely distributed # indexes: - if type(num) == int: + if type(num) == int: # noqa: E721 loc = mpl.ticker.LinearLocator(num) else: raise ValueError("`num` only supports integers for non-numeric labels.") diff --git a/xarray/testing.py b/xarray/testing.py index 6a8bb04f170..0837b562668 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -364,7 +364,7 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): set(ds._variables), ) - assert type(ds._dims) is dict, ds._dims + assert type(ds._dims) is dict, ds._dims # noqa: E721 assert all(isinstance(v, int) for v in ds._dims.values()), ds._dims var_dims: set[Hashable] = set() for v in ds._variables.values(): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e119cfe9bc6..d0e9f01bdae 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -680,7 +680,7 @@ def test_properties(self) -> None: # change them inadvertently: assert isinstance(ds.dims, utils.Frozen) assert isinstance(ds.dims.mapping, dict) - assert type(ds.dims.mapping) is dict + assert type(ds.dims.mapping) is dict # noqa: E721 assert ds.dims == 
{"dim1": 8, "dim2": 9, "dim3": 10, "time": 20} assert ds.sizes == ds.dims diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index f30cdcf3f73..118d78d2e04 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1091,7 +1091,7 @@ def test_data_and_values(self): def test_numpy_same_methods(self): v = Variable([], np.float32(0.0)) assert v.item() == 0 - assert type(v.item()) is float + assert type(v.item()) is float # noqa: E721 v = IndexVariable("x", np.arange(5)) assert 2 == v.searchsorted(2) From c9946ec4cef48477471942221805fccb3e3b01b6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 6 Sep 2023 23:29:02 -0700 Subject: [PATCH 13/14] Cut middle version from CI (#8156) --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index da4ad32b1f5..4d59fe0531f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,7 +42,7 @@ jobs: matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] # Bookend python versions - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.9", "3.11"] env: [""] include: # Minimum python version: From e2b6f3468ef829b8a83637965d34a164bf3bca78 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 7 Sep 2023 10:21:11 +0200 Subject: [PATCH 14/14] dirty workaround for mypy 1.5 error (#8142) Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataset.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 76c9a2359d9..97f528aea7d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -132,6 +132,7 @@ DatetimeLike, DatetimeUnitOptions, Dims, + DsCompatible, ErrorOptions, ErrorOptionsWithWarn, InterpOptions, @@ -696,6 +697,11 @@ def __init__( self._dims = dims self._indexes = indexes + # TODO: dirty workaround 
for mypy 1.5 error with inherited DatasetOpsMixin vs. Mapping + # related to https://github.com/python/mypy/issues/9319? + def __eq__(self: T_Dataset, other: DsCompatible) -> T_Dataset: # type: ignore[override] + return super().__eq__(other) + @classmethod def load_store(cls: type[T_Dataset], store, decoder=None) -> T_Dataset: """Create a new dataset from the contents of a backends.*DataStore