From c6c01b12e06be53bbcdd3292b3db1d410ea9c21f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 10:00:12 -0600 Subject: [PATCH 1/9] Support pandas copy-on-write behaviour (#8846) * Support pandas copy-on-write behaviour Closes #8843 * Update xarray/tests/__init__.py * One more fix * Fix interp * Avoid copy * Try again --- xarray/core/variable.py | 9 +++++- xarray/tests/__init__.py | 12 +++++++- xarray/tests/test_backends.py | 8 +++-- xarray/tests/test_dataset.py | 57 +++++++++++++---------------------- xarray/tests/test_missing.py | 9 ++++-- xarray/tests/test_variable.py | 15 +++++++++ 6 files changed, 67 insertions(+), 43 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a03e93ac699..cad48d0775a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -209,7 +209,14 @@ def _possibly_convert_objects(values): as_series = pd.Series(values.ravel(), copy=False) if as_series.dtype.kind in "mM": as_series = _as_nanosecond_precision(as_series) - return np.asarray(as_series).reshape(values.shape) + result = np.asarray(as_series).reshape(values.shape) + if not result.flags.writeable: + # GH8843, pandas copy-on-write mode creates read-only arrays by default + try: + result.flags.writeable = True + except ValueError: + result = result.copy() + return result def _possibly_convert_datetime_or_timedelta_index(data): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 2e6e638f5b1..5007db9eeb2 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -20,6 +20,7 @@ from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options +from xarray.core.variable import IndexVariable from xarray.testing import ( # noqa: F401 assert_chunks_equal, assert_duckarray_allclose, @@ -47,6 +48,15 @@ ) +def assert_writeable(ds): + readonly = [ + name + for name, var in ds.variables.items() + if not isinstance(var, IndexVariable) and not var.data.flags.writeable + ] + assert not readonly, readonly + + def _importorskip( modname: str, minversion: str | None = None ) -> tuple[bool, pytest.MarkDecorator]: @@ -326,7 +336,7 @@ def create_test_data( numbers_values = np.random.randint(0, 3, _dims["dim3"], dtype="int64") obj.coords["numbers"] = ("dim3", numbers_values) obj.encoding = {"foo": "bar"} - assert all(obj.data.flags.writeable for obj in obj.variables.values()) + assert_writeable(obj) return obj diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b97d5ced938..3fb137977e8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2605,7 +2605,9 @@ def test_append_with_append_dim_no_overwrite(self) -> None: # overwrite a coordinate; # for mode='a-', this will not get written to the store # because it does not have the append_dim as a dim - ds_to_append.lon.data[:] = -999 + lon = ds_to_append.lon.to_numpy().copy() + lon[:] = -999 + ds_to_append["lon"] = lon ds_to_append.to_zarr( store_target, mode="a-", append_dim="time", **self.version_kwargs ) @@ -2615,7 +2617,9 @@ def test_append_with_append_dim_no_overwrite(self) -> None: # by default, mode="a" will overwrite all coordinates. 
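# Editorial sketch (not part of the patch): the copy-on-write failure mode that
# patch 1 works around (GH8843). Under pandas copy-on-write, np.asarray() on a
# Series can return a read-only view, which used to leak into xarray variables.
# Assumes pandas >= 2.0, where the "mode.copy_on_write" option exists.
import numpy as np
import pandas as pd

pd.set_option("mode.copy_on_write", True)
values = np.array(["2019-01-01", "2019-01-02"], dtype="datetime64[ns]")
result = np.asarray(pd.Series(values.ravel(), copy=False))
assert not result.flags.writeable  # read-only view into pandas' buffer

# The fix in _possibly_convert_objects recovers a writeable array, copying only
# when the flag cannot simply be flipped back:
try:
    result.flags.writeable = True
except ValueError:
    result = result.copy()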
ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs) actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs) - original2.lon.data[:] = -999 + lon = original2.lon.to_numpy().copy() + lon[:] = -999 + original2["lon"] = lon assert_identical(original2, actual) @requires_dask diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4937fc5f3a3..d2b8634b8b9 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -51,6 +51,7 @@ assert_equal, assert_identical, assert_no_warnings, + assert_writeable, create_test_data, has_cftime, has_dask, @@ -96,11 +97,11 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: nt2 = 2 time1 = pd.date_range("2000-01-01", periods=nt1) time2 = pd.date_range("2000-02-01", periods=nt2) - string_var = np.array(["ae", "bc", "df"], dtype=object) + string_var = np.array(["a", "bc", "def"], dtype=object) string_var_to_append = np.array(["asdf", "asdfg"], dtype=object) string_var_fixed_length = np.array(["aa", "bb", "cc"], dtype="|S2") string_var_fixed_length_to_append = np.array(["dd", "ee"], dtype="|S2") - unicode_var = ["áó", "áó", "áó"] + unicode_var = np.array(["áó", "áó", "áó"]) datetime_var = np.array( ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]" ) @@ -119,17 +120,11 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: coords=[lat, lon, time1], dims=["lat", "lon", "time"], ), - "string_var": xr.DataArray(string_var, coords=[time1], dims=["time"]), - "string_var_fixed_length": xr.DataArray( - string_var_fixed_length, coords=[time1], dims=["time"] - ), - "unicode_var": xr.DataArray( - unicode_var, coords=[time1], dims=["time"] - ).astype(np.str_), - "datetime_var": xr.DataArray( - datetime_var, coords=[time1], dims=["time"] - ), - "bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]), + "string_var": ("time", string_var), + "string_var_fixed_length": ("time", string_var_fixed_length), + "unicode_var": ("time", unicode_var), + "datetime_var": ("time", datetime_var), + "bool_var": ("time", bool_var), } ) @@ -140,21 +135,11 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: coords=[lat, lon, time2], dims=["lat", "lon", "time"], ), - "string_var": xr.DataArray( - string_var_to_append, coords=[time2], dims=["time"] - ), - "string_var_fixed_length": xr.DataArray( - string_var_fixed_length_to_append, coords=[time2], dims=["time"] - ), - "unicode_var": xr.DataArray( - unicode_var[:nt2], coords=[time2], dims=["time"] - ).astype(np.str_), - "datetime_var": xr.DataArray( - datetime_var_to_append, coords=[time2], dims=["time"] - ), - "bool_var": xr.DataArray( - bool_var_to_append, coords=[time2], dims=["time"] - ), + "string_var": ("time", string_var_to_append), + "string_var_fixed_length": ("time", string_var_fixed_length_to_append), + "unicode_var": ("time", unicode_var[:nt2]), + "datetime_var": ("time", datetime_var_to_append), + "bool_var": ("time", bool_var_to_append), } ) @@ -168,8 +153,9 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]: } ) - assert all(objp.data.flags.writeable for objp in ds.variables.values()) - assert all(objp.data.flags.writeable for objp in ds_to_append.variables.values()) + assert_writeable(ds) + assert_writeable(ds_to_append) + assert_writeable(ds_with_new_var) return ds, ds_to_append, ds_with_new_var @@ -182,10 +168,8 @@ def make_datasets(data, data_to_append) -> tuple[Dataset, Dataset]: ds_to_append = xr.Dataset( 
{"temperature": (["time"], data_to_append)}, coords={"time": [0, 1, 2]} ) - assert all(objp.data.flags.writeable for objp in ds.variables.values()) - assert all( - objp.data.flags.writeable for objp in ds_to_append.variables.values() - ) + assert_writeable(ds) + assert_writeable(ds_to_append) return ds, ds_to_append u2_strings = ["ab", "cd", "ef"] @@ -2964,10 +2948,11 @@ def test_copy_coords(self, deep, expected_orig) -> None: name="value", ).to_dataset() ds_cp = ds.copy(deep=deep) - ds_cp.coords["a"].data[0] = 999 + new_a = np.array([999, 2]) + ds_cp.coords["a"] = ds_cp.a.copy(data=new_a) expected_cp = xr.DataArray( - xr.IndexVariable("a", np.array([999, 2])), + xr.IndexVariable("a", new_a), coords={"a": [999, 2]}, dims=["a"], ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index f13406d0acc..c1d1058fd6e 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -122,10 +122,13 @@ def test_interpolate_pd_compat(method, fill_value) -> None: # for the numpy linear methods. # see https://github.com/pandas-dev/pandas/issues/55144 # This aligns the pandas output with the xarray output - expected.values[pd.isnull(actual.values)] = np.nan - expected.values[actual.values == fill_value] = fill_value + fixed = expected.values.copy() + fixed[pd.isnull(actual.values)] = np.nan + fixed[actual.values == fill_value] = fill_value + else: + fixed = expected.values - np.testing.assert_allclose(actual.values, expected.values) + np.testing.assert_allclose(actual.values, fixed) @requires_scipy diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 73f5abe66e5..061510f2515 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -64,6 +64,21 @@ def var(): return Variable(dims=list("xyz"), data=np.random.rand(3, 4, 5)) +@pytest.mark.parametrize( + "data", + [ + np.array(["a", "bc", "def"], dtype=object), + np.array(["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[ns]"), + ], +) +def test_as_compatible_data_writeable(data): + pd.set_option("mode.copy_on_write", True) + # GH8843, ensure writeable arrays for data_vars even with + # pandas copy-on-write mode + assert as_compatible_data(data).flags.writeable + pd.reset_option("mode.copy_on_write") + + class VariableSubclassobjects(NamedArraySubclassobjects, ABC): @pytest.fixture def target(self, data): From 79272c3dbc4748608df40290660dd7593127254f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Mon, 18 Mar 2024 20:29:58 -0700 Subject: [PATCH 2/9] Implement setitem syntax for `.oindex` and `.vindex` properties (#8845) * Implement setitem syntax for `.oindex` and `.vindex` properties * Apply suggestions from code review Co-authored-by: Deepak Cherian * use getter and setter properties instead of func_get and func_set methods * delete unnecessary _indexing_array_and_key method * Add tests for IndexCallable class * fix bug/unnecessary code introduced in #8790 * add unit tests --------- Co-authored-by: Deepak Cherian --- xarray/core/indexing.py | 171 ++++++++++++++++++++++------------ xarray/core/variable.py | 2 +- xarray/tests/test_indexing.py | 68 ++++++++++++-- 3 files changed, 174 insertions(+), 67 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index ea8ae44bb4d..407fda610fc 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -326,18 +326,23 @@ def as_integer_slice(value): class IndexCallable: - """Provide getitem syntax for a callable object.""" + 
"""Provide getitem and setitem syntax for callable objects.""" - __slots__ = ("func",) + __slots__ = ("getter", "setter") - def __init__(self, func): - self.func = func + def __init__(self, getter, setter=None): + self.getter = getter + self.setter = setter def __getitem__(self, key): - return self.func(key) + return self.getter(key) def __setitem__(self, key, value): - raise NotImplementedError + if self.setter is None: + raise NotImplementedError( + "Setting values is not supported for this indexer." + ) + self.setter(key, value) class BasicIndexer(ExplicitIndexer): @@ -486,10 +491,24 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) def _oindex_get(self, key): - raise NotImplementedError("This method should be overridden") + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_get method should be overridden" + ) def _vindex_get(self, key): - raise NotImplementedError("This method should be overridden") + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_get method should be overridden" + ) + + def _oindex_set(self, key, value): + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_set method should be overridden" + ) + + def _vindex_set(self, key, value): + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_set method should be overridden" + ) def _check_and_raise_if_non_basic_indexer(self, key): if isinstance(key, (VectorizedIndexer, OuterIndexer)): @@ -500,11 +519,11 @@ def _check_and_raise_if_non_basic_indexer(self, key): @property def oindex(self): - return IndexCallable(self._oindex_get) + return IndexCallable(self._oindex_get, self._oindex_set) @property def vindex(self): - return IndexCallable(self._vindex_get) + return IndexCallable(self._vindex_get, self._vindex_set) class ImplicitToExplicitIndexingAdapter(NDArrayMixin): @@ -616,12 +635,18 @@ def __getitem__(self, indexer): self._check_and_raise_if_non_basic_indexer(indexer) return type(self)(self.array, self._updated_key(indexer)) + def _vindex_set(self, key, value): + raise NotImplementedError( + "Lazy item assignment with the vectorized indexer is not yet " + "implemented. Load your data first by .load() or compute()." + ) + + def _oindex_set(self, key, value): + full_key = self._updated_key(key) + self.array.oindex[full_key] = value + def __setitem__(self, key, value): - if isinstance(key, VectorizedIndexer): - raise NotImplementedError( - "Lazy item assignment with the vectorized indexer is not yet " - "implemented. Load your data first by .load() or compute()." 
- ) + self._check_and_raise_if_non_basic_indexer(key) full_key = self._updated_key(key) self.array[full_key] = value @@ -657,7 +682,6 @@ def shape(self) -> tuple[int, ...]: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): array = apply_indexer(self.array, self.key) else: @@ -739,8 +763,18 @@ def __getitem__(self, key): def transpose(self, order): return self.array.transpose(order) + def _vindex_set(self, key, value): + self._ensure_copied() + self.array.vindex[key] = value + + def _oindex_set(self, key, value): + self._ensure_copied() + self.array.oindex[key] = value + def __setitem__(self, key, value): + self._check_and_raise_if_non_basic_indexer(key) self._ensure_copied() + self.array[key] = value def __deepcopy__(self, memo): @@ -779,7 +813,14 @@ def __getitem__(self, key): def transpose(self, order): return self.array.transpose(order) + def _vindex_set(self, key, value): + self.array.vindex[key] = value + + def _oindex_set(self, key, value): + self.array.oindex[key] = value + def __setitem__(self, key, value): + self._check_and_raise_if_non_basic_indexer(key) self.array[key] = value @@ -950,6 +991,16 @@ def apply_indexer(indexable, indexer): return indexable[indexer] +def set_with_indexer(indexable, indexer, value): + """Set values in an indexable object using an indexer.""" + if isinstance(indexer, VectorizedIndexer): + indexable.vindex[indexer] = value + elif isinstance(indexer, OuterIndexer): + indexable.oindex[indexer] = value + else: + indexable[indexer] = value + + def decompose_indexer( indexer: ExplicitIndexer, shape: tuple[int, ...], indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: @@ -1399,24 +1450,6 @@ def __init__(self, array): ) self.array = array - def _indexing_array_and_key(self, key): - if isinstance(key, OuterIndexer): - array = self.array - key = _outer_to_numpy_indexer(key, self.array.shape) - elif isinstance(key, VectorizedIndexer): - array = NumpyVIndexAdapter(self.array) - key = key.tuple - elif isinstance(key, BasicIndexer): - array = self.array - # We want 0d slices rather than scalars. This is achieved by - # appending an ellipsis (see - # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = key.tuple + (Ellipsis,) - else: - raise TypeError(f"unexpected key type: {type(key)}") - - return array, key - def transpose(self, order): return self.array.transpose(order) @@ -1430,14 +1463,18 @@ def _vindex_get(self, key): def __getitem__(self, key): self._check_and_raise_if_non_basic_indexer(key) - array, key = self._indexing_array_and_key(key) + + array = self.array + # We want 0d slices rather than scalars. This is achieved by + # appending an ellipsis (see + # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). + key = key.tuple + (Ellipsis,) return array[key] - def __setitem__(self, key, value): - array, key = self._indexing_array_and_key(key) + def _safe_setitem(self, array, key, value): try: array[key] = value - except ValueError: + except ValueError as exc: # More informative exception if read-only view if not array.flags.writeable and not array.flags.owndata: raise ValueError( @@ -1445,7 +1482,24 @@ def __setitem__(self, key, value): "Do you want to .copy() array first?" 
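# Editorial sketch (not part of the patch): the failure _safe_setitem catches.
# NumPy raises a bare ValueError when writing to a read-only view; re-raising
# with the hint above makes the backend error actionable:
#
#     base = np.arange(4)
#     view = base[::2]
#     view.flags.writeable = False
#     view[0] = 9  # ValueError: assignment destination is read-only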
) else: - raise + raise exc + + def _oindex_set(self, key, value): + key = _outer_to_numpy_indexer(key, self.array.shape) + self._safe_setitem(self.array, key, value) + + def _vindex_set(self, key, value): + array = NumpyVIndexAdapter(self.array) + self._safe_setitem(array, key.tuple, value) + + def __setitem__(self, key, value): + self._check_and_raise_if_non_basic_indexer(key) + array = self.array + # We want 0d slices rather than scalars. This is achieved by + # appending an ellipsis (see + # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). + key = key.tuple + (Ellipsis,) + self._safe_setitem(array, key, value) class NdArrayLikeIndexingAdapter(NumpyIndexingAdapter): @@ -1488,13 +1542,15 @@ def __getitem__(self, key): self._check_and_raise_if_non_basic_indexer(key) return self.array[key.tuple] + def _oindex_set(self, key, value): + self.array[key.tuple] = value + + def _vindex_set(self, key, value): + raise TypeError("Vectorized indexing is not supported") + def __setitem__(self, key, value): - if isinstance(key, (BasicIndexer, OuterIndexer)): - self.array[key.tuple] = value - elif isinstance(key, VectorizedIndexer): - raise TypeError("Vectorized indexing is not supported") - else: - raise TypeError(f"Unrecognized indexer: {key}") + self._check_and_raise_if_non_basic_indexer(key) + self.array[key.tuple] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1530,19 +1586,20 @@ def __getitem__(self, key): self._check_and_raise_if_non_basic_indexer(key) return self.array[key.tuple] + def _oindex_set(self, key, value): + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in key.tuple) + if num_non_slices > 1: + raise NotImplementedError( + "xarray can't set arrays with multiple " "array indices to dask yet." + ) + self.array[key.tuple] = value + + def _vindex_set(self, key, value): + self.array.vindex[key.tuple] = value + def __setitem__(self, key, value): - if isinstance(key, BasicIndexer): - self.array[key.tuple] = value - elif isinstance(key, VectorizedIndexer): - self.array.vindex[key.tuple] = value - elif isinstance(key, OuterIndexer): - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in key.tuple) - if num_non_slices > 1: - raise NotImplementedError( - "xarray can't set arrays with multiple " - "array indices to dask yet." 
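# Editorial sketch (not part of the patch): what trips the dask branch above.
# Assigning through .oindex with more than one array index cannot currently be
# forwarded to dask, while a single array index is fine:
#
#     import dask.array
#     d = dask.array.zeros((4, 4), chunks=2)
#     d[np.array([0, 2]), :] = 1    # one array index: delegated to dask
#     # array indices on *both* axes -> the NotImplementedError above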
- ) - self.array[key.tuple] = value + self._check_and_raise_if_non_basic_indexer(key) + self.array[key.tuple] = value def transpose(self, order): return self.array.transpose(order) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cad48d0775a..2ac0c04d726 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -849,7 +849,7 @@ def __setitem__(self, key, value): value = np.moveaxis(value, new_order, range(len(new_order))) indexable = as_indexable(self._data) - indexable[index_tuple] = value + indexing.set_with_indexer(indexable, index_tuple, value) @property def encoding(self) -> dict[Any, Any]: diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index c3989bbf23e..e650c454eac 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -23,6 +23,28 @@ B = IndexerMaker(indexing.BasicIndexer) +class TestIndexCallable: + def test_getitem(self): + def getter(key): + return key * 2 + + indexer = indexing.IndexCallable(getter) + assert indexer[3] == 6 + assert indexer[0] == 0 + assert indexer[-1] == -2 + + def test_setitem(self): + def getter(key): + return key * 2 + + def setter(key, value): + raise NotImplementedError("Setter not implemented") + + indexer = indexing.IndexCallable(getter, setter) + with pytest.raises(NotImplementedError): + indexer[3] = 6 + + class TestIndexers: def set_to_zero(self, x, i): x = x.copy() @@ -361,15 +383,8 @@ def test_vectorized_lazily_indexed_array(self) -> None: def check_indexing(v_eager, v_lazy, indexers): for indexer in indexers: - if isinstance(indexer, indexing.VectorizedIndexer): - actual = v_lazy.vindex[indexer] - expected = v_eager.vindex[indexer] - elif isinstance(indexer, indexing.OuterIndexer): - actual = v_lazy.oindex[indexer] - expected = v_eager.oindex[indexer] - else: - actual = v_lazy[indexer] - expected = v_eager[indexer] + actual = v_lazy[indexer] + expected = v_eager[indexer] assert expected.shape == actual.shape assert isinstance( actual._data, @@ -406,6 +421,41 @@ def check_indexing(v_eager, v_lazy, indexers): ] check_indexing(v_eager, v_lazy, indexers) + def test_lazily_indexed_array_vindex_setitem(self) -> None: + + lazy = indexing.LazilyIndexedArray(np.random.rand(10, 20, 30)) + + # vectorized indexing + indexer = indexing.VectorizedIndexer( + (np.array([0, 1]), np.array([0, 1]), slice(None, None, None)) + ) + with pytest.raises( + NotImplementedError, + match=r"Lazy item assignment with the vectorized indexer is not yet", + ): + lazy.vindex[indexer] = 0 + + @pytest.mark.parametrize( + "indexer_class, key, value", + [ + (indexing.OuterIndexer, (0, 1, slice(None, None, None)), 10), + (indexing.BasicIndexer, (0, 1, slice(None, None, None)), 10), + ], + ) + def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: + original = np.random.rand(10, 20, 30) + x = indexing.NumpyIndexingAdapter(original) + lazy = indexing.LazilyIndexedArray(x) + + if indexer_class is indexing.BasicIndexer: + indexer = indexer_class(key) + lazy[indexer] = value + elif indexer_class is indexing.OuterIndexer: + indexer = indexer_class(key) + lazy.oindex[indexer] = value + + assert_array_equal(original[key], value) + class TestCopyOnWriteArray: def test_setitem(self) -> None: From 61ffc38a495b224c49f9486404ee3bdcb9049663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 19 Mar 2024 15:25:37 +0100 Subject: [PATCH 3/9] FIX: do not cast _FillValue/missing_value in CFMaskCoder if _Unsigned is provided (#8852) * FIX: do not cast 
_FillValue/missing_value in CFMaskCoder if _Unsigned is provided * add whats-new.rst entry * add comment on _Unsigned --- doc/whats-new.rst | 3 +++ xarray/coding/variables.py | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b81be3c0192..6bde6504a7f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,9 @@ Bug fixes `CFMaskCoder`/`CFScaleOffsetCoder` (:issue:`2304`, :issue:`5597`, :issue:`7691`, :pull:`8713`, see also discussion in :pull:`7654`). By `Kai Mühlbauer `_. +- do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided + (:issue:`8844`, :pull:`8852`). + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3b11e7bfa02..52cf0fc3656 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -309,6 +309,9 @@ def encode(self, variable: Variable, name: T_Name = None): dtype = np.dtype(encoding.get("dtype", data.dtype)) fv = encoding.get("_FillValue") mv = encoding.get("missing_value") + # to properly handle _FillValue/missing_value below [a], [b] + # we need to check if unsigned data is written as signed data + unsigned = encoding.get("_Unsigned") is not None fv_exists = fv is not None mv_exists = mv is not None @@ -323,13 +326,19 @@ def encode(self, variable: Variable, name: T_Name = None): if fv_exists: # Ensure _FillValue is cast to same dtype as data's - encoding["_FillValue"] = dtype.type(fv) + # [a] need to skip this if _Unsigned is available + if not unsigned: + encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if mv_exists: # try to use _FillValue, if it exists to align both values # or use missing_value and ensure it's cast to same dtype as data's - encoding["missing_value"] = attrs.get("_FillValue", dtype.type(mv)) + # [b] need to provide mv verbatim if _Unsigned is available + encoding["missing_value"] = attrs.get( + "_FillValue", + (dtype.type(mv) if not unsigned else mv), + ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) # apply fillna @@ -522,7 +531,6 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: if "_Unsigned" in variable.attrs: dims, data, attrs, encoding = unpack_for_decoding(variable) - unsigned = pop_to(attrs, encoding, "_Unsigned") if data.dtype.kind == "i": From 0b6716d12dec5628618137c8a34a120b60ba9c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 20 Mar 2024 15:47:45 +0100 Subject: [PATCH 4/9] FIX: adapt handling of copy keyword argument in scipy backend for numpy >= 2.0dev (#8851) * FIX: adapt handling of copy keyword argument in scipy backend for numpy >= 2.0dev FIX: adapt handling of copy keyword argument in scipy backend for numpy >= 2.0dev * Add whats-new.rst entry * Apply suggestions from code review --------- Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/backends/scipy_.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6bde6504a7f..cd01f0adaf1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,6 +59,8 @@ Bug fixes By `Kai Mühlbauer `_. - do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided (:issue:`8844`, :pull:`8852`). +- Adapt handling of copy keyword argument in scipy backend for numpy >= 2.0dev + (:issue:`8844`, :pull:`8851`). 
By `Kai Mühlbauer `_. Documentation diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 154d82bb871..f8c486e512c 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -28,6 +28,7 @@ Frozen, FrozenDict, close_on_error, + module_available, try_read_magic_number_from_file_or_path, ) from xarray.core.variable import Variable @@ -39,6 +40,9 @@ from xarray.core.dataset import Dataset +HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") + + def _decode_string(s): if isinstance(s, bytes): return s.decode("utf-8", "replace") @@ -76,6 +80,12 @@ def __getitem__(self, key): # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. copy = self.datastore.ds.use_mmap + + # adapt handling of copy-kwarg to numpy 2.0 + # see https://github.com/numpy/numpy/issues/25916 + # and https://github.com/numpy/numpy/pull/25922 + copy = None if HAS_NUMPY_2_0 and copy is False else copy + return np.array(data, dtype=self.dtype, copy=copy) def __setitem__(self, key, value): From 4a0bb2eb80538806468233d11bc5a4c06ffb417e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 20 Mar 2024 09:00:18 -0600 Subject: [PATCH 5/9] pandas 3 MultiIndex fixes (#8847) * Fix dropping of muiltiindexes xref #8844 Closes https://github.com/xarray-contrib/flox/issues/342 * More fixes --- xarray/core/dataset.py | 2 +- xarray/tests/test_indexes.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b4c00b66ed8..10bf1466156 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5867,7 +5867,7 @@ def drop_vars( for var in names_set: maybe_midx = self._indexes.get(var, None) if isinstance(maybe_midx, PandasMultiIndex): - idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim]) + idx_coord_names = set(list(maybe_midx.index.names) + [maybe_midx.dim]) idx_other_names = idx_coord_names - set(names_set) other_names.update(idx_other_names) if other_names: diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 3ee7f045360..5ebdfd5da6e 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -352,7 +352,7 @@ def test_constructor(self) -> None: # default level names pd_idx = pd.MultiIndex.from_arrays([foo_data, bar_data]) index = PandasMultiIndex(pd_idx, "x") - assert index.index.names == ("x_level_0", "x_level_1") + assert list(index.index.names) == ["x_level_0", "x_level_1"] def test_from_variables(self) -> None: v_level1 = xr.Variable( @@ -370,7 +370,7 @@ def test_from_variables(self) -> None: assert index.dim == "x" assert index.index.equals(expected_idx) assert index.index.name == "x" - assert index.index.names == ["level1", "level2"] + assert list(index.index.names) == ["level1", "level2"] var = xr.Variable(("x", "y"), [[1, 2, 3], [4, 5, 6]]) with pytest.raises( @@ -413,7 +413,8 @@ def test_stack(self) -> None: index = PandasMultiIndex.stack(prod_vars, "z") assert index.dim == "z" - assert index.index.names == ["x", "y"] + # TODO: change to tuple when pandas 3 is minimum + assert list(index.index.names) == ["x", "y"] np.testing.assert_array_equal( index.index.codes, [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] ) @@ -531,12 +532,12 @@ def test_rename(self) -> None: assert new_index is index new_index = index.rename({"two": "three"}, {}) - assert new_index.index.names == ["one", "three"] + assert list(new_index.index.names) == ["one", "three"] assert new_index.dim == "x" assert 
new_index.level_coords_dtype == {"one": "<U1", "three": np.int32}

         new_index = index.rename({}, {"x": "y"})
-        assert new_index.index.names == ["one", "two"]
+        assert list(new_index.index.names) == ["one", "two"]
         assert new_index.dim == "y"
         assert new_index.level_coords_dtype == level_coords_dtype

From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
Date: Wed, 20 Mar 2024 13:20:52 -0700
Subject: [PATCH 6/9] increase typing annotations coverage in
 `xarray/core/indexing.py` (#8857)

---
 xarray/core/indexing.py       | 343 ++++++++++++++++++----------------
 xarray/namedarray/core.py     |   2 +-
 xarray/tests/test_indexing.py |   2 +-
 3 files changed, 183 insertions(+), 164 deletions(-)

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 407fda610fc..82ee4ccb0e4 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -4,7 +4,7 @@
 import functools
 import operator
 from collections import Counter, defaultdict
-from collections.abc import Hashable, Mapping
+from collections.abc import Hashable, Iterable, Mapping
 from contextlib import suppress
 from dataclasses import dataclass, field
 from datetime import timedelta
@@ -35,6 +35,8 @@
     from xarray.core.indexes import Index
     from xarray.core.variable import Variable
+    from xarray.namedarray._typing import _Shape, duckarray
+    from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint


 @dataclass
@@ -163,7 +165,7 @@ def map_index_queries(
     obj: T_Xarray,
     indexers: Mapping[Any, Any],
     method=None,
-    tolerance=None,
+    tolerance: int | float | Iterable[int | float] | None = None,
     **indexers_kwargs: Any,
 ) -> IndexSelResult:
     """Execute index queries from a DataArray / Dataset and label-based indexers
@@ -234,17 +236,17 @@ def expanded_indexer(key, ndim):
     return tuple(new_key)


-def _expand_slice(slice_, size):
+def _expand_slice(slice_, size: int) -> np.ndarray:
     return np.arange(*slice_.indices(size))


-def _normalize_slice(sl, size):
+def _normalize_slice(sl: slice, size) -> slice:
     """Ensure that given slice only contains positive start and stop values
     (stop can be -1 for full-size slices with negative steps, e.g.
[-10::-1])""" return slice(*sl.indices(size)) -def slice_slice(old_slice, applied_slice, size): +def slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: """Given a slice and the size of the dimension to which it will be applied, index it with another slice to return a new slice equivalent to applying the slices sequentially @@ -273,7 +275,7 @@ def slice_slice(old_slice, applied_slice, size): return slice(start, stop, step) -def _index_indexer_1d(old_indexer, applied_indexer, size): +def _index_indexer_1d(old_indexer, applied_indexer, size: int): assert isinstance(applied_indexer, integer_types + (slice, np.ndarray)) if isinstance(applied_indexer, slice) and applied_indexer == slice(None): # shortcut for the usual case @@ -282,7 +284,7 @@ def _index_indexer_1d(old_indexer, applied_indexer, size): if isinstance(applied_indexer, slice): indexer = slice_slice(old_indexer, applied_indexer, size) else: - indexer = _expand_slice(old_indexer, size)[applied_indexer] + indexer = _expand_slice(old_indexer, size)[applied_indexer] # type: ignore[assignment] else: indexer = old_indexer[applied_indexer] return indexer @@ -301,16 +303,16 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key): + def __init__(self, key: tuple[Any, ...]): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self): + def tuple(self) -> tuple[Any, ...]: return self._key - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}({self.tuple})" @@ -330,14 +332,16 @@ class IndexCallable: __slots__ = ("getter", "setter") - def __init__(self, getter, setter=None): + def __init__( + self, getter: Callable[..., Any], setter: Callable[..., Any] | None = None + ): self.getter = getter self.setter = setter - def __getitem__(self, key): + def __getitem__(self, key: Any) -> Any: return self.getter(key) - def __setitem__(self, key, value): + def __setitem__(self, key: Any, value: Any) -> None: if self.setter is None: raise NotImplementedError( "Setting values is not supported for this indexer." @@ -355,7 +359,7 @@ class BasicIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key): + def __init__(self, key: tuple[int | np.integer | slice, ...]): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -371,7 +375,7 @@ def __init__(self, key): ) new_key.append(k) - super().__init__(new_key) + super().__init__(tuple(new_key)) class OuterIndexer(ExplicitIndexer): @@ -385,7 +389,12 @@ class OuterIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key): + def __init__( + self, + key: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... 
+ ], + ): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -400,19 +409,19 @@ def __init__(self, key): raise TypeError( f"invalid indexer array, does not have integer dtype: {k!r}" ) - if k.ndim > 1: + if k.ndim > 1: # type: ignore[union-attr] raise TypeError( f"invalid indexer array for {type(self).__name__}; must be scalar " f"or have 1 dimension: {k!r}" ) - k = k.astype(np.int64) + k = k.astype(np.int64) # type: ignore[union-attr] else: raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) new_key.append(k) - super().__init__(new_key) + super().__init__(tuple(new_key)) class VectorizedIndexer(ExplicitIndexer): @@ -427,7 +436,7 @@ class VectorizedIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key): + def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...]): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -448,21 +457,21 @@ def __init__(self, key): f"invalid indexer array, does not have integer dtype: {k!r}" ) if ndim is None: - ndim = k.ndim + ndim = k.ndim # type: ignore[union-attr] elif ndim != k.ndim: ndims = [k.ndim for k in key if isinstance(k, np.ndarray)] raise ValueError( "invalid indexer key: ndarray arguments " f"have different numbers of dimensions: {ndims}" ) - k = k.astype(np.int64) + k = k.astype(np.int64) # type: ignore[union-attr] else: raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) new_key.append(k) - super().__init__(new_key) + super().__init__(tuple(new_key)) class ExplicitlyIndexed: @@ -490,39 +499,39 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: # Note this is the base class for all lazy indexing classes return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, key): + def _oindex_get(self, indexer: OuterIndexer): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, key): + def _vindex_get(self, indexer: VectorizedIndexer): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, key, value): + def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, key, value): + def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) - def _check_and_raise_if_non_basic_indexer(self, key): - if isinstance(key, (VectorizedIndexer, OuterIndexer)): + def _check_and_raise_if_non_basic_indexer(self, indexer: ExplicitIndexer) -> None: + if isinstance(indexer, (VectorizedIndexer, OuterIndexer)): raise TypeError( "Vectorized indexing with vectorized or outer indexers is not supported. " "Please use .vindex and .oindex properties to index the array." 
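# Editorial sketch (not part of the patch): the three explicit indexer kinds,
# for an array a with a.shape == (10, 20):
#
#     BasicIndexer((0, slice(2, 5)))          # a[0, 2:5]
#     OuterIndexer((np.array([0, 1]), np.array([2, 3])))
#         # orthogonal: shape (2, 2), like a[np.ix_([0, 1], [2, 3])]
#     VectorizedIndexer((np.array([0, 1]), np.array([2, 3])))
#         # pointwise: shape (2,), like a[[0, 1], [2, 3]]
#
# Only BasicIndexer may go through plain __getitem__/__setitem__; the other two
# must use the .oindex/.vindex properties, which is what this check enforces.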
) @property - def oindex(self): + def oindex(self) -> IndexCallable: return IndexCallable(self._oindex_get, self._oindex_set) @property - def vindex(self): + def vindex(self) -> IndexCallable: return IndexCallable(self._vindex_get, self._vindex_set) @@ -531,7 +540,7 @@ class ImplicitToExplicitIndexingAdapter(NDArrayMixin): __slots__ = ("array", "indexer_cls") - def __init__(self, array, indexer_cls=BasicIndexer): + def __init__(self, array, indexer_cls: type[ExplicitIndexer] = BasicIndexer): self.array = as_indexable(array) self.indexer_cls = indexer_cls @@ -541,7 +550,7 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key): + def __getitem__(self, key: Any): key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(key) @@ -560,7 +569,7 @@ class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): __slots__ = ("array", "key") - def __init__(self, array, key=None): + def __init__(self, array: Any, key: ExplicitIndexer | None = None): """ Parameters ---------- @@ -572,8 +581,8 @@ def __init__(self, array, key=None): """ if isinstance(array, type(self)) and key is None: # unwrap - key = array.key - array = array.array + key = array.key # type: ignore[has-type] + array = array.array # type: ignore[has-type] if key is None: key = BasicIndexer((slice(None),) * array.ndim) @@ -581,7 +590,7 @@ def __init__(self, array, key=None): self.array = as_indexable(array) self.key = key - def _updated_key(self, new_key): + def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) full_key = [] for size, k in zip(self.array.shape, self.key.tuple): @@ -589,14 +598,14 @@ def _updated_key(self, new_key): full_key.append(k) else: full_key.append(_index_indexer_1d(k, next(iter_new_key), size)) - full_key = tuple(full_key) + full_key_tuple = tuple(full_key) - if all(isinstance(k, integer_types + (slice,)) for k in full_key): - return BasicIndexer(full_key) - return OuterIndexer(full_key) + if all(isinstance(k, integer_types + (slice,)) for k in full_key_tuple): + return BasicIndexer(full_key_tuple) + return OuterIndexer(full_key_tuple) @property - def shape(self) -> tuple[int, ...]: + def shape(self) -> _Shape: shape = [] for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, slice): @@ -624,33 +633,33 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer): + def _vindex_get(self, indexer: VectorizedIndexer): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer): + def __getitem__(self, indexer: ExplicitIndexer): self._check_and_raise_if_non_basic_indexer(indexer) return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key, value): + def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." 
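# Editorial sketch (not part of the patch): why lazy vectorized assignment is
# rejected here. LazilyIndexedArray never loads data on indexing; it merges
# keys via _updated_key and defers the actual read:
#
#     lazy = LazilyIndexedArray(backend_array)    # hypothetical wrapped array
#     lazy = lazy[BasicIndexer((slice(0, 10),))]  # keys merged, nothing read
#     lazy.get_duck_array()                       # data is read only here
#
# A VectorizedIndexer cannot be merged into the stored basic/outer key, so
# writes through it require loading the data first.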
) - def _oindex_set(self, key, value): + def _oindex_set(self, key: OuterIndexer, value: Any) -> None: full_key = self._updated_key(key) self.array.oindex[full_key] = value - def __setitem__(self, key, value): + def __setitem__(self, key: BasicIndexer, value: Any) -> None: self._check_and_raise_if_non_basic_indexer(key) full_key = self._updated_key(key) self.array[full_key] = value - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -663,7 +672,7 @@ class LazilyVectorizedIndexedArray(ExplicitlyIndexedNDArrayMixin): __slots__ = ("array", "key") - def __init__(self, array, key): + def __init__(self, array: duckarray[Any, Any], key: ExplicitIndexer): """ Parameters ---------- @@ -673,12 +682,12 @@ def __init__(self, array, key): """ if isinstance(key, (BasicIndexer, OuterIndexer)): self.key = _outer_to_vectorized_indexer(key, array.shape) - else: + elif isinstance(key, VectorizedIndexer): self.key = _arrayize_vectorized_indexer(key, array.shape) self.array = as_indexable(array) @property - def shape(self) -> tuple[int, ...]: + def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): @@ -696,16 +705,16 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def _updated_key(self, new_key): + def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer): + def _vindex_get(self, indexer: VectorizedIndexer): return type(self)(self.array, self._updated_key(indexer)) - def __getitem__(self, indexer): + def __getitem__(self, indexer: ExplicitIndexer): self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer.tuple): @@ -717,13 +726,13 @@ def transpose(self, order): key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, key, value): + def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." 
) - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -738,7 +747,7 @@ def _wrap_numpy_scalars(array): class CopyOnWriteArray(ExplicitlyIndexedNDArrayMixin): __slots__ = ("array", "_copied") - def __init__(self, array): + def __init__(self, array: duckarray[Any, Any]): self.array = as_indexable(array) self._copied = False @@ -750,32 +759,32 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array.oindex[key])) + def _oindex_get(self, indexer: OuterIndexer): + return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array.vindex[key])) + def _vindex_get(self, indexer: VectorizedIndexer): + return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, key): - self._check_and_raise_if_non_basic_indexer(key) - return type(self)(_wrap_numpy_scalars(self.array[key])) + def __getitem__(self, indexer: ExplicitIndexer): + self._check_and_raise_if_non_basic_indexer(indexer) + return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, key, value): + def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: self._ensure_copied() - self.array.vindex[key] = value + self.array.vindex[indexer] = value - def _oindex_set(self, key, value): + def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: self._ensure_copied() - self.array.oindex[key] = value + self.array.oindex[indexer] = value - def __setitem__(self, key, value): - self._check_and_raise_if_non_basic_indexer(key) + def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + self._check_and_raise_if_non_basic_indexer(indexer) self._ensure_copied() - self.array[key] = value + self.array[indexer] = value def __deepcopy__(self, memo): # CopyOnWriteArray is used to wrap backend array objects, which might @@ -800,28 +809,28 @@ def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array.oindex[key])) + def _oindex_get(self, indexer: OuterIndexer): + return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, key): - return type(self)(_wrap_numpy_scalars(self.array.vindex[key])) + def _vindex_get(self, indexer: VectorizedIndexer): + return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, key): - self._check_and_raise_if_non_basic_indexer(key) - return type(self)(_wrap_numpy_scalars(self.array[key])) + def __getitem__(self, indexer: ExplicitIndexer): + self._check_and_raise_if_non_basic_indexer(indexer) + return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, key, value): - self.array.vindex[key] = value + def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + self.array.vindex[indexer] = value - def _oindex_set(self, key, value): - self.array.oindex[key] = value + def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + self.array.oindex[indexer] = value - def __setitem__(self, key, value): - self._check_and_raise_if_non_basic_indexer(key) - self.array[key] = value + def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + 
self._check_and_raise_if_non_basic_indexer(indexer) + self.array[indexer] = value def as_indexable(array): @@ -846,12 +855,14 @@ def as_indexable(array): raise TypeError(f"Invalid array type: {type(array)}") -def _outer_to_vectorized_indexer(key, shape): +def _outer_to_vectorized_indexer( + indexer: BasicIndexer | OuterIndexer, shape: _Shape +) -> VectorizedIndexer: """Convert an OuterIndexer into an vectorized indexer. Parameters ---------- - key : Outer/Basic Indexer + indexer : Outer/Basic Indexer An indexer to convert. shape : tuple Shape of the array subject to the indexing. @@ -863,7 +874,7 @@ def _outer_to_vectorized_indexer(key, shape): Each element is an array: broadcasting them together gives the shape of the result. """ - key = key.tuple + key = indexer.tuple n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 @@ -875,18 +886,18 @@ def _outer_to_vectorized_indexer(key, shape): if isinstance(k, slice): k = np.arange(*k.indices(size)) assert k.dtype.kind in {"i", "u"} - shape = [(1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)] - new_key.append(k.reshape(*shape)) + new_shape = [(1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)] + new_key.append(k.reshape(*new_shape)) i_dim += 1 return VectorizedIndexer(tuple(new_key)) -def _outer_to_numpy_indexer(key, shape): +def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): """Convert an OuterIndexer into an indexer for NumPy. Parameters ---------- - key : Basic/OuterIndexer + indexer : Basic/OuterIndexer An indexer to convert. shape : tuple Shape of the array subject to the indexing. @@ -896,16 +907,16 @@ def _outer_to_numpy_indexer(key, shape): tuple Tuple suitable for use to index a NumPy array. """ - if len([k for k in key.tuple if not isinstance(k, slice)]) <= 1: + if len([k for k in indexer.tuple if not isinstance(k, slice)]) <= 1: # If there is only one vector and all others are slice, # it can be safely used in mixed basic/advanced indexing. # Boolean index should already be converted to integer array. - return key.tuple + return indexer.tuple else: - return _outer_to_vectorized_indexer(key, shape).tuple + return _outer_to_vectorized_indexer(indexer, shape).tuple -def _combine_indexers(old_key, shape, new_key): +def _combine_indexers(old_key, shape: _Shape, new_key) -> VectorizedIndexer: """Combine two indexers. Parameters @@ -947,9 +958,9 @@ class IndexingSupport(enum.Enum): def explicit_indexing_adapter( key: ExplicitIndexer, - shape: tuple[int, ...], + shape: _Shape, indexing_support: IndexingSupport, - raw_indexing_method: Callable, + raw_indexing_method: Callable[..., Any], ) -> Any: """Support explicit indexing by delegating to a raw indexing method. 
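# Editorial sketch (not part of the patch): the decomposition this adapter
# performs for a backend that only supports basic indexing. The requested key
# is split into a part the backend executes and a numpy clean-up applied to
# the loaded block:
#
#     key = OuterIndexer((np.array([0, 3, 1]), slice(None)))
#     backend_key, np_key = decompose_indexer(key, shape, IndexingSupport.BASIC)
#     raw = raw_indexing_method(backend_key.tuple)       # e.g. read rows 0:4
#     result = apply_indexer(as_indexable(raw), np_key)  # reorder rows in memory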
@@ -981,7 +992,7 @@ def explicit_indexing_adapter( return result -def apply_indexer(indexable, indexer): +def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): return indexable.vindex[indexer] @@ -991,7 +1002,7 @@ def apply_indexer(indexable, indexer): return indexable[indexer] -def set_with_indexer(indexable, indexer, value): +def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): indexable.vindex[indexer] = value @@ -1002,7 +1013,7 @@ def set_with_indexer(indexable, indexer, value): def decompose_indexer( - indexer: ExplicitIndexer, shape: tuple[int, ...], indexing_support: IndexingSupport + indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) @@ -1041,7 +1052,7 @@ def _decompose_slice(key: slice, size: int) -> tuple[slice, slice]: def _decompose_vectorized_indexer( indexer: VectorizedIndexer, - shape: tuple[int, ...], + shape: _Shape, indexing_support: IndexingSupport, ) -> tuple[ExplicitIndexer, ExplicitIndexer]: """ @@ -1123,7 +1134,7 @@ def _decompose_vectorized_indexer( def _decompose_outer_indexer( indexer: BasicIndexer | OuterIndexer, - shape: tuple[int, ...], + shape: _Shape, indexing_support: IndexingSupport, ) -> tuple[ExplicitIndexer, ExplicitIndexer]: """ @@ -1264,7 +1275,9 @@ def _decompose_outer_indexer( return (BasicIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) -def _arrayize_vectorized_indexer(indexer, shape): +def _arrayize_vectorized_indexer( + indexer: VectorizedIndexer, shape: _Shape +) -> VectorizedIndexer: """Return an identical vindex but slices are replaced by arrays""" slices = [v for v in indexer.tuple if isinstance(v, slice)] if len(slices) == 0: @@ -1284,7 +1297,9 @@ def _arrayize_vectorized_indexer(indexer, shape): return VectorizedIndexer(tuple(new_key)) -def _chunked_array_with_chunks_hint(array, chunks, chunkmanager): +def _chunked_array_with_chunks_hint( + array, chunks, chunkmanager: ChunkManagerEntrypoint[Any] +): """Create a chunked array using the chunks hint for dimensions of size > 1.""" if len(chunks) < array.ndim: @@ -1292,21 +1307,21 @@ def _chunked_array_with_chunks_hint(array, chunks, chunkmanager): new_chunks = [] for chunk, size in zip(chunks, array.shape): new_chunks.append(chunk if size > 1 else (1,)) - return chunkmanager.from_array(array, new_chunks) + return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, data=None): +def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) new_keys = [] for k in key: if isinstance(k, np.ndarray): - if is_chunked_array(data): + if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) new_keys.append( _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager) @@ -1324,7 +1339,9 @@ def _masked_result_drop_slice(key, data=None): return mask -def create_mask(indexer, shape, data=None): +def create_mask( + indexer: ExplicitIndexer, shape: _Shape, data: duckarray[Any, Any] | None = None +): """Create a mask for indexing 
with a fill-value. Parameters @@ -1369,7 +1386,9 @@ def create_mask(indexer, shape, data=None): return mask -def _posify_mask_subindexer(index): +def _posify_mask_subindexer( + index: np.ndarray[Any, np.dtype[np.generic]], +) -> np.ndarray[Any, np.dtype[np.generic]]: """Convert masked indices in a flat array to the nearest unmasked index. Parameters @@ -1395,7 +1414,7 @@ def _posify_mask_subindexer(index): return new_index -def posify_mask_indexer(indexer): +def posify_mask_indexer(indexer: ExplicitIndexer) -> ExplicitIndexer: """Convert masked values (-1) in an indexer to nearest unmasked values. This routine is useful for dask, where it can be much faster to index @@ -1453,25 +1472,25 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, key): - key = _outer_to_numpy_indexer(key, self.array.shape) + def _oindex_get(self, indexer: OuterIndexer): + key = _outer_to_numpy_indexer(indexer, self.array.shape) return self.array[key] - def _vindex_get(self, key): + def _vindex_get(self, indexer: VectorizedIndexer): array = NumpyVIndexAdapter(self.array) - return array[key.tuple] + return array[indexer.tuple] - def __getitem__(self, key): - self._check_and_raise_if_non_basic_indexer(key) + def __getitem__(self, indexer: ExplicitIndexer): + self._check_and_raise_if_non_basic_indexer(indexer) array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = key.tuple + (Ellipsis,) + key = indexer.tuple + (Ellipsis,) return array[key] - def _safe_setitem(self, array, key, value): + def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1484,21 +1503,21 @@ def _safe_setitem(self, array, key, value): else: raise exc - def _oindex_set(self, key, value): - key = _outer_to_numpy_indexer(key, self.array.shape) + def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + key = _outer_to_numpy_indexer(indexer, self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, key, value): + def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, key.tuple, value) + self._safe_setitem(array, indexer.tuple, value) - def __setitem__(self, key, value): - self._check_and_raise_if_non_basic_indexer(key) + def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + self._check_and_raise_if_non_basic_indexer(indexer) array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
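# Editorial sketch (not part of the patch): what the trailing Ellipsis buys.
# A plain integer key returns a detached numpy scalar, while adding Ellipsis
# keeps a 0-d view into the parent array:
#
#     a = np.arange(3)
#     a[(0,)]           # numpy scalar, detached from a
#     a[(0, Ellipsis)]  # 0-d ndarray view; writes propagate back to a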
- key = key.tuple + (Ellipsis,) + key = indexer.tuple + (Ellipsis,) self._safe_setitem(array, key, value) @@ -1527,30 +1546,30 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, key): + def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = key.tuple + key = indexer.tuple value = self.array for axis, subkey in reversed(list(enumerate(key))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, key): + def _vindex_get(self, indexer: VectorizedIndexer): raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, key): - self._check_and_raise_if_non_basic_indexer(key) - return self.array[key.tuple] + def __getitem__(self, indexer: ExplicitIndexer): + self._check_and_raise_if_non_basic_indexer(indexer) + return self.array[indexer.tuple] - def _oindex_set(self, key, value): - self.array[key.tuple] = value + def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + self.array[indexer.tuple] = value - def _vindex_set(self, key, value): + def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, key, value): - self._check_and_raise_if_non_basic_indexer(key) - self.array[key.tuple] = value + def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + self._check_and_raise_if_non_basic_indexer(indexer) + self.array[indexer.tuple] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1568,8 +1587,8 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, key): - key = key.tuple + def _oindex_get(self, indexer: OuterIndexer): + key = indexer.tuple try: return self.array[key] except NotImplementedError: @@ -1579,27 +1598,27 @@ def _oindex_get(self, key): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, key): - return self.array.vindex[key.tuple] + def _vindex_get(self, indexer: VectorizedIndexer): + return self.array.vindex[indexer.tuple] - def __getitem__(self, key): - self._check_and_raise_if_non_basic_indexer(key) - return self.array[key.tuple] + def __getitem__(self, indexer: ExplicitIndexer): + self._check_and_raise_if_non_basic_indexer(indexer) + return self.array[indexer.tuple] - def _oindex_set(self, key, value): - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in key.tuple) + def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple " "array indices to dask yet." 
) - self.array[key.tuple] = value + self.array[indexer.tuple] = value - def _vindex_set(self, key, value): - self.array.vindex[key.tuple] = value + def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + self.array.vindex[indexer.tuple] = value - def __setitem__(self, key, value): - self._check_and_raise_if_non_basic_indexer(key) - self.array[key.tuple] = value + def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + self._check_and_raise_if_non_basic_indexer(indexer) + self.array[indexer.tuple] = value def transpose(self, order): return self.array.transpose(order) @@ -1638,7 +1657,7 @@ def get_duck_array(self) -> np.ndarray: return np.asarray(self) @property - def shape(self) -> tuple[int, ...]: + def shape(self) -> _Shape: return (len(self.array),) def _convert_scalar(self, item): @@ -1661,14 +1680,14 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _oindex_get(self, key): - return self.__getitem__(key) + def _oindex_get(self, indexer: OuterIndexer): + return self.__getitem__(indexer) - def _vindex_get(self, key): - return self.__getitem__(key) + def _vindex_get(self, indexer: VectorizedIndexer): + return self.__getitem__(indexer) def __getitem__( - self, indexer + self, indexer: ExplicitIndexer ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1747,7 +1766,7 @@ def _convert_scalar(self, item): item = item[idx] return super()._convert_scalar(item) - def __getitem__(self, indexer): + def __getitem__(self, indexer: ExplicitIndexer): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index fd209bc273f..135dabc0656 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -813,7 +813,7 @@ def chunk( # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 - ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[no-untyped-call, assignment] + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[assignment] if is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) # type: ignore[assignment] diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index e650c454eac..f019d3c789c 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -880,7 +880,7 @@ def test_create_mask_dask() -> None: def test_create_mask_error() -> None: with pytest.raises(TypeError, match=r"unexpected key type"): - indexing.create_mask((1, 2), (3, 4)) + indexing.create_mask((1, 2), (3, 4)) # type: ignore[arg-type] @pytest.mark.parametrize( From bd9495fdeb8be38932a66b354d53f39923c8448b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 20 Mar 2024 22:16:45 -0600 Subject: [PATCH 7/9] upstream-dev CI: Fix interp and cumtrapz (#8861) --- xarray/tests/test_dataset.py | 8 +++----- xarray/tests/test_interp.py | 4 +++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2b8634b8b9..19b7ef7292c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -7052,12 +7052,10 @@ def test_cumulative_integrate(dask) -> None: # along x actual = da.cumulative_integrate("x") - # From scipy-1.6.0 cumtrapz is renamed to cumulative_trapezoid, but cumtrapz is - # still provided for backward compatibility - from 
scipy.integrate import cumtrapz + from scipy.integrate import cumulative_trapezoid expected_x = xr.DataArray( - cumtrapz(da.compute(), da["x"], axis=0, initial=0.0), + cumulative_trapezoid(da.compute(), da["x"], axis=0, initial=0.0), dims=["x", "y"], coords=da.coords, ) @@ -7073,7 +7071,7 @@ def test_cumulative_integrate(dask) -> None: # along y actual = da.cumulative_integrate("y") expected_y = xr.DataArray( - cumtrapz(da, da["y"], axis=1, initial=0.0), + cumulative_trapezoid(da, da["y"], axis=1, initial=0.0), dims=["x", "y"], coords=da.coords, ) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index a7644ac9d2b..7151c669fbc 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -833,7 +833,9 @@ def test_interpolate_chunk_1d( dest[dim] = cast( xr.DataArray, - np.linspace(before, after, len(da.coords[dim]) * 13), + np.linspace( + before.item(), after.item(), len(da.coords[dim]) * 13 + ), ) if chunked: dest[dim] = xr.DataArray(data=dest[dim], dims=[dim]) From 7c3d2dd2987f855a44d91993f73a9e1aef0a481d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 22 Mar 2024 16:30:11 +0100 Subject: [PATCH 8/9] numpy 2.0 copy-keyword and trapz vs trapezoid (#8865) * adapt handling of copy keyword argument in coding/strings.py for numpy >= 2.0dev * import either trapz or trapezoid depending on numpy version * add /change whats-new.rst entry * fix mypy, fix import order * adapt handling of copy keyword argument in coding/strings.py for numpy >= 2.0dev --- doc/whats-new.rst | 9 +++++++-- xarray/coding/strings.py | 15 +++++++++++++-- xarray/tests/test_dataset.py | 15 +++++++++++---- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cd01f0adaf1..c1bfaba8756 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,9 +59,14 @@ Bug fixes By `Kai Mühlbauer `_. - do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided (:issue:`8844`, :pull:`8852`). -- Adapt handling of copy keyword argument in scipy backend for numpy >= 2.0dev - (:issue:`8844`, :pull:`8851`). +- Adapt handling of copy keyword argument for numpy >= 2.0dev + (:issue:`8844`, :pull:`8851`, :pull:`8865``). By `Kai Mühlbauer `_. +- import trapz/trapezoid depending on numpy version. + (:issue:`8844`, :pull:`8865`). + By `Kai Mühlbauer `_. 
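# --- A hedged sketch of the numpy 2.0 copy-keyword change the entries above
# refer to (illustration only, not part of the patch). On numpy >= 2.0,
# copy=False means "never copy" and raises ValueError when a copy cannot be
# avoided; copy=None restores the old copy-only-if-needed behaviour.
import numpy as np
from numpy.lib import NumpyVersion

HAS_NUMPY_2_0 = NumpyVersion(np.__version__) >= "2.0.0.dev0"

# Pick the spelling of "copy only if needed" that the running numpy accepts.
copy_if_needed = None if HAS_NUMPY_2_0 else False

a = np.arange(6, dtype="int32")
b = np.array(a, copy=copy_if_needed, dtype="int64")  # the cast forces a copy
assert b.dtype == np.int64
c = np.array(a, copy=copy_if_needed)                 # no conversion needed,
assert np.shares_memory(a, c)                        # so no copy was made
# ---------------------------------------------------------------------------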
+ + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index b3b9d8d1041..db95286f6aa 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -15,10 +15,13 @@ unpack_for_encoding, ) from xarray.core import indexing +from xarray.core.utils import module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array +HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") + def create_vlen_dtype(element_type): if element_type not in (str, bytes): @@ -156,8 +159,12 @@ def bytes_to_char(arr): def _numpy_bytes_to_char(arr): """Like netCDF4.stringtochar, but faster and more flexible.""" + # adapt handling of copy-kwarg to numpy 2.0 + # see https://github.com/numpy/numpy/issues/25916 + # and https://github.com/numpy/numpy/pull/25922 + copy = None if HAS_NUMPY_2_0 else False # ensure the array is contiguous - arr = np.array(arr, copy=False, order="C", dtype=np.bytes_) + arr = np.array(arr, copy=copy, order="C", dtype=np.bytes_) return arr.reshape(arr.shape + (1,)).view("S1") @@ -199,8 +206,12 @@ def char_to_bytes(arr): def _numpy_char_to_bytes(arr): """Like netCDF4.chartostring, but faster and more flexible.""" + # adapt handling of copy-kwarg to numpy 2.0 + # see https://github.com/numpy/numpy/issues/25916 + # and https://github.com/numpy/numpy/pull/25922 + copy = None if HAS_NUMPY_2_0 else False # based on: http://stackoverflow.com/a/10984878/809705 - arr = np.array(arr, copy=False, order="C") + arr = np.array(arr, copy=copy, order="C") dtype = "S" + str(arr.shape[-1]) return arr.view(dtype).reshape(arr.shape[:-1]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 19b7ef7292c..39c404d096b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -80,6 +80,13 @@ except ImportError: pass +# from numpy version 2.0 trapz is deprecated and renamed to trapezoid +# remove once numpy 2.0 is the oldest supported version +try: + from numpy import trapezoid # type: ignore[attr-defined,unused-ignore] +except ImportError: + from numpy import trapz as trapezoid + sparse_array_type = array_type("sparse") pytestmark = [ @@ -6999,7 +7006,7 @@ def test_integrate(dask) -> None: actual = da.integrate("x") # coordinate that contains x should be dropped. 
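# --- A hedged aside on the fallback import above (illustration only, not
# part of the test suite): np.trapz is deprecated in numpy 2.0 in favour of
# the scipy-compatible name trapezoid, so the tests resolve one callable up
# front and use it throughout. Both names compute the same quadrature:
import numpy as np

try:
    from numpy import trapezoid  # numpy >= 2.0
except ImportError:
    from numpy import trapz as trapezoid  # numpy < 2.0

xs = np.linspace(0.0, 1.0, 101)
ys = xs**2
# The trapezoidal rule approximates the integral of x**2 over [0, 1] (= 1/3).
assert abs(trapezoid(ys, xs) - 1.0 / 3.0) < 1e-3
# ---------------------------------------------------------------------------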
expected_x = xr.DataArray( - np.trapz(da.compute(), da["x"], axis=0), + trapezoid(da.compute(), da["x"], axis=0), dims=["y"], coords={k: v for k, v in da.coords.items() if "x" not in v.dims}, ) @@ -7012,7 +7019,7 @@ def test_integrate(dask) -> None: # along y actual = da.integrate("y") expected_y = xr.DataArray( - np.trapz(da, da["y"], axis=1), + trapezoid(da, da["y"], axis=1), dims=["x"], coords={k: v for k, v in da.coords.items() if "y" not in v.dims}, ) @@ -7093,7 +7100,7 @@ def test_cumulative_integrate(dask) -> None: @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize("which_datetime", ["np", "cftime"]) -def test_trapz_datetime(dask, which_datetime) -> None: +def test_trapezoid_datetime(dask, which_datetime) -> None: rs = np.random.RandomState(42) if which_datetime == "np": coord = np.array( @@ -7124,7 +7131,7 @@ def test_trapz_datetime(dask, which_datetime) -> None: da = da.chunk({"time": 4}) actual = da.integrate("time", datetime_unit="D") - expected_data = np.trapz( + expected_data = trapezoid( da.compute().data, duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"), axis=0, From 6af547cdd9beac3b18420ccb204f801603e11519 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 22 Mar 2024 19:30:44 -0700 Subject: [PATCH 9/9] Handle .oindex and .vindex for the PandasMultiIndexingAdapter and PandasIndexingAdapter (#8869) --- xarray/core/indexing.py | 103 ++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 14 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 82ee4ccb0e4..e26c50c8b90 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1680,11 +1680,65 @@ def _convert_scalar(self, item): # a NumPy array. 
return to_0d_array(item) - def _oindex_get(self, indexer: OuterIndexer): - return self.__getitem__(indexer) + def _prepare_key(self, key: tuple[Any, ...]) -> tuple[Any, ...]: + if isinstance(key, tuple) and len(key) == 1: + # unpack key so it can index a pandas.Index object (pandas.Index + # objects don't like tuples) + (key,) = key - def _vindex_get(self, indexer: VectorizedIndexer): - return self.__getitem__(indexer) + return key + + def _handle_result( + self, result: Any + ) -> ( + PandasIndexingAdapter + | NumpyIndexingAdapter + | np.ndarray + | np.datetime64 + | np.timedelta64 + ): + if isinstance(result, pd.Index): + return type(self)(result, dtype=self.dtype) + else: + return self._convert_scalar(result) + + def _oindex_get( + self, indexer: OuterIndexer + ) -> ( + PandasIndexingAdapter + | NumpyIndexingAdapter + | np.ndarray + | np.datetime64 + | np.timedelta64 + ): + key = self._prepare_key(indexer.tuple) + + if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional + indexable = NumpyIndexingAdapter(np.asarray(self)) + return indexable.oindex[indexer] + + result = self.array[key] + + return self._handle_result(result) + + def _vindex_get( + self, indexer: VectorizedIndexer + ) -> ( + PandasIndexingAdapter + | NumpyIndexingAdapter + | np.ndarray + | np.datetime64 + | np.timedelta64 + ): + key = self._prepare_key(indexer.tuple) + + if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional + indexable = NumpyIndexingAdapter(np.asarray(self)) + return indexable.vindex[indexer] + + result = self.array[key] + + return self._handle_result(result) def __getitem__( self, indexer: ExplicitIndexer @@ -1695,22 +1749,15 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = indexer.tuple - if isinstance(key, tuple) and len(key) == 1: - # unpack key so it can index a pandas.Index object (pandas.Index - # objects don't like tuples) - (key,) = key + key = self._prepare_key(indexer.tuple) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) - return apply_indexer(indexable, indexer) + return indexable[indexer] result = self.array[key] - if isinstance(result, pd.Index): - return type(self)(result, dtype=self.dtype) - else: - return self._convert_scalar(result) + return self._handle_result(result) def transpose(self, order) -> pd.Index: return self.array # self.array should be always one-dimensional @@ -1766,6 +1813,34 @@ def _convert_scalar(self, item): item = item[idx] return super()._convert_scalar(item) + def _oindex_get( + self, indexer: OuterIndexer + ) -> ( + PandasIndexingAdapter + | NumpyIndexingAdapter + | np.ndarray + | np.datetime64 + | np.timedelta64 + ): + result = super()._oindex_get(indexer) + if isinstance(result, type(self)): + result.level = self.level + return result + + def _vindex_get( + self, indexer: VectorizedIndexer + ) -> ( + PandasIndexingAdapter + | NumpyIndexingAdapter + | np.ndarray + | np.datetime64 + | np.timedelta64 + ): + result = super()._vindex_get(indexer) + if isinstance(result, type(self)): + result.level = self.level + return result + def __getitem__(self, indexer: ExplicitIndexer): result = super().__getitem__(indexer) if isinstance(result, type(self)):
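# --- A hedged sketch of the key handling that _prepare_key/_handle_result
# above factor out of __getitem__, _oindex_get and _vindex_get (illustration
# only, not part of the patch). Per the comment in the patch, pandas.Index
# objects don't like tuple keys, so a length-1 ExplicitIndexer tuple is
# unpacked first; array keys come back as a pd.Index (which the adapter
# re-wraps in type(self)), while scalar results go through _convert_scalar.
import numpy as np
import pandas as pd

idx = pd.Index([10, 20, 30])
key = (np.array([0, 2]),)  # an ExplicitIndexer-style length-1 tuple

(unpacked,) = key          # what _prepare_key does for length-1 tuples
result = idx[unpacked]     # a pd.Index fancy-indexes like a 1-d array
assert isinstance(result, pd.Index) and list(result) == [10, 30]

scalar = idx[0]            # scalar results are converted, not re-wrapped
assert scalar == 10
# ---------------------------------------------------------------------------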