From 33ee4a92555f359c824427a975910555def8064f Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Sat, 13 Jul 2024 13:14:04 -0400 Subject: [PATCH 01/27] sandbox open groups --- xarray/backends/api.py | 74 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 521bdf65e6a..2fc0c744cde 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -2,6 +2,7 @@ import os from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence +from typing import Optional from functools import partial from io import BytesIO from numbers import Number @@ -27,6 +28,7 @@ _normalize_path, ) from xarray.backends.locks import _get_scheduler +from xarray.backends.netCDF4_ import NetCDF4DataStore from xarray.core import indexing from xarray.core.combine import ( _infer_concat_order_from_positions, @@ -37,6 +39,7 @@ from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk from xarray.core.indexes import Index from xarray.core.types import NetcdfWriteModes, ZarrWriteModes +from xarray.core.treenode import NodePath from xarray.core.utils import is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager @@ -1720,3 +1723,74 @@ def to_zarr( return dask.delayed(_finalize_store)(writes, zstore) return zstore + + +def open_groups( + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + engine: T_Engine = None, + group: Optional[str] = None, + **kwargs, +) -> dict[str, Dataset]: + """ + Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. + + Useful when you have e.g. a netCDF file containing many groups, some of which are not alignable with their parents and so the file cannot be opened directly with ``open_datatree``. + + It is encouraged to use this function to inspect your data, then make the necessary changes to make the structure coercible to a `DataTree` object before calling `DataTree.from_dict()` and proceeding with your analysis. + + Parameters + ---------- + filename_or_obj : str, Path, file-like, or DataStore + Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. + engine : str, optional + Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. + group : str, optional + Group to use as the root group to start reading from. Groups above this root group will not be included in the output. + **kwargs : dict + Additional keyword arguments passed to :py:func:`~xarray.open_dataset` for each group. + + Returns + ------- + dict[str, xarray.Dataset] + + See Also + -------- + open_datatree() + DataTree.from_dict() + """ + filename_or_obj = _normalize_path(filename_or_obj) + store = NetCDF4DataStore.open( + filename_or_obj, + group=group, + ) + if group: + parent = NodePath("/") / NodePath(group) + else: + parent = NodePath("/") + + manager = store._manager + + ds = open_dataset(store, **kwargs) + # tree_root = DataTree.from_dict({str(parent): ds}) + for path_group in _iter_nc_groups(store.ds, parent=parent): + group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(group_store): + ds = store_entrypoint.open_dataset( + group_store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + new_node: DataTree = DataTree(name=NodePath(path_group).name, data=ds) + tree_root._set_item( + path_group, + new_node, + allow_overwrite=False, + new_nodes_along_path=True, + ) + return tree_root From 8186d8692c6719658f41771f7ebd0c7229921b03 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Sat, 13 Jul 2024 15:41:17 -0400 Subject: [PATCH 02/27] rough implementation of open_groups --- xarray/backends/api.py | 42 +++----------------- xarray/backends/common.py | 11 ++++++ xarray/backends/h5netcdf_.py | 77 ++++++++++++++++++++++++++++++------ xarray/backends/netCDF4_.py | 77 ++++++++++++++++++++++++++++++------ xarray/backends/plugins.py | 1 + 5 files changed, 149 insertions(+), 59 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 2fc0c744cde..01614f5bab0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1758,39 +1758,9 @@ def open_groups( open_datatree() DataTree.from_dict() """ - filename_or_obj = _normalize_path(filename_or_obj) - store = NetCDF4DataStore.open( - filename_or_obj, - group=group, - ) - if group: - parent = NodePath("/") / NodePath(group) - else: - parent = NodePath("/") - - manager = store._manager - - ds = open_dataset(store, **kwargs) - # tree_root = DataTree.from_dict({str(parent): ds}) - for path_group in _iter_nc_groups(store.ds, parent=parent): - group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) - store_entrypoint = StoreBackendEntrypoint() - with close_on_error(group_store): - ds = store_entrypoint.open_dataset( - group_store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - new_node: DataTree = DataTree(name=NodePath(path_group).name, data=ds) - tree_root._set_item( - path_group, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, - ) - return tree_root + if engine is None: + engine = plugins.guess_engine(filename_or_obj) + + backend = plugins.get_backend(engine) + + return backend.open_groups(filename_or_obj, **kwargs) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e9bfdd9d2c8..5d0a69796ab 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -535,6 +535,17 @@ def open_datatree( raise NotImplementedError() + def open_groups( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + **kwargs: Any, + ) -> dict[str, Dataset]: + """ + Backend open_groups method used by Xarray in :py:func:`~xarray.open_groups`. + """ + + raise NotImplementedError() + # mapping of engine name to (module name, BackendEntrypoint Class) BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {} diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index cd6bde45caa..5fdb20b8407 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -441,9 +441,9 @@ def open_datatree( group: str | Iterable[str] | Callable | None = None, **kwargs, ) -> DataTree: - from xarray.backends.api import open_dataset + # TODO: Add function docstring + from xarray.backends.common import _iter_nc_groups - from xarray.core.datatree import DataTree from xarray.core.treenode import NodePath from xarray.core.utils import close_on_error @@ -452,19 +452,20 @@ def open_datatree( filename_or_obj, group=group, ) + # Check for a group and make it a parent if it exists if group: parent = NodePath("/") / NodePath(group) else: parent = NodePath("/") manager = store._manager - ds = open_dataset(store, **kwargs) - tree_root = DataTree.from_dict({str(parent): ds}) + + groups_dict = {} for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = H5NetCDFStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() with close_on_error(group_store): - ds = store_entrypoint.open_dataset( + group_ds = store_entrypoint.open_dataset( group_store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -474,14 +475,66 @@ def open_datatree( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - new_node: DataTree = DataTree(name=NodePath(path_group).name, data=ds) - tree_root._set_item( - path_group, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, + + group_name = NodePath(path_group).name + groups_dict[group_name] = group_ds + + return DataTree.from_dict(groups_dict) + + def open_groups( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables: str | Iterable[str] | None = None, + use_cftime=None, + decode_timedelta=None, + group: str | Iterable[str] | Callable | None = None, + **kwargs, + ) -> dict[str, Dataset]: + # TODO: Add function docstring + + from xarray.backends.common import _iter_nc_groups + from xarray.core.treenode import NodePath + from xarray.core.utils import close_on_error + + filename_or_obj = _normalize_path(filename_or_obj) + store = H5NetCDFStore.open( + filename_or_obj, + group=group, + ) + # Check for a group and make it a parent if it exists + if group: + parent = NodePath("/") / NodePath(group) + else: + parent = NodePath("/") + + manager = store._manager + + groups_dict = {} + for path_group in _iter_nc_groups(store.ds, parent=parent): + group_store = H5NetCDFStore(manager, group=path_group, **kwargs) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(group_store): + group_ds = store_entrypoint.open_dataset( + group_store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) - return tree_root + + group_name = NodePath(path_group).name + groups_dict[group_name] = group_ds + + return groups_dict + BACKEND_ENTRYPOINTS["h5netcdf"] = ("h5netcdf", H5netcdfBackendEntrypoint) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index f8dd1c96572..9a2f563d8dd 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -682,7 +682,8 @@ def open_datatree( group: str | Iterable[str] | Callable | None = None, **kwargs, ) -> DataTree: - from xarray.backends.api import open_dataset + # TODO: Add function docstring + from xarray.backends.common import _iter_nc_groups from xarray.core.datatree import DataTree from xarray.core.treenode import NodePath @@ -692,19 +693,21 @@ def open_datatree( filename_or_obj, group=group, ) + + # Check for a group and make it a parent if it exists if group: parent = NodePath("/") / NodePath(group) else: parent = NodePath("/") manager = store._manager - ds = open_dataset(store, **kwargs) - tree_root = DataTree.from_dict({str(parent): ds}) + groups_dict = {} + for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() with close_on_error(group_store): - ds = store_entrypoint.open_dataset( + group_ds = store_entrypoint.open_dataset( group_store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -714,14 +717,66 @@ def open_datatree( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - new_node: DataTree = DataTree(name=NodePath(path_group).name, data=ds) - tree_root._set_item( - path_group, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, + + group_name = NodePath(path_group).name + groups_dict[group_name] = group_ds + + return DataTree.from_dict(groups_dict) + + + def open_groups( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables: str | Iterable[str] | None = None, + use_cftime=None, + decode_timedelta=None, + group: str | Iterable[str] | Callable | None = None, + **kwargs, + ) -> dict: + # TODO: Add function docstring + + from xarray.backends.common import _iter_nc_groups + from xarray.core.treenode import NodePath + + filename_or_obj = _normalize_path(filename_or_obj) + store = NetCDF4DataStore.open( + filename_or_obj, + group=group, + ) + + # Check for a group and make it a parent if it exists + if group: + parent = NodePath("/") / NodePath(group) + else: + parent = NodePath("/") + + manager = store._manager + + groups_dict = {} + for path_group in _iter_nc_groups(store.ds, parent=parent): + group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(group_store): + group_ds = store_entrypoint.open_dataset( + group_store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) - return tree_root + + group_name = NodePath(path_group).name + groups_dict[group_name] = group_ds + + return groups_dict BACKEND_ENTRYPOINTS["netcdf4"] = ("netCDF4", NetCDF4BackendEntrypoint) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index a62ca6c9862..2a2da3790a0 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -202,6 +202,7 @@ def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint: if isinstance(engine, str): engines = list_engines() if engine not in engines: + # TODO: Improve error handing message raise ValueError( f"unrecognized engine {engine} must be one of: {list(engines)}" ) From 6b63704b29ae4acbac74204bc3c149378b1df610 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Sat, 13 Jul 2024 18:02:57 -0400 Subject: [PATCH 03/27] removed unused imports --- xarray/backends/api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 01614f5bab0..13162a48a69 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -2,7 +2,6 @@ import os from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence -from typing import Optional from functools import partial from io import BytesIO from numbers import Number @@ -28,7 +27,6 @@ _normalize_path, ) from xarray.backends.locks import _get_scheduler -from xarray.backends.netCDF4_ import NetCDF4DataStore from xarray.core import indexing from xarray.core.combine import ( _infer_concat_order_from_positions, @@ -39,7 +37,6 @@ from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk from xarray.core.indexes import Index from xarray.core.types import NetcdfWriteModes, ZarrWriteModes -from xarray.core.treenode import NodePath from xarray.core.utils import is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager From ef01edca7ab4426e5a112f8f24dd8fc1cfc29b32 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Jul 2024 22:04:25 +0000 Subject: [PATCH 04/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/h5netcdf_.py | 1 - xarray/backends/netCDF4_.py | 1 - 2 files changed, 2 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 5fdb20b8407..a37887a5b89 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -536,5 +536,4 @@ def open_groups( return groups_dict - BACKEND_ENTRYPOINTS["h5netcdf"] = ("h5netcdf", H5netcdfBackendEntrypoint) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 9a2f563d8dd..45d8da51334 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -723,7 +723,6 @@ def open_datatree( return DataTree.from_dict(groups_dict) - def open_groups( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, From 08e230a990b268faa1d6ad7e64f43c6b3234293f Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Sat, 13 Jul 2024 18:14:32 -0400 Subject: [PATCH 05/27] oops deleted optional --- xarray/backends/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 13162a48a69..93819d35bb8 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -2,6 +2,7 @@ import os from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence +from typing import Optional from functools import partial from io import BytesIO from numbers import Number From e01b0fbc1879596095b60ca7cfd5e5f14edde846 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Jul 2024 22:15:34 +0000 Subject: [PATCH 06/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 93819d35bb8..d7aa0bde0a5 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -2,7 +2,6 @@ import os from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence -from typing import Optional from functools import partial from io import BytesIO from numbers import Number @@ -12,6 +11,7 @@ Callable, Final, Literal, + Optional, Union, cast, overload, From 9e1984c4bbedc052f36854b806bddc0e4db4199d Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Fri, 26 Jul 2024 18:04:15 -0400 Subject: [PATCH 07/27] commit to test from main --- xarray/backends/api.py | 79 +++++++++++++++++------------------- xarray/backends/h5netcdf_.py | 38 +---------------- xarray/backends/netCDF4_.py | 37 +---------------- xarray/backends/plugins.py | 3 +- 4 files changed, 41 insertions(+), 116 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d7aa0bde0a5..8c3d4ce69c0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -11,7 +11,6 @@ Callable, Final, Literal, - Optional, Union, cast, overload, @@ -828,6 +827,43 @@ def open_datatree( return backend.open_datatree(filename_or_obj, **kwargs) +def open_groups( + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + engine: T_Engine = None, + **kwargs, +) -> dict[str, Dataset]: + """ + Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. + Useful for an HDF file ("netcdf4" or "h5netcdf") containing many groups that are not alignable with their parents + and cannot be opened directly with ``open_datatree``. It is encouraged to use this function to inspect your data, + then make the necessary changes to make the structure coercible to a `DataTree` object before calling `DataTree.from_dict()` and proceeding with your analysis. + + Parameters + ---------- + filename_or_obj : str, Path, file-like, or DataStore + Strings and Path objects are interpreted as a path to a netCDF file. + engine : str, optional + Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf"}`. + **kwargs : dict + Additional keyword arguments passed to :py:func:`~xarray.open_dataset` for each group. + + Returns + ------- + dict[str, xarray.Dataset] + + See Also + -------- + open_datatree() + DataTree.from_dict() + """ + if engine is None: + engine = plugins.guess_engine(filename_or_obj) + + backend = plugins.get_backend(engine) + + return backend.open_groups(filename_or_obj, **kwargs) + + def open_mfdataset( paths: str | NestedSequence[str | os.PathLike], chunks: T_Chunks | None = None, @@ -1721,44 +1757,3 @@ def to_zarr( return dask.delayed(_finalize_store)(writes, zstore) return zstore - - -def open_groups( - filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, - engine: T_Engine = None, - group: Optional[str] = None, - **kwargs, -) -> dict[str, Dataset]: - """ - Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. - - Useful when you have e.g. a netCDF file containing many groups, some of which are not alignable with their parents and so the file cannot be opened directly with ``open_datatree``. - - It is encouraged to use this function to inspect your data, then make the necessary changes to make the structure coercible to a `DataTree` object before calling `DataTree.from_dict()` and proceeding with your analysis. - - Parameters - ---------- - filename_or_obj : str, Path, file-like, or DataStore - Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. - engine : str, optional - Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. - group : str, optional - Group to use as the root group to start reading from. Groups above this root group will not be included in the output. - **kwargs : dict - Additional keyword arguments passed to :py:func:`~xarray.open_dataset` for each group. - - Returns - ------- - dict[str, xarray.Dataset] - - See Also - -------- - open_datatree() - DataTree.from_dict() - """ - if engine is None: - engine = plugins.guess_engine(filename_or_obj) - - backend = plugins.get_backend(engine) - - return backend.open_groups(filename_or_obj, **kwargs) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index a37887a5b89..f6fb370c2f8 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -441,43 +441,10 @@ def open_datatree( group: str | Iterable[str] | Callable | None = None, **kwargs, ) -> DataTree: - # TODO: Add function docstring - from xarray.backends.common import _iter_nc_groups - from xarray.core.treenode import NodePath - from xarray.core.utils import close_on_error - - filename_or_obj = _normalize_path(filename_or_obj) - store = H5NetCDFStore.open( - filename_or_obj, - group=group, - ) - # Check for a group and make it a parent if it exists - if group: - parent = NodePath("/") / NodePath(group) - else: - parent = NodePath("/") - - manager = store._manager - - groups_dict = {} - for path_group in _iter_nc_groups(store.ds, parent=parent): - group_store = H5NetCDFStore(manager, group=path_group, **kwargs) - store_entrypoint = StoreBackendEntrypoint() - with close_on_error(group_store): - group_ds = store_entrypoint.open_dataset( - group_store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) + from xarray.core.datatree import DataTree - group_name = NodePath(path_group).name - groups_dict[group_name] = group_ds + groups_dict = self.open_groups(filename_or_obj, **kwargs) return DataTree.from_dict(groups_dict) @@ -495,7 +462,6 @@ def open_groups( group: str | Iterable[str] | Callable | None = None, **kwargs, ) -> dict[str, Dataset]: - # TODO: Add function docstring from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 45d8da51334..01b7de71a28 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -682,44 +682,10 @@ def open_datatree( group: str | Iterable[str] | Callable | None = None, **kwargs, ) -> DataTree: - # TODO: Add function docstring - from xarray.backends.common import _iter_nc_groups from xarray.core.datatree import DataTree - from xarray.core.treenode import NodePath - - filename_or_obj = _normalize_path(filename_or_obj) - store = NetCDF4DataStore.open( - filename_or_obj, - group=group, - ) - - # Check for a group and make it a parent if it exists - if group: - parent = NodePath("/") / NodePath(group) - else: - parent = NodePath("/") - - manager = store._manager - groups_dict = {} - for path_group in _iter_nc_groups(store.ds, parent=parent): - group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) - store_entrypoint = StoreBackendEntrypoint() - with close_on_error(group_store): - group_ds = store_entrypoint.open_dataset( - group_store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - - group_name = NodePath(path_group).name - groups_dict[group_name] = group_ds + groups_dict = self.open_groups(filename_or_obj, **kwargs) return DataTree.from_dict(groups_dict) @@ -737,7 +703,6 @@ def open_groups( group: str | Iterable[str] | Callable | None = None, **kwargs, ) -> dict: - # TODO: Add function docstring from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 2a2da3790a0..a622791561a 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -202,9 +202,8 @@ def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint: if isinstance(engine, str): engines = list_engines() if engine not in engines: - # TODO: Improve error handing message raise ValueError( - f"unrecognized engine {engine} must be one of: {list(engines)}" + f"unrecognized engine {engine} must be one of your download engines: {list(engines)}" ) backend = engines[engine] elif isinstance(engine, type) and issubclass(engine, BackendEntrypoint): From 33a71acf24da50faf19d6dd63cff7e01146871c4 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Mon, 29 Jul 2024 22:24:59 -0400 Subject: [PATCH 08/27] added tests and small sample file --- xarray/backends/h5netcdf_.py | 6 ++- xarray/backends/netCDF4_.py | 10 +++-- xarray/tests/data/test_data_not_aligned.nc | Bin 0 -> 9038 bytes xarray/tests/test_backends_datatree.py | 49 ++++++++++++++++++++- 4 files changed, 59 insertions(+), 6 deletions(-) create mode 100644 xarray/tests/data/test_data_not_aligned.nc diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index f6fb370c2f8..2d733502be9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -463,6 +463,7 @@ def open_groups( **kwargs, ) -> dict[str, Dataset]: + from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath from xarray.core.utils import close_on_error @@ -480,7 +481,10 @@ def open_groups( manager = store._manager - groups_dict = {} + # Open root group with `xr.open_dataset()` and it to dictionary of groups + ds = open_dataset(filename_or_obj, **kwargs) + groups_dict = {str(parent): ds} + for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = H5NetCDFStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 01b7de71a28..ad442fe95bc 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -702,8 +702,8 @@ def open_groups( decode_timedelta=None, group: str | Iterable[str] | Callable | None = None, **kwargs, - ) -> dict: - + ) -> DataTree: + from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath @@ -721,7 +721,10 @@ def open_groups( manager = store._manager - groups_dict = {} + # Open root group with `xr.open_dataset() and to dictionary of groups + ds = open_dataset(filename_or_obj, **kwargs) + groups_dict = {str(parent): ds} + for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() @@ -736,7 +739,6 @@ def open_groups( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - group_name = NodePath(path_group).name groups_dict[group_name] = group_ds diff --git a/xarray/tests/data/test_data_not_aligned.nc b/xarray/tests/data/test_data_not_aligned.nc new file mode 100644 index 0000000000000000000000000000000000000000..e26c7ea16eddfdd6b7613344d1ddfdab4f1ebdd6 GIT binary patch literal 9038 zcmeHNO>7fK6n?uKTb6N1fTW=a$~N=@0Sl)oYLU_;_$P^CoMPwF3d!BX3#{sRqxB}L zR3t!!11AtSK%x>75<&a|`rgbuCvh4?XnLTUk-cwc z=FQu0zIih{e&f?4nOtjpQ{0TjfYTe53fpAKZdLK%)vnI*kxbD{+%~&Knbo4IxJk2& zn_!Si^kcdjtPauPz@ME-{XwOe+S}>6Axc+(oD}LR>yB-b*`F5GMQzF&EtV>64SYW= z&pCnTOjg}J{5Sytex7=i_iV;Y#0L6%kr5x(xztspyZLstyCj-ggW422b!e4dEN zhUZlK&ds6$4k-7%Stfc&)q*I*NHv`wDwSXZ7SY%dk(tr`*+OY_yihKshw@oSe1}7W z4I;8ZVQ7C==npFWi6dU!#*bsy+%TQV*%fcvt<^ohW>3zBcFhmxt9VG0Hm4L2AQSDc zg#-aA7u;~Fa;PukO?wq+M|Wlhc$5LQj=4QcIplhl@5Z48PlZzhT*eMjVh$r83WvQb z<52q8ujh03B!>fr@fr2oBuAthOw3_(UU6y&saY+;_3tc~6IN8{;{vF7ed$3+BHns64h!EQoO z%;vJiY$2U33j}-6Bq69cp;LFmc_@u}1Tm^_OanwH5HJ#yjU^KADV=$K{UjlZss)er zX~DapGCT;zvV-t!%hls#MJ_e3yQS?3DIl>~&ReyR%+|cHE*1u|Pc)4I{CK^1O{t?; zttpG9)s+2Um%SBES)vo*;O{T4h?}LC;q-W3jH{YrJm5SVdC;XH>FQ*K%gK!?vHqV* zkzJU}`E99QI}LpIopxe0lshJJ1sQ>ngPv#Wu6bNS+JP`0kb^nJa7tT|Gy|G}{}}@d zH%G74ze{Y@#{_OG=-@Rjw;i}4Ib5ze)2`TZ&`A_+IUFx2hwe1)Mx1aqa8KQRIQTcr CR3}0J literal 0 HcmV?d00001 diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index b4c4f481359..27249a5dd4e 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -1,11 +1,12 @@ from __future__ import annotations +import os from typing import TYPE_CHECKING, cast import pytest import xarray as xr -from xarray.backends.api import open_datatree +from xarray.backends.api import open_datatree, open_groups from xarray.core.datatree import DataTree from xarray.testing import assert_equal from xarray.tests import ( @@ -62,6 +63,52 @@ def test_netcdf_encoding(self, tmpdir, simple_datatree): with pytest.raises(ValueError, match="unexpected encoding group.*"): original_dt.to_netcdf(filepath, encoding=enc, engine=self.engine) + def test_open_datatree(self): + """Test `open_datatree` with netCDF4 file with this structure: + DataTree('None', parent=None) + │ Dimensions: (lat: 1, lon: 2) + │ Dimensions without coordinates: lat, lon + │ Data variables: + │ root_variable (lat, lon) float64 16B ... + └── DataTree('Group1') + Dimensions: (lat: 2, lon: 2) + Dimensions without coordinates: lat, lon + Data variables: + group_1_var (lat, lon) float64 32B ... + """ + filepath = os.path.join( + os.path.dirname(__file__), "data", "test_data_not_aligned.nc" + ) + with pytest.raises(ValueError): + open_datatree(filepath) + + def test_open_groups(self): + """Test `open_groups` with netCDF4 file with this structure: + DataTree('None', parent=None) + │ Dimensions: (lat: 1, lon: 2) + │ Dimensions without coordinates: lat, lon + │ Data variables: + │ root_variable (lat, lon) float64 16B ... + └── DataTree('Group1') + Dimensions: (lat: 2, lon: 2) + Dimensions without coordinates: lat, lon + Data variables: + group_1_var (lat, lon) float64 32B ... + """ + filepath = os.path.join( + os.path.dirname(__file__), "data", "test_data_not_aligned.nc" + ) + unaligned_dict_of_datasets = open_groups(filepath) + + # Check that group names are keys in the dictionary of `xr.Datasets` + assert "/" in unaligned_dict_of_datasets.keys() + assert "Group1" in unaligned_dict_of_datasets.keys() + # Check that group name returns the correct datasets + assert unaligned_dict_of_datasets["/"].identical(xr.open_dataset(filepath)) + assert unaligned_dict_of_datasets["Group1"].identical( + xr.open_dataset(filepath, group="Group1") + ) + @requires_netCDF4 class TestNetCDF4DatatreeIO(DatatreeIOBase): From 565ffb1830da45dbcc2242b187a84b3c57b42ef9 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 30 Jul 2024 10:29:00 -0400 Subject: [PATCH 09/27] updated what is new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e959ec111f5..b8d4bf044c9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,8 @@ New Features to return an object without ``attrs``. A ``deep`` parameter controls whether variables' ``attrs`` are also dropped. By `Maximilian Roos `_. (:pull:`8288`) + By `Eni Awowale `_. +- Add `open_groups` method for unaligned datasets (:issue:`9137`, :pull:`9243`) Breaking changes ~~~~~~~~~~~~~~~~ From ce607e628505c8fcc1d8cf6172a3a25165460546 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 30 Jul 2024 15:51:07 -0400 Subject: [PATCH 10/27] updated: open_groups to include fullpath of group, improved test to create netcdf4 file, on the fly --- xarray/backends/h5netcdf_.py | 3 +- xarray/backends/netCDF4_.py | 2 +- xarray/tests/data/test_data_not_aligned.nc | Bin 9038 -> 0 bytes xarray/tests/test_backends_datatree.py | 146 +++++++++++++++++---- 4 files changed, 126 insertions(+), 25 deletions(-) delete mode 100644 xarray/tests/data/test_data_not_aligned.nc diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 9028502d487..85b05edc5d7 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -466,6 +466,7 @@ def open_groups( drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, + format=None, group: str | Iterable[str] | Callable | None = None, lock=None, invalid_netcdf=None, @@ -520,7 +521,7 @@ def open_groups( decode_timedelta=decode_timedelta, ) - group_name = NodePath(path_group).name + group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds return groups_dict diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index f8277523f0f..6ae38e46a37 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -757,7 +757,7 @@ def open_groups( use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - group_name = NodePath(path_group).name + group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds return groups_dict diff --git a/xarray/tests/data/test_data_not_aligned.nc b/xarray/tests/data/test_data_not_aligned.nc deleted file mode 100644 index e26c7ea16eddfdd6b7613344d1ddfdab4f1ebdd6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9038 zcmeHNO>7fK6n?uKTb6N1fTW=a$~N=@0Sl)oYLU_;_$P^CoMPwF3d!BX3#{sRqxB}L zR3t!!11AtSK%x>75<&a|`rgbuCvh4?XnLTUk-cwc z=FQu0zIih{e&f?4nOtjpQ{0TjfYTe53fpAKZdLK%)vnI*kxbD{+%~&Knbo4IxJk2& zn_!Si^kcdjtPauPz@ME-{XwOe+S}>6Axc+(oD}LR>yB-b*`F5GMQzF&EtV>64SYW= z&pCnTOjg}J{5Sytex7=i_iV;Y#0L6%kr5x(xztspyZLstyCj-ggW422b!e4dEN zhUZlK&ds6$4k-7%Stfc&)q*I*NHv`wDwSXZ7SY%dk(tr`*+OY_yihKshw@oSe1}7W z4I;8ZVQ7C==npFWi6dU!#*bsy+%TQV*%fcvt<^ohW>3zBcFhmxt9VG0Hm4L2AQSDc zg#-aA7u;~Fa;PukO?wq+M|Wlhc$5LQj=4QcIplhl@5Z48PlZzhT*eMjVh$r83WvQb z<52q8ujh03B!>fr@fr2oBuAthOw3_(UU6y&saY+;_3tc~6IN8{;{vF7ed$3+BHns64h!EQoO z%;vJiY$2U33j}-6Bq69cp;LFmc_@u}1Tm^_OanwH5HJ#yjU^KADV=$K{UjlZss)er zX~DapGCT;zvV-t!%hls#MJ_e3yQS?3DIl>~&ReyR%+|cHE*1u|Pc)4I{CK^1O{t?; zttpG9)s+2Um%SBES)vo*;O{T4h?}LC;q-W3jH{YrJm5SVdC;XH>FQ*K%gK!?vHqV* zkzJU}`E99QI}LpIopxe0lshJJ1sQ>ngPv#Wu6bNS+JP`0kb^nJa7tT|Gy|G}{}}@d zH%G74ze{Y@#{_OG=-@Rjw;i}4Ib5ze)2`TZ&`A_+IUFx2hwe1)Mx1aqa8KQRIQTcr CR3}0J diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 27249a5dd4e..7a871af0f68 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -1,8 +1,9 @@ from __future__ import annotations -import os from typing import TYPE_CHECKING, cast +import netCDF4 as nc4 +import numpy as np import pytest import xarray as xr @@ -63,26 +64,55 @@ def test_netcdf_encoding(self, tmpdir, simple_datatree): with pytest.raises(ValueError, match="unexpected encoding group.*"): original_dt.to_netcdf(filepath, encoding=enc, engine=self.engine) - def test_open_datatree(self): - """Test `open_datatree` with netCDF4 file with this structure: + +@requires_netCDF4 +class TestNetCDF4DatatreeIO(DatatreeIOBase): + engine: T_DataTreeNetcdfEngine | None = "netcdf4" + + def test_open_datatree(self, tmpdir): + """Create a test netCDF4 file with this unaligned structure: DataTree('None', parent=None) │ Dimensions: (lat: 1, lon: 2) │ Dimensions without coordinates: lat, lon │ Data variables: │ root_variable (lat, lon) float64 16B ... └── DataTree('Group1') - Dimensions: (lat: 2, lon: 2) - Dimensions without coordinates: lat, lon - Data variables: - group_1_var (lat, lon) float64 32B ... + │ Dimensions: (lat: 1, lon: 2) + │ Dimensions without coordinates: lat, lon + │ Data variables: + │ group_1_var (lat, lon) float64 16B ... + └── DataTree('subgroup1') + Dimensions: (lat: 2, lon: 2) + Dimensions without coordinates: lat, lon + Data variables: + subgroup1_var (lat, lon) float64 32B ... """ - filepath = os.path.join( - os.path.dirname(__file__), "data", "test_data_not_aligned.nc" - ) + filepath = tmpdir + "/unaligned_subgroups.nc" + with nc4.Dataset(filepath, "w", format="NETCDF4") as root_group: + group_1 = root_group.createGroup("/Group1") + subgroup_1 = group_1.createGroup("/subgroup1") + + root_group.createDimension("lat", 1) + root_group.createDimension("lon", 2) + root_group.createVariable("root_variable", np.float_, ("lat", "lon")) + + group_1_var = group_1.createVariable( + "group_1_var", np.float_, ("lat", "lon") + ) + group_1_var[:] = np.array([[0.1, 0.2]]) + group_1_var.units = "K" + group_1_var.long_name = "air_temperature" + + subgroup_1.createDimension("lat", 2) + + subgroup1_var = subgroup_1.createVariable( + "subgroup1_var", np.float_, ("lat", "lon") + ) + subgroup1_var[:] = np.array([[0.1, 0.2]]) with pytest.raises(ValueError): open_datatree(filepath) - def test_open_groups(self): + def test_open_groups(self, tmpdir): """Test `open_groups` with netCDF4 file with this structure: DataTree('None', parent=None) │ Dimensions: (lat: 1, lon: 2) @@ -90,29 +120,99 @@ def test_open_groups(self): │ Data variables: │ root_variable (lat, lon) float64 16B ... └── DataTree('Group1') - Dimensions: (lat: 2, lon: 2) - Dimensions without coordinates: lat, lon - Data variables: - group_1_var (lat, lon) float64 32B ... + │ Dimensions: (lat: 1, lon: 2) + │ Dimensions without coordinates: lat, lon + │ Data variables: + │ group_1_var (lat, lon) float64 16B ... + └── DataTree('subgroup1') + Dimensions: (lat: 2, lon: 2) + Dimensions without coordinates: lat, lon + Data variables: + subgroup1_var (lat, lon) float64 32B ... """ - filepath = os.path.join( - os.path.dirname(__file__), "data", "test_data_not_aligned.nc" - ) + filepath = tmpdir + "/unaligned_subgroups.nc" + with nc4.Dataset(filepath, "w", format="NETCDF4") as root_group: + group_1 = root_group.createGroup("/Group1") + subgroup_1 = group_1.createGroup("/subgroup1") + + root_group.createDimension("lat", 1) + root_group.createDimension("lon", 2) + root_group.createVariable("root_variable", np.float_, ("lat", "lon")) + + group_1_var = group_1.createVariable( + "group_1_var", np.float_, ("lat", "lon") + ) + group_1_var[:] = np.array([[0.1, 0.2]]) + group_1_var.units = "K" + group_1_var.long_name = "air_temperature" + + subgroup_1.createDimension("lat", 2) + + subgroup1_var = subgroup_1.createVariable( + "subgroup1_var", np.float_, ("lat", "lon") + ) + subgroup1_var[:] = np.array([[0.1, 0.2]]) + unaligned_dict_of_datasets = open_groups(filepath) # Check that group names are keys in the dictionary of `xr.Datasets` assert "/" in unaligned_dict_of_datasets.keys() - assert "Group1" in unaligned_dict_of_datasets.keys() + assert "/Group1" in unaligned_dict_of_datasets.keys() + assert "/Group1/subgroup1" in unaligned_dict_of_datasets.keys() # Check that group name returns the correct datasets assert unaligned_dict_of_datasets["/"].identical(xr.open_dataset(filepath)) - assert unaligned_dict_of_datasets["Group1"].identical( + assert unaligned_dict_of_datasets["/Group1"].identical( xr.open_dataset(filepath, group="Group1") ) + assert unaligned_dict_of_datasets["/Group1/subgroup1"].identical( + xr.open_dataset(filepath, group="/Group1/subgroup1") + ) + def test_open_groups_to_dict(self, tmpdir): + """Create a an aligned netCDF4 with the following structure to test `open_groups` + and `DataTree.from_dict`. -@requires_netCDF4 -class TestNetCDF4DatatreeIO(DatatreeIOBase): - engine: T_DataTreeNetcdfEngine | None = "netcdf4" + Group: / + │ Dimensions: (lat: 1, lon: 2) + │ Dimensions without coordinates: lat, lon + │ Data variables: + │ root_variable (lat, lon) float64 16B ... + └── Group: /Group1 + │ Dimensions: (lat: 1, lon: 2) + │ Dimensions without coordinates: lat, lon + │ Data variables: + │ group_1_var (lat, lon) float64 16B ... + └── Group: /Group1/subgroup1 + Dimensions: (lat: 1, lon: 2) + Dimensions without coordinates: lat, lon + Data variables: + subgroup1_var (lat, lon) float64 16B ... + """ + filepath = tmpdir + "/all_aligned_child_nodes.nc" + with nc4.Dataset(filepath, "w", format="NETCDF4") as root_group: + group_1 = root_group.createGroup("/Group1") + subgroup_1 = group_1.createGroup("/subgroup1") + + root_group.createDimension("lat", 1) + root_group.createDimension("lon", 2) + root_group.createVariable("root_variable", np.float_, ("lat", "lon")) + + group_1_var = group_1.createVariable( + "group_1_var", np.float_, ("lat", "lon") + ) + group_1_var[:] = np.array([[0.1, 0.2]]) + group_1_var.units = "K" + group_1_var.long_name = "air_temperature" + + subgroup1_var = subgroup_1.createVariable( + "subgroup1_var", np.float_, ("lat", "lon") + ) + subgroup1_var[:] = np.array([[0.1, 0.2]]) + + aligned_dict_of_datasets = open_groups(filepath) + aligned_dt = DataTree.from_dict(aligned_dict_of_datasets) + + assert open_datatree(filepath).identical(aligned_dt) @requires_h5netcdf From eaba90851b6a5d04a415a4959ee3873bddb5fa79 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 30 Jul 2024 16:10:59 -0400 Subject: [PATCH 11/27] update float_ to float64 for numpy 2.0 --- xarray/tests/test_backends_datatree.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 7a871af0f68..455976b0170 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -94,10 +94,10 @@ def test_open_datatree(self, tmpdir): root_group.createDimension("lat", 1) root_group.createDimension("lon", 2) - root_group.createVariable("root_variable", np.float_, ("lat", "lon")) + root_group.createVariable("root_variable", np.float64, ("lat", "lon")) group_1_var = group_1.createVariable( - "group_1_var", np.float_, ("lat", "lon") + "group_1_var", np.float64, ("lat", "lon") ) group_1_var[:] = np.array([[0.1, 0.2]]) group_1_var.units = "K" @@ -106,7 +106,7 @@ def test_open_datatree(self, tmpdir): subgroup_1.createDimension("lat", 2) subgroup1_var = subgroup_1.createVariable( - "subgroup1_var", np.float_, ("lat", "lon") + "subgroup1_var", np.float64, ("lat", "lon") ) subgroup1_var[:] = np.array([[0.1, 0.2]]) with pytest.raises(ValueError): @@ -137,10 +137,10 @@ def test_open_groups(self, tmpdir): root_group.createDimension("lat", 1) root_group.createDimension("lon", 2) - root_group.createVariable("root_variable", np.float_, ("lat", "lon")) + root_group.createVariable("root_variable", np.float64, ("lat", "lon")) group_1_var = group_1.createVariable( - "group_1_var", np.float_, ("lat", "lon") + "group_1_var", np.float64, ("lat", "lon") ) group_1_var[:] = np.array([[0.1, 0.2]]) group_1_var.units = "K" @@ -149,7 +149,7 @@ def test_open_groups(self, tmpdir): subgroup_1.createDimension("lat", 2) subgroup1_var = subgroup_1.createVariable( - "subgroup1_var", np.float_, ("lat", "lon") + "subgroup1_var", np.float64, ("lat", "lon") ) subgroup1_var[:] = np.array([[0.1, 0.2]]) @@ -195,17 +195,17 @@ def test_open_groups_to_dict(self, tmpdir): root_group.createDimension("lat", 1) root_group.createDimension("lon", 2) - root_group.createVariable("root_variable", np.float_, ("lat", "lon")) + root_group.createVariable("root_variable", np.float64, ("lat", "lon")) group_1_var = group_1.createVariable( - "group_1_var", np.float_, ("lat", "lon") + "group_1_var", np.float64, ("lat", "lon") ) group_1_var[:] = np.array([[0.1, 0.2]]) group_1_var.units = "K" group_1_var.long_name = "air_temperature" subgroup1_var = subgroup_1.createVariable( - "subgroup1_var", np.float_, ("lat", "lon") + "subgroup1_var", np.float64, ("lat", "lon") ) subgroup1_var[:] = np.array([[0.1, 0.2]]) From b4b9822892de88cef3726ac7311672840673c17f Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Fri, 2 Aug 2024 16:22:03 -0400 Subject: [PATCH 12/27] added pr suggestions and mypy changes --- xarray/backends/common.py | 9 +++++++-- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/netCDF4_.py | 4 ++-- xarray/tests/test_backends_datatree.py | 21 ++++++++++++--------- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 5d0a69796ab..c811a081283 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -535,13 +535,18 @@ def open_datatree( raise NotImplementedError() - def open_groups( + def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, **kwargs: Any, ) -> dict[str, Dataset]: """ - Backend open_groups method used by Xarray in :py:func:`~xarray.open_groups`. + Opens a dictionary mapping from group names to Datasets. + + Called by :py:func:`~xarray.open_groups`. + This function exists to provide a universal way to open all groups in a file, + before applying any additional consistency checks or requirements necessary + to create a `DataTree` object (typically done using :py:meth:`~xarray.DataTree.from_dict`). """ raise NotImplementedError() diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 85b05edc5d7..0be4b7fc87b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -503,7 +503,7 @@ def open_groups( manager = store._manager # Open root group with `xr.open_dataset()` and it to dictionary of groups - ds = open_dataset(filename_or_obj, **kwargs) + ds = open_dataset(store, **kwargs) groups_dict = {str(parent): ds} for path_group in _iter_nc_groups(store.ds, parent=parent): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 6ae38e46a37..110c9b9e171 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -714,7 +714,7 @@ def open_groups( lock=None, autoclose=False, **kwargs, - ) -> DataTree: + ) -> dict[str, Dataset]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath @@ -740,7 +740,7 @@ def open_groups( manager = store._manager # Open root group with `xr.open_dataset() and to dictionary of groups - ds = open_dataset(filename_or_obj, **kwargs) + ds = open_dataset(store, **kwargs) groups_dict = {str(parent): ds} for path_group in _iter_nc_groups(store.ds, parent=parent): diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 455976b0170..8b413a3af88 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING, cast -import netCDF4 as nc4 import numpy as np import pytest @@ -19,6 +18,11 @@ if TYPE_CHECKING: from xarray.core.datatree_io import T_DataTreeNetcdfEngine +try: + import netCDF4 as nc4 +except ImportError: + pass + class DatatreeIOBase: engine: T_DataTreeNetcdfEngine | None = None @@ -71,17 +75,17 @@ class TestNetCDF4DatatreeIO(DatatreeIOBase): def test_open_datatree(self, tmpdir): """Create a test netCDF4 file with this unaligned structure: - DataTree('None', parent=None) + Group: / │ Dimensions: (lat: 1, lon: 2) │ Dimensions without coordinates: lat, lon │ Data variables: │ root_variable (lat, lon) float64 16B ... - └── DataTree('Group1') + └── Group: /Group1 │ Dimensions: (lat: 1, lon: 2) │ Dimensions without coordinates: lat, lon │ Data variables: │ group_1_var (lat, lon) float64 16B ... - └── DataTree('subgroup1') + └── Group: /Group1/subgroup1 Dimensions: (lat: 2, lon: 2) Dimensions without coordinates: lat, lon Data variables: @@ -113,18 +117,18 @@ def test_open_datatree(self, tmpdir): open_datatree(filepath) def test_open_groups(self, tmpdir): - """Test `open_groups` with netCDF4 file with this structure: - DataTree('None', parent=None) + """Test `open_groups` with netCDF4 file with the same unaligned structure: + Group: / │ Dimensions: (lat: 1, lon: 2) │ Dimensions without coordinates: lat, lon │ Data variables: │ root_variable (lat, lon) float64 16B ... - └── DataTree('Group1') + └── Group: /Group1 │ Dimensions: (lat: 1, lon: 2) │ Dimensions without coordinates: lat, lon │ Data variables: │ group_1_var (lat, lon) float64 16B ... - └── DataTree('subgroup1') + └── Group: /Group1/subgroup1 Dimensions: (lat: 2, lon: 2) Dimensions without coordinates: lat, lon Data variables: @@ -171,7 +175,6 @@ def test_open_groups(self, tmpdir): def test_open_groups_to_dict(self, tmpdir): """Create a an aligned netCDF4 with the following structure to test `open_groups` and `DataTree.from_dict`. - Group: / │ Dimensions: (lat: 1, lon: 2) │ Dimensions without coordinates: lat, lon From 9b9c1e7f1b83d50b61a5fcaadaf4f299982937fd Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 6 Aug 2024 10:19:50 -0400 Subject: [PATCH 13/27] merge conflict plugins.py --- xarray/backends/plugins.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index a622791561a..7632a7b052a 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -204,6 +204,9 @@ def get_backend(engine: str | type[BackendEntrypoint]) -> BackendEntrypoint: if engine not in engines: raise ValueError( f"unrecognized engine {engine} must be one of your download engines: {list(engines)}" + "To install additional dependencies, see:\n" + "https://docs.xarray.dev/en/stable/user-guide/io.html \n" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html" ) backend = engines[engine] elif isinstance(engine, type) and issubclass(engine, BackendEntrypoint): From 5fe8e96e1fc651062608571ae5a5ad6d39ee2161 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 6 Aug 2024 11:09:57 -0400 Subject: [PATCH 14/27] added mutable mapping --- xarray/backends/h5netcdf_.py | 5 +++-- xarray/backends/netCDF4_.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 0be4b7fc87b..e5424cddaf3 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,7 +3,7 @@ import functools import io import os -from collections.abc import Callable, Iterable +from collections.abc import Callable, Iterable, MutableMapping from typing import TYPE_CHECKING, Any from xarray.backends.common import ( @@ -38,6 +38,7 @@ from io import BufferedIOBase from xarray.backends.common import AbstractDataStore + from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree @@ -475,7 +476,7 @@ def open_groups( driver=None, driver_kwds=None, **kwargs, - ) -> dict[str, Dataset]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree | None]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 110c9b9e171..8d1dfd702ae 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -3,7 +3,7 @@ import functools import operator import os -from collections.abc import Callable, Iterable +from collections.abc import Callable, Iterable, MutableMapping from contextlib import suppress from typing import TYPE_CHECKING, Any @@ -43,6 +43,7 @@ from io import BufferedIOBase from xarray.backends.common import AbstractDataStore + from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree @@ -714,7 +715,7 @@ def open_groups( lock=None, autoclose=False, **kwargs, - ) -> dict[str, Dataset]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree | None]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath From 3b9c418f63a671fe3adc9417ed043f3a8cbd3cb5 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 6 Aug 2024 11:21:21 -0400 Subject: [PATCH 15/27] added mutable mapping to api --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e01aa22862c..ce88b912d58 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -841,7 +841,7 @@ def open_groups( filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, engine: T_Engine = None, **kwargs, -) -> dict[str, Dataset]: +) -> MutableMapping[str, Dataset | DataArray | DataTree | None]: """ Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. Useful for an HDF file ("netcdf4" or "h5netcdf") containing many groups that are not alignable with their parents From 222279c88215e49d1b5e54ee17235fa5a34255dd Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Wed, 7 Aug 2024 15:25:10 -0400 Subject: [PATCH 16/27] lets see if this passes mypy --- xarray/backends/api.py | 2 +- xarray/backends/common.py | 5 +++-- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/netCDF4_.py | 2 +- xarray/tests/test_backends_datatree.py | 4 +++- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index ce88b912d58..8f14c049479 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -841,7 +841,7 @@ def open_groups( filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, engine: T_Engine = None, **kwargs, -) -> MutableMapping[str, Dataset | DataArray | DataTree | None]: +) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: """ Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. Useful for an HDF file ("netcdf4" or "h5netcdf") containing many groups that are not alignable with their parents diff --git a/xarray/backends/common.py b/xarray/backends/common.py index c811a081283..386c557af7f 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -4,7 +4,7 @@ import os import time import traceback -from collections.abc import Iterable +from collections.abc import Iterable, MutableMapping from glob import glob from typing import TYPE_CHECKING, Any, ClassVar @@ -19,6 +19,7 @@ if TYPE_CHECKING: from io import BufferedIOBase + from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import NestedSequence @@ -539,7 +540,7 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, **kwargs: Any, - ) -> dict[str, Dataset]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: """ Opens a dictionary mapping from group names to Datasets. diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index e5424cddaf3..7df574d1144 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -476,7 +476,7 @@ def open_groups( driver=None, driver_kwds=None, **kwargs, - ) -> MutableMapping[str, Dataset | DataArray | DataTree | None]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 8d1dfd702ae..de7021fb1ee 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -715,7 +715,7 @@ def open_groups( lock=None, autoclose=False, **kwargs, - ) -> MutableMapping[str, Dataset | DataArray | DataTree | None]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 8b413a3af88..26f19ddb179 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -164,7 +164,9 @@ def test_open_groups(self, tmpdir): assert "/Group1" in unaligned_dict_of_datasets.keys() assert "/Group1/subgroup1" in unaligned_dict_of_datasets.keys() # Check that group name returns the correct datasets - assert unaligned_dict_of_datasets["/"].identical(xr.open_dataset(filepath)) + assert unaligned_dict_of_datasets["/"].identical( + xr.open_dataset(filepath, group="/") + ) assert unaligned_dict_of_datasets["/Group1"].identical( xr.open_dataset(filepath, group="Group1") ) From 4c65b0aa33a5de920918b389a428c48eb042a285 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Wed, 7 Aug 2024 15:31:04 -0400 Subject: [PATCH 17/27] mypy take 2 --- xarray/backends/api.py | 2 +- xarray/backends/common.py | 2 +- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/netCDF4_.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8f14c049479..455690ac9cd 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -841,7 +841,7 @@ def open_groups( filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, engine: T_Engine = None, **kwargs, -) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: +) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: """ Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. Useful for an HDF file ("netcdf4" or "h5netcdf") containing many groups that are not alignable with their parents diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 386c557af7f..0c51073c535 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -540,7 +540,7 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, **kwargs: Any, - ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: """ Opens a dictionary mapping from group names to Datasets. diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 7df574d1144..e214afa15f0 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -476,7 +476,7 @@ def open_groups( driver=None, driver_kwds=None, **kwargs, - ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index de7021fb1ee..1a405463d9a 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -715,7 +715,7 @@ def open_groups( lock=None, autoclose=False, **kwargs, - ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any]]: + ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath From 8c81a875456355b820f4f50ccebc16c3ab16d561 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Wed, 7 Aug 2024 15:38:18 -0400 Subject: [PATCH 18/27] mypy --- xarray/tests/test_backends_datatree.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 26f19ddb179..7c44ed7969b 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -8,7 +8,7 @@ import xarray as xr from xarray.backends.api import open_datatree, open_groups from xarray.core.datatree import DataTree -from xarray.testing import assert_equal +from xarray.testing import assert_equal, assert_identical from xarray.tests import ( requires_h5netcdf, requires_netCDF4, @@ -164,14 +164,16 @@ def test_open_groups(self, tmpdir): assert "/Group1" in unaligned_dict_of_datasets.keys() assert "/Group1/subgroup1" in unaligned_dict_of_datasets.keys() # Check that group name returns the correct datasets - assert unaligned_dict_of_datasets["/"].identical( - xr.open_dataset(filepath, group="/") + assert_identical( + unaligned_dict_of_datasets["/"], xr.open_dataset(filepath, group="/") ) - assert unaligned_dict_of_datasets["/Group1"].identical( - xr.open_dataset(filepath, group="Group1") + assert_identical( + unaligned_dict_of_datasets["/Group1"], + xr.open_dataset(filepath, group="Group1"), ) - assert unaligned_dict_of_datasets["/Group1/subgroup1"].identical( - xr.open_dataset(filepath, group="/Group1/subgroup1") + assert_identical( + unaligned_dict_of_datasets["/Group1/subgroup1"], + xr.open_dataset(filepath, group="/Group1/subgroup1"), ) def test_open_groups_to_dict(self, tmpdir): From d2c74d6f6691695ff2def4e2890b2080614b7869 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Thu, 8 Aug 2024 13:21:16 -0400 Subject: [PATCH 19/27] updated open_groups_dict --- xarray/backends/api.py | 4 ++-- xarray/backends/common.py | 5 ++--- xarray/backends/h5netcdf_.py | 9 ++++----- xarray/backends/netCDF4_.py | 9 ++++----- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 455690ac9cd..1c38344262f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -841,7 +841,7 @@ def open_groups( filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, engine: T_Engine = None, **kwargs, -) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: +) -> dict[str, Dataset]: """ Open and decode a file or file-like object, creating a dictionary containing one xarray Dataset for each group in the file. Useful for an HDF file ("netcdf4" or "h5netcdf") containing many groups that are not alignable with their parents @@ -871,7 +871,7 @@ def open_groups( backend = plugins.get_backend(engine) - return backend.open_groups(filename_or_obj, **kwargs) + return backend.open_groups_as_dict(filename_or_obj, **kwargs) def open_mfdataset( diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 0c51073c535..c811a081283 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -4,7 +4,7 @@ import os import time import traceback -from collections.abc import Iterable, MutableMapping +from collections.abc import Iterable from glob import glob from typing import TYPE_CHECKING, Any, ClassVar @@ -19,7 +19,6 @@ if TYPE_CHECKING: from io import BufferedIOBase - from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import NestedSequence @@ -540,7 +539,7 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, **kwargs: Any, - ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: + ) -> dict[str, Dataset]: """ Opens a dictionary mapping from group names to Datasets. diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index e214afa15f0..2f76e10bfac 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,7 +3,7 @@ import functools import io import os -from collections.abc import Callable, Iterable, MutableMapping +from collections.abc import Callable, Iterable from typing import TYPE_CHECKING, Any from xarray.backends.common import ( @@ -38,7 +38,6 @@ from io import BufferedIOBase from xarray.backends.common import AbstractDataStore - from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree @@ -452,11 +451,11 @@ def open_datatree( from xarray.core.datatree import DataTree - groups_dict = self.open_groups(filename_or_obj, **kwargs) + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) return DataTree.from_dict(groups_dict) - def open_groups( + def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, *, @@ -476,7 +475,7 @@ def open_groups( driver=None, driver_kwds=None, **kwargs, - ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: + ) -> dict[str, Dataset]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 1a405463d9a..8c05b761a56 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -3,7 +3,7 @@ import functools import operator import os -from collections.abc import Callable, Iterable, MutableMapping +from collections.abc import Callable, Iterable from contextlib import suppress from typing import TYPE_CHECKING, Any @@ -43,7 +43,6 @@ from io import BufferedIOBase from xarray.backends.common import AbstractDataStore - from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree @@ -692,11 +691,11 @@ def open_datatree( from xarray.core.datatree import DataTree - groups_dict = self.open_groups(filename_or_obj, **kwargs) + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) return DataTree.from_dict(groups_dict) - def open_groups( + def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, *, @@ -715,7 +714,7 @@ def open_groups( lock=None, autoclose=False, **kwargs, - ) -> MutableMapping[str, Dataset | DataArray | DataTree[Any] | None]: + ) -> dict[str, Dataset]: from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath From f2064081fd939d3c8a80170d93fa47eb575574fc Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Thu, 8 Aug 2024 14:09:45 -0400 Subject: [PATCH 20/27] changed return type for DataTree.from_dict --- xarray/core/datatree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 6289146308e..941b6aee243 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -3,7 +3,7 @@ import itertools import textwrap from collections import ChainMap -from collections.abc import Hashable, Iterable, Iterator, Mapping, MutableMapping +from collections.abc import Hashable, Iterable, Iterator, Mapping from html import escape from typing import ( TYPE_CHECKING, @@ -1067,7 +1067,7 @@ def drop_nodes( @classmethod def from_dict( cls, - d: MutableMapping[str, Dataset | DataArray | DataTree | None], + d: dict[str, Dataset], name: str | None = None, ) -> DataTree: """ From f72f3d2f8baafc90103f36bc0680bc035d3eee92 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Thu, 8 Aug 2024 15:12:16 -0400 Subject: [PATCH 21/27] fix test failures --- xarray/core/datatree.py | 6 +++--- xarray/tests/test_datatree.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 941b6aee243..6241d79f83f 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -3,7 +3,7 @@ import itertools import textwrap from collections import ChainMap -from collections.abc import Hashable, Iterable, Iterator, Mapping +from collections.abc import Hashable, Iterable, Iterator, Mapping, MutableMapping from html import escape from typing import ( TYPE_CHECKING, @@ -770,7 +770,7 @@ def _replace_node( if data is not _default: self._set_node_data(ds) - self._children = children + self.children = children def copy( self: DataTree, @@ -1067,7 +1067,7 @@ def drop_nodes( @classmethod def from_dict( cls, - d: dict[str, Dataset], + d: MutableMapping[str, Dataset | DataArray | DataTree | None], name: str | None = None, ) -> DataTree: """ diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index c875322b9c5..9a15376a1f8 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -245,6 +245,7 @@ def test_update(self): dt.update({"foo": xr.DataArray(0), "a": DataTree()}) expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "a": None}) assert_equal(dt, expected) + assert dt.groups == ("/", "/a") def test_update_new_named_dataarray(self): da = xr.DataArray(name="temp", data=[0, 50]) From 2f92b5c9e717b8e42a9651fe810a16ccfb5c1184 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Thu, 8 Aug 2024 17:32:29 -0400 Subject: [PATCH 22/27] update iter_nc_ to yield parent --- xarray/backends/common.py | 1 + xarray/backends/h5netcdf_.py | 7 +------ xarray/backends/netCDF4_.py | 7 +------ 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index c811a081283..38cba9af212 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -132,6 +132,7 @@ def _iter_nc_groups(root, parent="/"): from xarray.core.treenode import NodePath parent = NodePath(parent) + yield str(parent) for path, group in root.groups.items(): gpath = parent / path yield str(gpath) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 2f76e10bfac..a144bcac488 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -477,7 +477,6 @@ def open_groups_as_dict( **kwargs, ) -> dict[str, Dataset]: - from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath from xarray.core.utils import close_on_error @@ -501,11 +500,7 @@ def open_groups_as_dict( parent = NodePath("/") manager = store._manager - - # Open root group with `xr.open_dataset()` and it to dictionary of groups - ds = open_dataset(store, **kwargs) - groups_dict = {str(parent): ds} - + groups_dict = {} for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = H5NetCDFStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 8c05b761a56..7141827005e 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -715,7 +715,6 @@ def open_groups_as_dict( autoclose=False, **kwargs, ) -> dict[str, Dataset]: - from xarray.backends.api import open_dataset from xarray.backends.common import _iter_nc_groups from xarray.core.treenode import NodePath @@ -738,11 +737,7 @@ def open_groups_as_dict( parent = NodePath("/") manager = store._manager - - # Open root group with `xr.open_dataset() and to dictionary of groups - ds = open_dataset(store, **kwargs) - groups_dict = {str(parent): ds} - + groups_dict = {} for path_group in _iter_nc_groups(store.ds, parent=parent): group_store = NetCDF4DataStore(manager, group=path_group, **kwargs) store_entrypoint = StoreBackendEntrypoint() From 631967835cd9125d5ff67e6f01ae5c6a33e79210 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 13 Aug 2024 13:57:35 -0400 Subject: [PATCH 23/27] mypy suggestions --- xarray/core/datatree.py | 4 ++-- xarray/tests/test_backends_datatree.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 6241d79f83f..1f90973ff12 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -3,7 +3,7 @@ import itertools import textwrap from collections import ChainMap -from collections.abc import Hashable, Iterable, Iterator, Mapping, MutableMapping +from collections.abc import Hashable, Iterable, Iterator, Mapping from html import escape from typing import ( TYPE_CHECKING, @@ -1067,7 +1067,7 @@ def drop_nodes( @classmethod def from_dict( cls, - d: MutableMapping[str, Dataset | DataArray | DataTree | None], + d: Mapping[str, Dataset | DataArray | DataTree | None], name: str | None = None, ) -> DataTree: """ diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 7c44ed7969b..604f27317b9 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -73,7 +73,7 @@ def test_netcdf_encoding(self, tmpdir, simple_datatree): class TestNetCDF4DatatreeIO(DatatreeIOBase): engine: T_DataTreeNetcdfEngine | None = "netcdf4" - def test_open_datatree(self, tmpdir): + def test_open_datatree(self, tmpdir) -> None: """Create a test netCDF4 file with this unaligned structure: Group: / │ Dimensions: (lat: 1, lon: 2) @@ -116,7 +116,7 @@ def test_open_datatree(self, tmpdir): with pytest.raises(ValueError): open_datatree(filepath) - def test_open_groups(self, tmpdir): + def test_open_groups(self, tmpdir) -> None: """Test `open_groups` with netCDF4 file with the same unaligned structure: Group: / │ Dimensions: (lat: 1, lon: 2) @@ -176,7 +176,7 @@ def test_open_groups(self, tmpdir): xr.open_dataset(filepath, group="/Group1/subgroup1"), ) - def test_open_groups_to_dict(self, tmpdir): + def test_open_groups_to_dict(self, tmpdir) -> None: """Create a an aligned netCDF4 with the following structure to test `open_groups` and `DataTree.from_dict`. Group: / From 14661472413d182cb3b9de267b644167a732c479 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 13 Aug 2024 14:54:21 -0400 Subject: [PATCH 24/27] adding casting --- xarray/core/datatree.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 1f90973ff12..42f6ec16ce7 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -13,6 +13,7 @@ Literal, NoReturn, Union, + cast, overload, ) @@ -1095,7 +1096,8 @@ def from_dict( """ # First create the root node - root_data = d.pop("/", None) + d_cast = cast(dict, d) + root_data = d_cast.pop("/", None) if isinstance(root_data, DataTree): obj = root_data.copy() obj.orphan() From 0e3c946ccb4af52306f98c90c6267afda6c91af9 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Tue, 13 Aug 2024 17:21:58 -0400 Subject: [PATCH 25/27] explicitly convert to dict --- xarray/core/datatree.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 42f6ec16ce7..e30de2ddb36 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -13,7 +13,6 @@ Literal, NoReturn, Union, - cast, overload, ) @@ -1096,7 +1095,7 @@ def from_dict( """ # First create the root node - d_cast = cast(dict, d) + d_cast = dict(d) root_data = d_cast.pop("/", None) if isinstance(root_data, DataTree): obj = root_data.copy() From d44bf980f6cedee5b03c5c94445d1a6e726be898 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 14:24:35 +0000 Subject: [PATCH 26/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/datatree.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 764865d284d..0b0926fc89b 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -3,16 +3,13 @@ import itertools import textwrap from collections import ChainMap - from collections.abc import ( Callable, Hashable, Iterable, Iterator, Mapping, - MutableMapping, ) - from html import escape from typing import ( TYPE_CHECKING, From b2cf9b41c986cd1fb60315708351724457c6b3b3 Mon Sep 17 00:00:00 2001 From: Olufunke Awowale Date: Wed, 14 Aug 2024 12:26:28 -0400 Subject: [PATCH 27/27] updated to add d_cast for remaining functions --- xarray/core/datatree.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 0b0926fc89b..1b8a5ffbf38 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -11,15 +11,7 @@ Mapping, ) from html import escape -from typing import ( - TYPE_CHECKING, - Any, - Generic, - Literal, - NoReturn, - Union, - overload, -) +from typing import TYPE_CHECKING, Any, Generic, Literal, NoReturn, Union, overload from xarray.core import utils from xarray.core.alignment import align @@ -1112,10 +1104,10 @@ def depth(item) -> int: pathstr, _ = item return len(NodePath(pathstr).parts) - if d: + if d_cast: # Populate tree with children determined from data_objects mapping # Sort keys by depth so as to insert nodes from root first (see GH issue #9276) - for path, data in sorted(d.items(), key=depth): + for path, data in sorted(d_cast.items(), key=depth): # Create and set new node node_name = NodePath(path).name if isinstance(data, DataTree):