diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 813c3ba2780..785ab3913ef 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -54,7 +54,6 @@
         from dask.delayed import Delayed
     except ImportError:
         Delayed = None  # type: ignore[assignment, misc]
-    from io import BufferedIOBase
 
     from xarray.backends.common import BackendEntrypoint
     from xarray.core.types import (
@@ -62,6 +61,7 @@
         CompatOptions,
         JoinOptions,
         NestedSequence,
+        ReadBuffer,
         T_Chunks,
     )
 
@@ -474,7 +474,7 @@ def _datatree_from_backend_datatree(
 
 
 def open_dataset(
-    filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,
@@ -691,7 +691,7 @@ def open_dataset(
 
 
 def open_dataarray(
-    filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     *,
     engine: T_Engine | None = None,
     chunks: T_Chunks | None = None,
@@ -896,7 +896,7 @@ def open_dataarray(
 
 
 def open_datatree(
-    filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,
@@ -1111,7 +1111,7 @@ def open_datatree(
 
 
 def open_groups(
-    filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,
@@ -1137,10 +1137,6 @@ def open_groups(
     and cannot be opened directly with ``open_datatree``. It is encouraged to use this function to inspect
     your data, then make the necessary changes to make the structure coercible to a `DataTree` object before
     calling `DataTree.from_dict()` and proceeding with your analysis.
-    Parameters
-    ----------
-    filename_or_obj : str, Path, file-like, or DataStore
-        Strings and Path objects are interpreted as a path to a netCDF file.
     Parameters
     ----------
     filename_or_obj : str, Path, file-like, or DataStore
@@ -1338,7 +1334,10 @@ def open_groups(
 
 
 def open_mfdataset(
-    paths: str | os.PathLike | NestedSequence[str | os.PathLike],
+    paths: str
+    | os.PathLike
+    | ReadBuffer
+    | NestedSequence[str | os.PathLike | ReadBuffer],
     chunks: T_Chunks | None = None,
     concat_dim: (
         str
@@ -1541,7 +1540,7 @@ def open_mfdataset(
     if not paths:
         raise OSError("no files to open")
 
-    paths1d: list[str]
+    paths1d: list[str | ReadBuffer]
     if combine == "nested":
         if isinstance(concat_dim, str | DataArray) or concat_dim is None:
             concat_dim = [concat_dim]  # type: ignore[assignment]
diff --git a/xarray/backends/common.py b/xarray/backends/common.py
index 534567221b5..11e6e20a9dc 100644
--- a/xarray/backends/common.py
+++ b/xarray/backends/common.py
@@ -6,20 +6,19 @@
 import traceback
 from collections.abc import Iterable, Mapping, Sequence
 from glob import glob
-from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, cast, overload
+from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, overload
 
 import numpy as np
 
 from xarray.conventions import cf_encoder
 from xarray.core import indexing
 from xarray.core.datatree import DataTree
+from xarray.core.types import ReadBuffer
 from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri
 from xarray.namedarray.parallelcompat import get_chunked_array_type
 from xarray.namedarray.pycompat import is_chunked_array
 
 if TYPE_CHECKING:
-    from io import BufferedIOBase
-
     from xarray.core.dataset import Dataset
     from xarray.core.types import NestedSequence
 
@@ -65,24 +64,52 @@ def _normalize_path(path: str | os.PathLike | T) -> str | T:
     if isinstance(path, str) and not is_remote_uri(path):
         path = os.path.abspath(os.path.expanduser(path))
 
-    return cast(str, path)
+    return path  # type:ignore [return-value]
 
 
 @overload
 def _find_absolute_paths(
-    paths: str | os.PathLike | Sequence[str | os.PathLike], **kwargs
+    paths: str | os.PathLike | Sequence[str | os.PathLike],
+    **kwargs,
 ) -> list[str]: ...
 
 
+@overload
+def _find_absolute_paths(
+    paths: ReadBuffer | Sequence[ReadBuffer],
+    **kwargs,
+) -> list[ReadBuffer]: ...
+
+
 @overload
 def _find_absolute_paths(
     paths: NestedSequence[str | os.PathLike], **kwargs
 ) -> NestedSequence[str]: ...
 
 
+@overload
 def _find_absolute_paths(
-    paths: str | os.PathLike | NestedSequence[str | os.PathLike], **kwargs
-) -> NestedSequence[str]:
+    paths: NestedSequence[ReadBuffer], **kwargs
+) -> NestedSequence[ReadBuffer]: ...
+
+
+@overload
+def _find_absolute_paths(
+    paths: str
+    | os.PathLike
+    | ReadBuffer
+    | NestedSequence[str | os.PathLike | ReadBuffer],
+    **kwargs,
+) -> NestedSequence[str | ReadBuffer]: ...
+
+
+def _find_absolute_paths(
+    paths: str
+    | os.PathLike
+    | ReadBuffer
+    | NestedSequence[str | os.PathLike | ReadBuffer],
+    **kwargs,
+) -> NestedSequence[str | ReadBuffer]:
     """
     Find absolute paths from the pattern.
 
@@ -132,10 +159,12 @@ def _find_absolute_paths(
         return sorted(glob(_normalize_path(paths)))
     elif isinstance(paths, os.PathLike):
         return [_normalize_path(paths)]
+    elif isinstance(paths, ReadBuffer):
+        return [paths]
 
     def _normalize_path_list(
-        lpaths: NestedSequence[str | os.PathLike],
-    ) -> NestedSequence[str]:
+        lpaths: NestedSequence[str | os.PathLike | ReadBuffer],
+    ) -> NestedSequence[str | ReadBuffer]:
         paths = []
         for p in lpaths:
             if isinstance(p, str | os.PathLike):
@@ -546,10 +575,9 @@ def __repr__(self) -> str:
 
     def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         drop_variables: str | Iterable[str] | None = None,
-        **kwargs: Any,
     ) -> Dataset:
         """
         Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
@@ -559,7 +587,7 @@ def open_dataset(
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         """
         Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
@@ -569,8 +597,9 @@ def guess_can_open(
 
     def open_datatree(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
-        **kwargs: Any,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
+        *,
+        drop_variables: str | Iterable[str] | None = None,
     ) -> DataTree:
         """
         Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`.
@@ -580,8 +609,9 @@ def open_datatree(
 
     def open_groups_as_dict(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
-        **kwargs: Any,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
+        *,
+        drop_variables: str | Iterable[str] | None = None,
     ) -> dict[str, Dataset]:
         """
         Opens a dictionary mapping from group names to Datasets.
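
Note, not part of the patch: the `common.py` hunks above swap the `BufferedIOBase` annotation for `ReadBuffer` on the abstract `BackendEntrypoint` methods and drop the `**kwargs: Any` catch-all in favour of an explicit keyword-only `drop_variables`, which is why the per-backend `# type: ignore[override]` comments disappear in the hunks below. A minimal sketch of what a third-party entrypoint might look like against the updated signatures; the backend class and the dataset it returns are hypothetical and only the signatures mirror this diff:

```python
# Hedged sketch of a third-party backend written against the updated
# BackendEntrypoint signatures shown above. ``MyFormatBackendEntrypoint``
# and its behaviour are invented for illustration.
from __future__ import annotations

import os
from collections.abc import Iterable
from typing import Any

import xarray as xr
from xarray.backends import BackendEntrypoint
from xarray.backends.common import AbstractDataStore
from xarray.core.types import ReadBuffer  # available once this change is installed


class MyFormatBackendEntrypoint(BackendEntrypoint):
    description = "Illustrative backend matching the new abstract signatures"

    def guess_can_open(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    ) -> bool:
        # Only claim paths with a made-up extension; buffers and stores are declined.
        if not isinstance(filename_or_obj, str | os.PathLike):
            return False
        return str(filename_or_obj).endswith(".myfmt")

    def open_dataset(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        drop_variables: str | Iterable[str] | None = None,
    ) -> xr.Dataset:
        # No **kwargs catch-all anymore: any extra decoding options would be
        # declared explicitly as keyword-only parameters here.
        ds = xr.Dataset({"x": ("dim", [1, 2, 3])})
        if drop_variables:
            ds = ds.drop_vars(drop_variables)
        return ds
```

Registering such a class under the ``xarray.backends`` entry-point group is unchanged by this diff.
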
diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py
index 80579d68415..95cc1a1e93d 100644
--- a/xarray/backends/h5netcdf_.py
+++ b/xarray/backends/h5netcdf_.py
@@ -39,11 +39,10 @@
 from xarray.core.variable import Variable
 
 if TYPE_CHECKING:
-    from io import BufferedIOBase
-
     from xarray.backends.common import AbstractDataStore
     from xarray.core.dataset import Dataset
     from xarray.core.datatree import DataTree
+    from xarray.core.types import ReadBuffer
 
 
 class H5NetCDFArrayWrapper(BaseNetCDF4Array):
@@ -395,7 +394,7 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint):
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         magic_number = try_read_magic_number_from_file_or_path(filename_or_obj)
         if magic_number is not None:
@@ -407,9 +406,9 @@ def guess_can_open(
 
         return False
 
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
+    def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -456,7 +455,7 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
 
     def open_datatree(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -499,7 +498,7 @@ def open_datatree(
 
     def open_groups_as_dict(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py
index acc083ee4c8..a23d247b6c3 100644
--- a/xarray/backends/netCDF4_.py
+++ b/xarray/backends/netCDF4_.py
@@ -41,14 +41,13 @@
 from xarray.core.variable import Variable
 
 if TYPE_CHECKING:
-    from io import BufferedIOBase
-
     from h5netcdf.core import EnumType as h5EnumType
     from netCDF4 import EnumType as ncEnumType
 
     from xarray.backends.common import AbstractDataStore
     from xarray.core.dataset import Dataset
     from xarray.core.datatree import DataTree
+    from xarray.core.types import ReadBuffer
 
 # This lookup table maps from dtype.byteorder to a readable endian
 # string used by netCDF4.
@@ -627,7 +626,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
             return True
@@ -642,9 +641,9 @@ def guess_can_open(
 
         return False
 
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
+    def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -693,7 +692,7 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
 
     def open_datatree(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -735,7 +734,7 @@ def open_datatree(
 
     def open_groups_as_dict(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py
index 483ce6c425e..e321aece640 100644
--- a/xarray/backends/plugins.py
+++ b/xarray/backends/plugins.py
@@ -14,9 +14,9 @@
 if TYPE_CHECKING:
     import os
     from importlib.metadata import EntryPoint, EntryPoints
-    from io import BufferedIOBase
 
     from xarray.backends.common import AbstractDataStore
+    from xarray.core.types import ReadBuffer
 
 STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"]
 
@@ -138,7 +138,7 @@ def refresh_engines() -> None:
 
 
 def guess_engine(
-    store_spec: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    store_spec: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
 ) -> str | type[BackendEntrypoint]:
     engines = list_engines()
 
diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py
index 5a475a7c3be..74ddbc8443b 100644
--- a/xarray/backends/pydap_.py
+++ b/xarray/backends/pydap_.py
@@ -26,9 +26,9 @@
 
 if TYPE_CHECKING:
     import os
-    from io import BufferedIOBase
 
     from xarray.core.dataset import Dataset
+    from xarray.core.types import ReadBuffer
 
 
 class PydapArrayWrapper(BackendArray):
@@ -166,13 +166,13 @@ class PydapBackendEntrypoint(BackendEntrypoint):
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         return isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj)
 
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
+    def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
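
Note, not part of the patch: the entrypoint annotations above describe what callers may now pass; whether a given engine can actually read from a file-like object is unchanged by this diff (h5netcdf and scipy, for example, can). A hedged usage sketch of the end-user effect, with hypothetical file names:

```python
# Hedged sketch: passing already-open binary files to open_dataset and
# open_mfdataset. File names are hypothetical, and the chosen engine must
# support file-like input (h5netcdf is used here).
import xarray as xr

with open("single.nc", "rb") as f:
    ds = xr.open_dataset(f, engine="h5netcdf")
    print(ds)

with open("part1.nc", "rb") as f1, open("part2.nc", "rb") as f2:
    # The widened ``paths`` annotation in api.py covers sequences of buffers;
    # load before the files are closed.
    combined = xr.open_mfdataset(
        [f1, f2], engine="h5netcdf", combine="nested", concat_dim="time"
    ).load()
```
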
diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py
index e77443061fe..93d0e40a6e1 100644
--- a/xarray/backends/scipy_.py
+++ b/xarray/backends/scipy_.py
@@ -34,10 +34,9 @@
 from xarray.core.variable import Variable
 
 if TYPE_CHECKING:
-    from io import BufferedIOBase
-
     from xarray.backends.common import AbstractDataStore
     from xarray.core.dataset import Dataset
+    from xarray.core.types import ReadBuffer
 
 
 HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0")
@@ -292,7 +291,7 @@ class ScipyBackendEntrypoint(BackendEntrypoint):
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         magic_number = try_read_magic_number_from_file_or_path(filename_or_obj)
         if magic_number is not None and magic_number.startswith(b"\x1f\x8b"):
@@ -307,9 +306,9 @@ def guess_can_open(
 
         return False
 
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
+    def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
diff --git a/xarray/backends/store.py b/xarray/backends/store.py
index a507ee37470..b1b3956ca8e 100644
--- a/xarray/backends/store.py
+++ b/xarray/backends/store.py
@@ -13,7 +13,8 @@
 
 if TYPE_CHECKING:
     import os
-    from io import BufferedIOBase
+
+    from xarray.core.types import ReadBuffer
 
 
 class StoreBackendEntrypoint(BackendEntrypoint):
@@ -22,13 +23,13 @@ class StoreBackendEntrypoint(BackendEntrypoint):
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         return isinstance(filename_or_obj, AbstractDataStore)
 
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
+    def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index fcbf1f8c4a0..e20ee531915 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -36,13 +36,12 @@
 from xarray.namedarray.utils import module_available
 
 if TYPE_CHECKING:
-    from io import BufferedIOBase
-
     from zarr import Group as ZarrGroup
 
     from xarray.backends.common import AbstractDataStore
     from xarray.core.dataset import Dataset
     from xarray.core.datatree import DataTree
+    from xarray.core.types import ReadBuffer
 
 
 def _get_mappers(*, storage_options, store, chunk_store):
@@ -1448,7 +1447,7 @@ class ZarrBackendEntrypoint(BackendEntrypoint):
 
     def guess_can_open(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     ) -> bool:
         if isinstance(filename_or_obj, str | os.PathLike):
             _, ext = os.path.splitext(filename_or_obj)
@@ -1456,9 +1455,9 @@ def guess_can_open(
 
         return False
 
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
+    def open_dataset(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -1511,7 +1510,7 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
 
     def open_datatree(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -1528,7 +1527,6 @@ def open_datatree(
         storage_options=None,
         zarr_version=None,
         zarr_format=None,
-        **kwargs,
     ) -> DataTree:
         filename_or_obj = _normalize_path(filename_or_obj)
         groups_dict = self.open_groups_as_dict(
@@ -1548,14 +1546,13 @@ def open_datatree(
             storage_options=storage_options,
             zarr_version=zarr_version,
             zarr_format=zarr_format,
-            **kwargs,
         )
 
         return datatree_from_dict_with_io_cleanup(groups_dict)
 
     def open_groups_as_dict(
         self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         mask_and_scale=True,
         decode_times=True,
@@ -1572,7 +1569,6 @@ def open_groups_as_dict(
         storage_options=None,
         zarr_version=None,
         zarr_format=None,
-        **kwargs,
     ) -> dict[str, Dataset]:
         from xarray.core.treenode import NodePath
 
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 3937e4d3631..56d45ddfed6 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -13,6 +13,7 @@
     TypeVar,
     Union,
     overload,
+    runtime_checkable,
 )
 
 import numpy as np
@@ -308,6 +309,38 @@ def __iter__(self, /) -> Iterator[_T_co | NestedSequence[_T_co]]: ...
     def __reversed__(self, /) -> Iterator[_T_co | NestedSequence[_T_co]]: ...
 
 
+AnyStr_co = TypeVar("AnyStr_co", str, bytes, covariant=True)
+
+
+# this is shamelessly stolen from pandas._typing
+@runtime_checkable
+class BaseBuffer(Protocol):
+    @property
+    def mode(self) -> str:
+        # for _get_filepath_or_buffer
+        ...
+
+    def seek(self, __offset: int, __whence: int = ...) -> int:
+        # with one argument: gzip.GzipFile, bz2.BZ2File
+        # with two arguments: zip.ZipFile, read_sas
+        ...
+
+    def seekable(self) -> bool:
+        # for bz2.BZ2File
+        ...
+
+    def tell(self) -> int:
+        # for zip.ZipFile, read_stata, to_stata
+        ...
+
+
+@runtime_checkable
+class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
+    def read(self, __n: int = ...) -> AnyStr_co:
+        # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
+        ...
+
+
 QuantileMethods = Literal[
     "inverted_cdf",
     "averaged_inverted_cdf",
diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py
index 3a4b1d76287..9342423b727 100644
--- a/xarray/tests/test_backends_api.py
+++ b/xarray/tests/test_backends_api.py
@@ -69,7 +69,7 @@ def open_dataset(
 class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint):
     """Access an object passed to the `open_dataset` method."""
 
-    def open_dataset(self, dataset, *, drop_variables=None):  # type: ignore[override]
+    def open_dataset(self, dataset, *, drop_variables=None):
         """Return the first argument."""
         return dataset
 
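
Note, not part of the patch: because `BaseBuffer` and `ReadBuffer` are declared `runtime_checkable`, the `isinstance(paths, ReadBuffer)` branch added to `_find_absolute_paths` is a purely structural check: any object exposing `mode`, `seek`, `seekable`, `tell` and `read` passes, regardless of its base class. A small sketch of that behaviour, assuming a build of xarray that includes this change; the file name is arbitrary:

```python
# Hedged sketch of the structural isinstance check enabled by @runtime_checkable.
import io
from pathlib import Path

from xarray.core.types import ReadBuffer  # only present with this change

path = Path("example.bin")
path.write_bytes(b"\x00" * 4)

with path.open("rb") as f:
    # A buffered binary reader exposes mode, seek, seekable, tell and read,
    # so it satisfies the protocol.
    print(isinstance(f, ReadBuffer))  # expected: True

# io.BytesIO has read/seek/seekable/tell but no ``mode`` attribute, so the
# runtime check is expected to reject it even though it is readable.
print(isinstance(io.BytesIO(b"\x00"), ReadBuffer))  # expected: False

path.unlink()
```

This is the same check `_find_absolute_paths` performs before wrapping a single buffer in a list for `open_mfdataset`.
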