Skip to content

Commit

Permalink
Refactor of the big if-chain to a dictionary in the form {backend_name: backend_open}. (#4431)
Browse files Browse the repository at this point in the history

* Add docs re stable branch (#4444)

* Add docs re stable branch

* Update HOW_TO_RELEASE.md

Co-authored-by: keewis <keewis@users.noreply.github.com>

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Port engine selection refactor from #3166 and add zarr

* Always add `mode="r"` to zarr and simplify logic

Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: keewis <keewis@users.noreply.github.com>
Co-authored-by: Monica Rossetti <m.rossetti@bopen.eu>
  • Loading branch information
4 people authored Sep 24, 2020
1 parent 4f414f2 commit c0399d3
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 77 deletions.
122 changes: 47 additions & 75 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os.path
import warnings
from collections.abc import MutableMapping
from glob import glob
from io import BytesIO
from numbers import Number
Expand Down Expand Up @@ -41,6 +40,17 @@
DATAARRAY_NAME = "__xarray_dataarray_name__"
DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"

# Registry mapping engine name -> callable that opens a backend data store.
# Values are either an ``open``/``open_group`` classmethod or the store class
# itself (whose constructor performs the open); presumably each is invoked as
# ``opener(filename_or_obj, **kwargs)`` by the open_dataset dispatch — confirm
# against the caller.
ENGINES = {
    "netcdf4": backends.NetCDF4DataStore.open,
    "scipy": backends.ScipyDataStore,
    "pydap": backends.PydapDataStore.open,
    "h5netcdf": backends.H5NetCDFStore.open,
    "pynio": backends.NioDataStore,
    "pseudonetcdf": backends.PseudoNetCDFDataStore.open,
    "cfgrib": backends.CfGribDataStore,
    "zarr": backends.ZarrStore.open_group,
}


def _get_default_engine_remote_uri():
try:
Expand Down Expand Up @@ -153,6 +163,17 @@ def _get_default_engine(path, allow_remote=False):
return engine


def _get_backend_cls(engine):
    """Return the opener callable registered for ``engine``.

    Parameters
    ----------
    engine : str
        Name of the backend engine; must be a key of ``ENGINES``.

    Returns
    -------
    callable
        The store-opening function or class registered in ``ENGINES``.

    Raises
    ------
    ValueError
        If ``engine`` is not a recognized backend name.
    """
    try:
        return ENGINES[engine]
    except KeyError:
        # ``from None`` suppresses the internal KeyError so users see only
        # the actionable ValueError listing the valid engine names.
        raise ValueError(
            "unrecognized engine for open_dataset: {}\n"
            "must be one of: {}".format(engine, list(ENGINES))
        ) from None


def _normalize_path(path):
if is_remote_uri(path):
return path
Expand Down Expand Up @@ -407,23 +428,6 @@ def open_dataset(
--------
open_mfdataset
"""
engines = [
None,
"netcdf4",
"scipy",
"pydap",
"h5netcdf",
"pynio",
"cfgrib",
"pseudonetcdf",
"zarr",
]
if engine not in engines:
raise ValueError(
"unrecognized engine for open_dataset: {}\n"
"must be one of: {}".format(engine, engines)
)

if autoclose is not None:
warnings.warn(
"The autoclose argument is no longer used by "
Expand All @@ -450,6 +454,7 @@ def open_dataset(

if backend_kwargs is None:
backend_kwargs = {}
extra_kwargs = {}

def maybe_decode_store(store, chunks, lock=False):
ds = conventions.decode_cf(
Expand Down Expand Up @@ -532,68 +537,35 @@ def maybe_decode_store(store, chunks, lock=False):

if isinstance(filename_or_obj, AbstractDataStore):
store = filename_or_obj
else:
if isinstance(filename_or_obj, str):
filename_or_obj = _normalize_path(filename_or_obj)

elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
# Zarr supports a wide range of access modes, but for now xarray either
# reads or writes from a store, never both.
# For open_dataset(engine="zarr"), we only read (i.e. mode="r")
mode = "r"
_backend_kwargs = backend_kwargs.copy()
overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
store = backends.ZarrStore.open_group(
filename_or_obj, mode=mode, group=group, **_backend_kwargs
)

elif isinstance(filename_or_obj, str):
filename_or_obj = _normalize_path(filename_or_obj)
if engine is None:
engine = _get_default_engine(filename_or_obj, allow_remote=True)
elif engine != "zarr":
if engine not in [None, "scipy", "h5netcdf"]:
raise ValueError(
"can only read bytes or file-like objects "
"with engine='scipy' or 'h5netcdf'"
)
engine = _get_engine_from_magic_number(filename_or_obj)

if engine is None:
engine = _get_default_engine(filename_or_obj, allow_remote=True)
if engine == "netcdf4":
store = backends.NetCDF4DataStore.open(
filename_or_obj, group=group, lock=lock, **backend_kwargs
)
elif engine == "scipy":
store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
elif engine == "pydap":
store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs)
elif engine == "h5netcdf":
store = backends.H5NetCDFStore.open(
filename_or_obj, group=group, lock=lock, **backend_kwargs
)
elif engine == "pynio":
store = backends.NioDataStore(filename_or_obj, lock=lock, **backend_kwargs)
elif engine == "pseudonetcdf":
store = backends.PseudoNetCDFDataStore.open(
filename_or_obj, lock=lock, **backend_kwargs
)
elif engine == "cfgrib":
store = backends.CfGribDataStore(
filename_or_obj, lock=lock, **backend_kwargs
)
if engine in ["netcdf4", "h5netcdf"]:
extra_kwargs["group"] = group
extra_kwargs["lock"] = lock
elif engine in ["pynio", "pseudonetcdf", "cfgrib"]:
extra_kwargs["lock"] = lock
elif engine == "zarr":
# on ZarrStore, mode='r', synchronizer=None, group=None,
# consolidated=False.
_backend_kwargs = backend_kwargs.copy()
overwrite_encoded_chunks = _backend_kwargs.pop(
backend_kwargs = backend_kwargs.copy()
overwrite_encoded_chunks = backend_kwargs.pop(
"overwrite_encoded_chunks", None
)
store = backends.ZarrStore.open_group(
filename_or_obj, group=group, **_backend_kwargs
)
else:
if engine not in [None, "scipy", "h5netcdf"]:
raise ValueError(
"can only read bytes or file-like objects "
"with engine='scipy' or 'h5netcdf'"
)
engine = _get_engine_from_magic_number(filename_or_obj)
if engine == "scipy":
store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
elif engine == "h5netcdf":
store = backends.H5NetCDFStore.open(
filename_or_obj, group=group, lock=lock, **backend_kwargs
)
extra_kwargs["mode"] = "r"
extra_kwargs["group"] = group

opener = _get_backend_cls(engine)
store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs)

with close_on_error(store):
ds = maybe_decode_store(store, chunks)
Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from ..core.pycompat import integer_types
from ..core.utils import FrozenDict, HiddenKeyDict
from ..core.variable import Variable
from .api import open_dataset
from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name

# need some special secret attributes to tell us the dimensions
Expand Down Expand Up @@ -647,6 +646,7 @@ def open_zarr(
----------
http://zarr.readthedocs.io/
"""
from .api import open_dataset

if kwargs:
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2224,7 +2224,7 @@ def test_engine(self):
open_dataset(tmp_file, engine="foobar")

netcdf_bytes = data.to_netcdf()
with raises_regex(ValueError, "unrecognized engine"):
with raises_regex(ValueError, "can only read bytes or file-like"):
open_dataset(BytesIO(netcdf_bytes), engine="foobar")

def test_cross_engine_read_write_netcdf3(self):
Expand Down

0 comments on commit c0399d3

Please sign in to comment.