diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 9455ef2f127..1eddfb2a45e 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -11,7 +11,7 @@ dependencies: - dask<2021.02.0 - distributed - h5netcdf - - h5py=2 + - h5py - hdf5 - hypothesis - iris diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 7261b5b6954..e78c2b9181d 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -12,7 +12,7 @@ dependencies: - dask<2021.02.0 - distributed - h5netcdf - - h5py=2 + - h5py - hdf5 - hypothesis - iris diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 51ec48cc6b1..f6e3f7c5ff2 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -12,7 +12,7 @@ dependencies: - cftime - coveralls - h5netcdf - - h5py=2 + - h5py - hdf5 - hypothesis - lxml # Optional dep of pydap diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4b06003b630..f6b68c10b8f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -113,6 +113,8 @@ Bug fixes By `Leif Denby `_. - Fix time encoding bug associated with using cftime versions greater than 1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark `_. +- Fix decoding of vlen strings using h5py versions greater than 3.0.0 with h5netcdf backend (:issue:`4570`, :pull:`4893`). + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index aa892c4f89c..5766b34d9bd 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -131,6 +131,7 @@ def open( autoclose=False, invalid_netcdf=None, phony_dims=None, + decode_vlen_strings=True, ): if isinstance(filename, bytes): @@ -157,6 +158,10 @@ def open( "h5netcdf backend keyword argument 'phony_dims' needs " "h5netcdf >= 0.8.0." ) + if LooseVersion(h5netcdf.__version__) >= LooseVersion( + "0.10.0" + ) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"): + kwargs["decode_vlen_strings"] = decode_vlen_strings if lock is None: if mode == "r": @@ -358,6 +363,7 @@ def open_dataset( lock=None, invalid_netcdf=None, phony_dims=None, + decode_vlen_strings=True, ): store = H5NetCDFStore.open( @@ -367,6 +373,7 @@ def open_dataset( lock=lock, invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3750c0715ae..03f52c12dec 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2578,13 +2578,19 @@ def test_open_dataset_group(self): v = group.createVariable("x", "int") v[...] = 42 - h5 = h5netcdf.File(tmp_file, mode="r") + kwargs = {} + if LooseVersion(h5netcdf.__version__) >= LooseVersion( + "0.10.0" + ) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"): + kwargs = dict(decode_vlen_strings=True) + + h5 = h5netcdf.File(tmp_file, mode="r", **kwargs) store = backends.H5NetCDFStore(h5["g"]) with open_dataset(store) as ds: expected = Dataset({"x": ((), 42)}) assert_identical(expected, ds) - h5 = h5netcdf.File(tmp_file, mode="r") + h5 = h5netcdf.File(tmp_file, mode="r", **kwargs) store = backends.H5NetCDFStore(h5, group="g") with open_dataset(store) as ds: expected = Dataset({"x": ((), 42)}) @@ -2599,7 +2605,13 @@ def test_deepcopy(self): v = nc.createVariable("y", np.int32, ("x",)) v[:] = np.arange(10) - h5 = h5netcdf.File(tmp_file, mode="r") + kwargs = {} + if LooseVersion(h5netcdf.__version__) >= LooseVersion( + "0.10.0" + ) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"): + kwargs = dict(decode_vlen_strings=True) + + h5 = h5netcdf.File(tmp_file, mode="r", **kwargs) store = backends.H5NetCDFStore(h5) with open_dataset(store) as ds: copied = ds.copy(deep=True)