From 17c9e8fa50da610990c42f0a49d63754a9a7db0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 6 Jul 2023 17:11:49 +0200 Subject: [PATCH] ensure no forward slashes in names for HDF5-based backends (#7953) * ensure no forward slashes in names for HDF5-based backends * fix mypy * Update xarray/backends/netCDF4_.py Co-authored-by: Joe Hamman * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Joe Hamman Co-authored-by: Deepak Cherian Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/backends/h5netcdf_.py | 3 +++ xarray/backends/netCDF4_.py | 12 ++++++++++++ xarray/tests/test_backends.py | 14 ++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index be7f8ade57c..87559ee9b02 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,9 @@ Deprecations Bug fixes ~~~~~~~~~ +- Ensure no forward slashes in variable and dimension names for HDF5-based engines. + (:issue:`7943`, :pull:`7953`) By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. 
+ Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 7389f6a2862..697ebb8ab92 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -20,6 +20,7 @@ from xarray.backends.netCDF4_ import ( BaseNetCDF4Array, _encode_nc4_variable, + _ensure_no_forward_slash_in_name, _extract_nc4_variable_encoding, _get_datatype, _nc4_require_group, @@ -256,6 +257,7 @@ def get_encoding(self): } def set_dimension(self, name, length, is_unlimited=False): + _ensure_no_forward_slash_in_name(name) if is_unlimited: self.ds.dimensions[name] = None self.ds.resize_dimension(name, length) @@ -273,6 +275,7 @@ def prepare_variable( ): import h5py + _ensure_no_forward_slash_in_name(name) attrs = variable.attrs.copy() dtype = _get_datatype(variable, raise_on_invalid_encoding=check_encoding) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 8a5d48c8c1e..b5c3413e7f8 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -194,6 +194,15 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): return ds +def _ensure_no_forward_slash_in_name(name): + if "/" in name: + raise ValueError( + f"Forward slashes '/' are not allowed in variable and dimension names (got {name!r}). " + "Forward slashes are used as hierarchy-separators for " + "HDF5-based files ('netcdf4'/'h5netcdf')." 
+ ) + + def _ensure_fill_value_valid(data, attributes): # work around for netCDF4/scipy issue where _FillValue has the wrong type: # https://github.com/Unidata/netcdf4-python/issues/271 @@ -447,6 +456,7 @@ def get_encoding(self): } def set_dimension(self, name, length, is_unlimited=False): + _ensure_no_forward_slash_in_name(name) dim_length = length if not is_unlimited else None self.ds.createDimension(name, size=dim_length) @@ -470,6 +480,8 @@ def encode_variable(self, variable): def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None ): + _ensure_no_forward_slash_in_name(name) + datatype = _get_datatype( variable, self.format, raise_on_invalid_encoding=check_encoding ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0450e769e7b..dad2d668ff8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1611,6 +1611,20 @@ def test_encoding_unlimited_dims(self) -> None: assert actual.encoding["unlimited_dims"] == set("y") assert_equal(ds, actual) + def test_raise_on_forward_slashes_in_names(self) -> None: + # test for forward slash in variable names and dimensions + # see GH 7943 + data_vars: list[dict[str, Any]] = [ + {"PASS/FAIL": (["PASSFAIL"], np.array([0]))}, + {"PASS/FAIL": np.array([0])}, + {"PASSFAIL": (["PASS/FAIL"], np.array([0]))}, + ] + for dv in data_vars: + ds = Dataset(data_vars=dv) + with pytest.raises(ValueError, match="Forward slashes '/' are not allowed"): + with self.roundtrip(ds): + pass + @requires_netCDF4 class TestNetCDF4Data(NetCDF4Base):