Skip to content

Commit

Permalink
Ensure attributes on coordinate variables are preserved during round-tripping (#154)

Browse files Browse the repository at this point in the history

* ensure roundtripping tests check attrs

* add regression test checking that attrs are preserved on lat and lon coordinate variables

* fix bug for lat/lon coordinates

* decode_times=False everywhere

* release notes
  • Loading branch information
TomNicholas authored Jun 24, 2024
1 parent a7a5d4c commit ef2429a
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 14 deletions.
4 changes: 3 additions & 1 deletion docs/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ Deprecations
Bug fixes
~~~~~~~~~

- Ensure that `_ARRAY_DIMENSIONS` are dropped from variable `.attrs`. (:issue:`150`, :pull:`152`)
- Ensure that `_ARRAY_DIMENSIONS` are dropped from variable `.attrs`. (:issue:`150`, :pull:`152`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- Ensure that `.attrs` on coordinate variables are preserved during round-tripping. (:issue:`155`, :pull:`154`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.

Documentation
Expand Down
24 changes: 14 additions & 10 deletions virtualizarr/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,16 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
# use open_dataset_via_kerchunk to read it as references
vds = open_virtual_dataset(f"{tmpdir}/air.nc", indexes={})

# write those references to disk as kerchunk json
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the zarr store
roundtrip = xr.open_dataset(f"{tmpdir}/refs.{format}", engine="kerchunk")
# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)

# assert equal to original dataset
xrt.assert_equal(roundtrip, ds)
# assert identical to original dataset
xrt.assert_identical(roundtrip, ds)

def test_kerchunk_roundtrip_concat(self, tmpdir, format):
# set up example xarray dataset
Expand All @@ -82,14 +84,16 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format):
# concatenate virtually along time
vds = xr.concat([vds1, vds2], dim="time", coords="minimal", compat="override")

# write those references to disk as kerchunk json
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the zarr store
roundtrip = xr.open_dataset(f"{tmpdir}/refs.{format}", engine="kerchunk")
# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
)

# assert equal to original dataset
xrt.assert_equal(roundtrip, ds)
# assert identical to original dataset
xrt.assert_identical(roundtrip, ds)


def test_open_scalar_variable(tmpdir):
Expand Down
10 changes: 10 additions & 0 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,16 @@ def test_drop_array_dimensions(self, netcdf4_file):
vds = open_virtual_dataset(netcdf4_file, indexes={})
assert "_ARRAY_DIMENSIONS" not in vds["air"].attrs

def test_coordinate_variable_attrs_preserved(self, netcdf4_file):
    """Regression test for GH issue #155: attrs on coordinate variables
    (here ``lat``) must survive opening as a virtual dataset."""
    vds = open_virtual_dataset(netcdf4_file, indexes={})
    expected_lat_attrs = {
        "standard_name": "latitude",
        "long_name": "Latitude",
        "units": "degrees_north",
        "axis": "Y",
    }
    assert vds["lat"].attrs == expected_lat_attrs


class TestOpenVirtualDatasetIndexes:
def test_no_indexes(self, netcdf4_file):
Expand Down
5 changes: 2 additions & 3 deletions virtualizarr/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ def virtual_vars_from_kerchunk_refs(
var_name: variable_from_kerchunk_refs(refs, var_name, virtual_array_class)
for var_name in var_names_to_keep
}

return vars


Expand Down Expand Up @@ -306,7 +305,7 @@ def separate_coords(
"""
Try to generate a set of coordinates that won't cause xarray to automatically build a pandas.Index for the 1D coordinates.
Currently requires a workaround unless xarray 8107 is merged.
Currently requires this function as a workaround unless xarray PR #8124 is merged.
Will also preserve any loaded variables and indexes it is passed.
"""
Expand All @@ -322,7 +321,7 @@ def separate_coords(
# use workaround to avoid creating IndexVariables described here https://github.com/pydata/xarray/pull/8107#discussion_r1311214263
if len(var.dims) == 1:
dim1d, *_ = var.dims
coord_vars[name] = (dim1d, var.data)
coord_vars[name] = (dim1d, var.data, var.attrs)

if isinstance(var, IndexVariable):
# unless variable actually already is a loaded IndexVariable,
Expand Down

0 comments on commit ef2429a

Please sign in to comment.