deprecate open_zarr #7496

Open · wants to merge 5 commits into base: main
2 changes: 1 addition & 1 deletion doc/api.rst
@@ -596,7 +596,6 @@ Dataset methods
open_dataset
open_mfdataset
open_rasterio
open_zarr
save_mfdataset
Dataset.as_numpy
Dataset.from_dataframe
@@ -1139,6 +1138,7 @@ Deprecated / Pending Deprecation
.. autosummary::
:toctree: generated/

open_zarr
Dataset.drop
DataArray.drop
Dataset.apply
6 changes: 3 additions & 3 deletions doc/user-guide/io.rst
@@ -626,11 +626,11 @@ To store variable length strings, convert them to object arrays first with
``dtype=object``.

To read back a zarr dataset that has been created this way, we use the
:py:func:`open_zarr` method:
:py:func:`open_dataset` function:

.. ipython:: python

ds_zarr = xr.open_zarr("path/to/directory.zarr")
ds_zarr = xr.open_dataset("path/to/directory.zarr", engine="zarr")
ds_zarr

Cloud Storage Buckets
@@ -671,7 +671,7 @@ instance and pass this, as follows:
# write to the bucket
ds.to_zarr(store=gcsmap)
# read it back
ds_gcs = xr.open_zarr(gcsmap)
ds_gcs = xr.open_dataset(gcsmap, engine="zarr")

(or use the utility function ``fsspec.get_mapper()``).
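
A minimal sketch of the ``fsspec.get_mapper()`` route, assuming a hypothetical bucket path and configured GCS credentials:

.. code:: python

    import fsspec
    import xarray as xr

    # map the (hypothetical) bucket location to a key-value store interface
    gcsmap = fsspec.get_mapper("gs://my-bucket/example.zarr")

    # read it back through the zarr engine
    ds_gcs = xr.open_dataset(gcsmap, engine="zarr")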

4 changes: 3 additions & 1 deletion doc/whats-new.rst
@@ -45,7 +45,9 @@ Breaking changes

Deprecations
~~~~~~~~~~~~

- The ``open_zarr`` function has been deprecated in favor of
  ``open_dataset(..., engine='zarr', chunks={})``
  (:issue:`7495`, :pull:`7496`).
  By `Joe Hamman <https://github.com/jhamman>`_.
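
A minimal migration sketch, assuming a Zarr store at a hypothetical path ``"store.zarr"``:

.. code:: python

    import xarray as xr

    # before: now emits a DeprecationWarning
    ds = xr.open_zarr("store.zarr")

    # after: equivalent call; chunks={} preserves the lazy, dask-backed
    # loading that open_zarr used by default
    ds = xr.open_dataset("store.zarr", engine="zarr", chunks={})

Code that cannot migrate immediately can silence the warning with
``warnings.filterwarnings("ignore", message="open_zarr is deprecated")``.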

Bug fixes
~~~~~~~~~
12 changes: 12 additions & 0 deletions xarray/backends/zarr.py
@@ -787,17 +787,29 @@ def open_zarr(
dataset : Dataset
The newly created dataset.

.. deprecated:: v2023.02.0
    `open_zarr` will be removed in Xarray v2023.06.0. Use
    `open_dataset(..., engine='zarr', chunks={})` instead.

See Also
--------
open_dataset
open_mfdataset

References
----------
http://zarr.readthedocs.io/
"""
from xarray.backends.api import open_dataset

warnings.warn(
"open_zarr is deprecated in favor of open_dataset(..., engine='zarr', chunks={}). "
"See https://github.com/pydata/xarray/issues/7495 for more information.",
DeprecationWarning,
stacklevel=2,
)

if chunks == "auto":
try:
import dask.array # noqa
46 changes: 32 additions & 14 deletions xarray/tests/test_backends.py
@@ -1812,12 +1812,12 @@ def test_read_non_consolidated_warning(self) -> None:
RuntimeWarning,
match="Failed to open Zarr store with consolidated",
):
with xr.open_zarr(store, **self.version_kwargs) as ds:
with xr.open_dataset(store, engine="zarr", **self.version_kwargs) as ds:
assert_identical(ds, expected)

def test_non_existent_store(self) -> None:
with pytest.raises(FileNotFoundError, match=r"No such file or directory:"):
xr.open_zarr(f"{uuid.uuid4()}")
xr.open_dataset(f"{uuid.uuid4()}", engine="zarr")

def test_with_chunkstore(self) -> None:
expected = create_test_data()
@@ -2365,8 +2365,11 @@ def test_write_region(self, consolidated, compute, use_dask) -> None:
**self.version_kwargs,
)
if compute:
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
with xr.open_dataset(
store,
consolidated=consolidated,
engine="zarr",
**self.version_kwargs,
) as actual:
assert_identical(actual, zeros)
for i in range(0, 10, 2):
@@ -2377,8 +2380,8 @@
consolidated=consolidated,
**self.version_kwargs,
)
with xr.open_zarr(
store, consolidated=consolidated, **self.version_kwargs
with xr.open_dataset(
store, consolidated=consolidated, engine="zarr", **self.version_kwargs
) as actual:
assert_identical(actual, nonzeros)

@@ -2392,7 +2395,7 @@ def test_write_region_mode(self, mode) -> None:
nonzeros.isel(region).to_zarr(
store, region=region, mode=mode, **self.version_kwargs
)
with xr.open_zarr(store, **self.version_kwargs) as actual:
with xr.open_dataset(store, engine="zarr", **self.version_kwargs) as actual:
assert_identical(actual, nonzeros)

@requires_dask
@@ -2556,9 +2559,11 @@ def test_open_zarr_use_cftime(self) -> None:
ds = create_test_data()
with self.create_zarr_target() as store_target:
ds.to_zarr(store_target, **self.version_kwargs)
ds_a = xr.open_zarr(store_target, **self.version_kwargs)
ds_a = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
assert_identical(ds, ds_a)
ds_b = xr.open_zarr(store_target, use_cftime=True, **self.version_kwargs)
ds_b = xr.open_dataset(
store_target, use_cftime=True, engine="zarr", **self.version_kwargs
)
assert xr.coding.times.contains_cftime_datetimes(ds_b.time)

def test_write_read_select_write(self) -> None:
@@ -2568,7 +2573,7 @@ def test_write_read_select_write(self) -> None:
# NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug.
with self.create_zarr_target() as initial_store:
ds.to_zarr(initial_store, mode="w", **self.version_kwargs)
ds1 = xr.open_zarr(initial_store, **self.version_kwargs)
ds1 = xr.open_dataset(initial_store, engine="zarr", **self.version_kwargs)

# Combination of where+squeeze triggers error on write.
ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3")
@@ -2583,7 +2588,9 @@ def test_attributes(self, obj) -> None:
ds = obj if isinstance(obj, Dataset) else obj.to_dataset()
with self.create_zarr_target() as store_target:
ds.to_zarr(store_target, **self.version_kwargs)
assert_identical(ds, xr.open_zarr(store_target, **self.version_kwargs))
assert_identical(
ds, xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
)

obj.attrs["bad"] = DataArray()
ds = obj if isinstance(obj, Dataset) else obj.to_dataset()
@@ -2666,7 +2673,9 @@ def test_zarr_storage_options() -> None:
ds = create_test_data()
store_target = "memory://test.zarr"
ds.to_zarr(store_target, storage_options={"test": "zarr_write"})
ds_a = xr.open_zarr(store_target, storage_options={"test": "zarr_read"})
ds_a = xr.open_dataset(
store_target, engine="zarr", storage_options={"test": "zarr_read"}
)
assert_identical(ds, ds_a)


@@ -5696,15 +5705,15 @@ def _create_nczarr(filename):
def test_open_nczarr(self) -> None:
with create_tmp_file(suffix=".zarr") as tmp:
expected = self._create_nczarr(tmp)
actual = xr.open_zarr(tmp, consolidated=False)
actual = xr.open_dataset(tmp, engine="zarr", consolidated=False)
assert_identical(expected, actual)

def test_overwriting_nczarr(self) -> None:
with create_tmp_file(suffix=".zarr") as tmp:
ds = self._create_nczarr(tmp)
expected = ds[["var1"]]
expected.to_zarr(tmp, mode="w")
actual = xr.open_zarr(tmp, consolidated=False)
actual = xr.open_dataset(tmp, engine="zarr", consolidated=False)
assert_identical(expected, actual)

@pytest.mark.parametrize("mode", ["a", "r+"])
@@ -5723,3 +5732,12 @@ def test_raise_writing_to_nczarr(self, mode) -> None:
def test_pickle_open_mfdataset_dataset():
ds = open_example_mfdataset(["bears.nc"])
assert_identical(ds, pickle.loads(pickle.dumps(ds)))


@requires_zarr
def test_open_zarr_deprecation_warns() -> None:
ds = create_test_data()
with create_tmp_file(suffix=".zarr") as tmp:
ds.to_zarr(tmp, mode="w")
with pytest.warns(DeprecationWarning, match="open_zarr is deprecated"):
xr.open_zarr(tmp)