Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow chunk_store argument when opening Zarr datasets #3804

Merged
merged 11 commits into from
Aug 25, 2020
4 changes: 4 additions & 0 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,7 @@ def _validate_append_dim_and_encoding(
def to_zarr(
dataset,
store=None,
chunk_store=None,
mode=None,
synchronizer=None,
group=None,
Expand All @@ -1322,6 +1323,8 @@ def to_zarr(
"""
if isinstance(store, Path):
store = str(store)
if isinstance(chunk_store, Path):
chunk_store = str(chunk_store)
if encoding is None:
encoding = {}

Expand All @@ -1346,6 +1349,7 @@ def to_zarr(
synchronizer=synchronizer,
group=group,
consolidate_on_close=consolidated,
chunk_store=chunk_store,
)
zstore.append_dim = append_dim
writer = ArrayWriter()
Expand Down
8 changes: 8 additions & 0 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,14 @@ def open_group(
group=None,
consolidated=False,
consolidate_on_close=False,
chunk_store=None,
):
import zarr

open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group)
if chunk_store:
open_kwargs["chunk_store"] = chunk_store

if consolidated:
# TODO: an option to pass the metadata_key keyword
zarr_group = zarr.open_consolidated(store, **open_kwargs)
Expand Down Expand Up @@ -503,6 +507,7 @@ def open_zarr(
drop_variables=None,
consolidated=False,
overwrite_encoded_chunks=False,
chunk_store=None,
decode_timedelta=None,
**kwargs,
):
Expand Down Expand Up @@ -563,6 +568,8 @@ def open_zarr(
consolidated : bool, optional
Whether to open the store using zarr's consolidated metadata
capability. Only works for stores that have already been consolidated.
chunk_store : MutableMapping, optional
A separate Zarr store only for chunk data.
dcherian marked this conversation as resolved.
Show resolved Hide resolved
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
Expand Down Expand Up @@ -644,6 +651,7 @@ def maybe_decode_store(store, lock=False):
synchronizer=synchronizer,
group=group,
consolidated=consolidated,
chunk_store=chunk_store,
)
ds = maybe_decode_store(zarr_store)

Expand Down
4 changes: 4 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1571,6 +1571,7 @@ def to_netcdf(
def to_zarr(
self,
store: Union[MutableMapping, str, Path] = None,
chunk_store: Union[MutableMapping, str, Path] = None,
mode: str = None,
synchronizer=None,
group: str = None,
Expand All @@ -1589,6 +1590,8 @@ def to_zarr(
----------
store : MutableMapping, str or Path, optional
Store or path to directory in file system.
chunk_store : MutableMapping, str or Path, optional
Store or path to directory in file system only for Zarr array chunks.
mode : {'w', 'w-', 'a', None}
Persistence mode: 'w' means create (overwrite if exists);
'w-' means create (fail if exists);
Expand Down Expand Up @@ -1649,6 +1652,7 @@ def to_zarr(
return to_zarr(
self,
store=store,
chunk_store=chunk_store,
mode=mode,
synchronizer=synchronizer,
group=group,
Expand Down
10 changes: 10 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,6 +1564,15 @@ def test_roundtrip_consolidated(self):
self.check_dtypes_roundtripped(expected, actual)
assert_identical(expected, actual)

def test_with_chunkstore(self):
expected = create_test_data()
with self.create_zarr_target() as store_target, self.create_zarr_target() as chunk_store:
save_kwargs = {"chunk_store": chunk_store}
self.save(expected, store_target, **save_kwargs)
open_kwargs = {"chunk_store": chunk_store}
with self.open(store_target, **open_kwargs) as ds:
rabernat marked this conversation as resolved.
Show resolved Hide resolved
assert_equal(ds, expected)

@requires_dask
def test_auto_chunk(self):
original = create_test_data().chunk()
Expand All @@ -1580,6 +1589,7 @@ def test_auto_chunk(self):
# only index variables should be in memory
assert v._in_memory == (k in actual.dims)
# chunk size should be the same as original

dcherian marked this conversation as resolved.
Show resolved Hide resolved
assert v.chunks == original[k].chunks

@requires_dask
Expand Down
Loading