Skip to content

Commit

Permalink
remove extra file, and raise a deprecation warning on open_zarr
Browse files Browse the repository at this point in the history
  • Loading branch information
Miguel Jimenez-Urias committed Apr 29, 2020
1 parent aed1cc5 commit b488363
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 120 deletions.
Binary file removed default.profraw
Binary file not shown.
251 changes: 131 additions & 120 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ..core.variable import Variable
from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name

from .api import open_dataset

# need some special secret attributes to tell us the dimensions
DIMENSION_KEY = "_ARRAY_DIMENSIONS"

Expand Down Expand Up @@ -569,126 +571,135 @@ def open_zarr(
----------
http://zarr.readthedocs.io/
"""
if "auto_chunk" in kwargs:
auto_chunk = kwargs.pop("auto_chunk")
if auto_chunk:
chunks = "auto" # maintain backwards compatibility
else:
chunks = None

warnings.warn(
"auto_chunk is deprecated. Use chunks='auto' instead.",
FutureWarning,
stacklevel=2,
)

if kwargs:
raise TypeError(
"open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys())
)

if not isinstance(chunks, (int, dict)):
if chunks != "auto" and chunks is not None:
raise ValueError(
"chunks must be an int, dict, 'auto', or None. "
"Instead found %s. " % chunks
)

if chunks == "auto":
try:
import dask.array # noqa
except ImportError:
chunks = None

if not decode_cf:
mask_and_scale = False
decode_times = False
concat_characters = False
decode_coords = False

def maybe_decode_store(store, lock=False):
ds = conventions.decode_cf(
store,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
concat_characters=concat_characters,
decode_coords=decode_coords,
drop_variables=drop_variables,
warnings.warn(
"open_zarr is deprecated. Use open_dataset(engine='zarr') instead.",
DeprecationWarning,
)

# TODO: this is where we would apply caching

return ds

# Zarr supports a wide range of access modes, but for now xarray either
# reads or writes from a store, never both. For open_zarr, we only read
mode = "r"
zarr_store = ZarrStore.open_group(
store,
mode=mode,
synchronizer=synchronizer,
group=group,
consolidated=consolidated,
)
ds = maybe_decode_store(zarr_store)

# auto chunking needs to be here and not in ZarrStore because variable
# chunks do not survive decode_cf
# return trivial case
if not chunks:
return ds

# adapted from Dataset.Chunk()
if isinstance(chunks, int):
chunks = dict.fromkeys(ds.dims, chunks)

if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
chunks = dict(zip(ds.dims, chunks))

def get_chunk(name, var, chunks):
chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))

# Coordinate labels aren't chunked
if var.ndim == 1 and var.dims[0] == name:
return chunk_spec

if chunks == "auto":
return chunk_spec

for dim in var.dims:
if dim in chunks:
spec = chunks[dim]
if isinstance(spec, int):
spec = (spec,)
if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
if any(s % chunk_spec[dim] for s in spec):
warnings.warn(
"Specified Dask chunks %r would "
"separate Zarr chunk shape %r for "
"dimension %r. This significantly "
"degrades performance. Consider "
"rechunking after loading instead."
% (chunks[dim], chunk_spec[dim], dim),
stacklevel=2,
)
chunk_spec[dim] = chunks[dim]
return chunk_spec

def maybe_chunk(name, var, chunks):
from dask.base import tokenize

chunk_spec = get_chunk(name, var, chunks)

if (var.ndim > 0) and (chunk_spec is not None):
# does this cause any data to be read?
token2 = tokenize(name, var._data)
name2 = "zarr-%s" % token2
var = var.chunk(chunk_spec, name=name2, lock=None)
if overwrite_encoded_chunks and var.chunks is not None:
var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
return var
else:
return var

variables = {k: maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
return ds._replace_vars_and_dims(variables)
ds = open_dataset()

# if "auto_chunk" in kwargs:
# auto_chunk = kwargs.pop("auto_chunk")
# if auto_chunk:
# chunks = "auto" # maintain backwards compatibility
# else:
# chunks = None

# warnings.warn(
# "auto_chunk is deprecated. Use chunks='auto' instead.",
# FutureWarning,
# stacklevel=2,
# )

# if kwargs:
# raise TypeError(
# "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys())
# )

# if not isinstance(chunks, (int, dict)):
# if chunks != "auto" and chunks is not None:
# raise ValueError(
# "chunks must be an int, dict, 'auto', or None. "
# "Instead found %s. " % chunks
# )

# if chunks == "auto":
# try:
# import dask.array # noqa
# except ImportError:
# chunks = None

# if not decode_cf:
# mask_and_scale = False
# decode_times = False
# concat_characters = False
# decode_coords = False

# def maybe_decode_store(store, lock=False):
# ds = conventions.decode_cf(
# store,
# mask_and_scale=mask_and_scale,
# decode_times=decode_times,
# concat_characters=concat_characters,
# decode_coords=decode_coords,
# drop_variables=drop_variables,
# )

# # TODO: this is where we would apply caching

# return ds

# # Zarr supports a wide range of access modes, but for now xarray either
# # reads or writes from a store, never both. For open_zarr, we only read
# mode = "r"
# zarr_store = ZarrStore.open_group(
# store,
# mode=mode,
# synchronizer=synchronizer,
# group=group,
# consolidated=consolidated,
# )
# ds = maybe_decode_store(zarr_store)

# # auto chunking needs to be here and not in ZarrStore because variable
# # chunks do not survive decode_cf
# # return trivial case
# if not chunks:
# return ds

# # adapted from Dataset.Chunk()
# if isinstance(chunks, int):
# chunks = dict.fromkeys(ds.dims, chunks)

# if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
# chunks = dict(zip(ds.dims, chunks))

# def get_chunk(name, var, chunks):
# chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))

# # Coordinate labels aren't chunked
# if var.ndim == 1 and var.dims[0] == name:
# return chunk_spec

# if chunks == "auto":
# return chunk_spec

# for dim in var.dims:
# if dim in chunks:
# spec = chunks[dim]
# if isinstance(spec, int):
# spec = (spec,)
# if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
# if any(s % chunk_spec[dim] for s in spec):
# warnings.warn(
# "Specified Dask chunks %r would "
# "separate Zarr chunk shape %r for "
# "dimension %r. This significantly "
# "degrades performance. Consider "
# "rechunking after loading instead."
# % (chunks[dim], chunk_spec[dim], dim),
# stacklevel=2,
# )
# chunk_spec[dim] = chunks[dim]
# return chunk_spec

# def maybe_chunk(name, var, chunks):
# from dask.base import tokenize

# chunk_spec = get_chunk(name, var, chunks)

# if (var.ndim > 0) and (chunk_spec is not None):
# # does this cause any data to be read?
# token2 = tokenize(name, var._data)
# name2 = "zarr-%s" % token2
# var = var.chunk(chunk_spec, name=name2, lock=None)
# if overwrite_encoded_chunks and var.chunks is not None:
# var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
# return var
# else:
# return var

# variables = {k: maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
# return ds._replace_vars_and_dims(variables)
return ds

0 comments on commit b488363

Please sign in to comment.