From fe49f5fed28a046b250920ae4a38b5124fb29023 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 14 Sep 2024 15:40:08 -0700 Subject: [PATCH 01/22] fix: zarr v2 compatability fixes - port normalize_chunks from v2 - add array.store property - default to append in create --- src/zarr/api/asynchronous.py | 2 +- src/zarr/core/array.py | 35 ++++++++++++++++----------- src/zarr/core/chunk_grids.py | 47 +++++++++++++++++++++++++++++++++++- src/zarr/core/group.py | 4 ++- src/zarr/store/common.py | 8 +++--- 5 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 8a1b0c5f36..fe9bc824ec 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -724,7 +724,7 @@ async def create( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w")) + mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "a")) store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7311b6eec2..18ff09c75d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -8,12 +8,12 @@ import numpy as np import numpy.typing as npt -from zarr.abc.store import set_or_delete +from zarr.abc.store import Store, set_or_delete from zarr.codecs import BytesCodec from zarr.codecs._v2 import V2Compressor, V2Filters from zarr.core.attributes import Attributes from zarr.core.buffer import BufferPrototype, NDArrayLike, NDBuffer, default_buffer_prototype -from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks +from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, DefaultChunkKeyEncoding, @@ -129,7 +129,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -139,7 +139,7 @@ async def create( codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only - chunks: ShapeLike | None = None, + chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, filters: list[dict[str, JSON]] | None = None, @@ -152,15 +152,14 @@ async def create( shape = parse_shapelike(shape) - if chunk_shape is None: - if chunks is None: - chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize) - else: - chunks = parse_shapelike(chunks) + if chunks is not None and chunk_shape is not None: + raise ValueError("Only one of chunk_shape or chunks can be provided.") - chunk_shape = chunks - elif chunks is not None: - raise ValueError("Only one of chunk_shape or chunks must be provided.") + dtype = np.dtype(dtype) + if chunks: + _chunks = normalize_chunks(chunks, shape, dtype.itemsize) + if chunk_shape: + _chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize) if zarr_format == 3: if dimension_separator is not None: @@ -183,7 +182,7 @@ async def create( store_path, shape=shape, dtype=dtype, - chunk_shape=chunk_shape, + chunk_shape=_chunks, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, codecs=codecs, @@ -206,7 +205,7 @@ async def create( store_path, shape=shape, dtype=dtype, - chunks=chunk_shape, + chunks=_chunks, dimension_separator=dimension_separator, fill_value=fill_value, order=order, @@ -393,6 +392,10 @@ async def open( metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())), ) + @property + def store(self) -> Store: + return self.store_path.store + @property def ndim(self) -> int: return len(self.metadata.shape) @@ -697,6 +700,10 @@ def open( async_array = sync(AsyncArray.open(store)) return cls(async_array) + @property + def store(self) -> Store: + return self._async_array.store + @property def ndim(self) -> int: return self._async_array.ndim diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 61723215c6..4dc97da144 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -2,11 +2,12 @@ import itertools import math +import numbers import operator from abc import abstractmethod from dataclasses import dataclass from functools import reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np @@ -98,6 +99,50 @@ def _guess_chunks( return tuple(int(x) for x in chunks) +def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]: + """Convenience function to normalize the `chunks` argument for an array + with the given `shape`.""" + + # N.B., expect shape already normalized + + # handle auto-chunking + if chunks is None or chunks is True: + return _guess_chunks(shape, typesize) + + # handle no chunking + if chunks is False: + return shape + + # handle 1D convenience form + if isinstance(chunks, numbers.Integral): + chunks = tuple(int(chunks) for _ in shape) + + # handle dask-style chunks (iterable of iterables) + if all(isinstance(c, (tuple | list)) for c in chunks): + # take first chunk size for each dimension + chunks = ( + c[0] for c in chunks + ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) + + # handle bad dimensionality + if len(chunks) > len(shape): + raise ValueError("too many dimensions in chunks") + + # handle underspecified chunks + if len(chunks) < len(shape): + # assume chunks across remaining dimensions + chunks += shape[len(chunks) :] + + # handle None or -1 in chunks + if -1 in chunks or None in chunks: + chunks = tuple( + s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False) + ) + + out = tuple(int(c) for c in chunks) + return out + + @dataclass(frozen=True) class ChunkGrid(Metadata): @classmethod diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 40815b96c8..cfa3781b7b 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -173,7 +173,9 @@ async def open( # alternatively, we could warn and favor v3 raise ValueError("Both zarr.json and .zgroup objects exist") if zarr_json_bytes is None and zgroup_bytes is None: - raise FileNotFoundError(store_path) + raise FileNotFoundError( + f"could not find zarr.json or .zgroup objects in {store_path}" + ) # set zarr_format based on which keys were found if zarr_json_bytes is not None: zarr_format = 3 diff --git a/src/zarr/store/common.py b/src/zarr/store/common.py index 8028c9af3d..f7a8ca5cac 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/store/common.py @@ -78,12 +78,12 @@ async def make_store_path( store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None ) -> StorePath: if isinstance(store_like, StorePath): - if mode is not None: - assert AccessMode.from_literal(mode) == store_like.store.mode + if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode): + raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.store.mode})") return store_like elif isinstance(store_like, Store): - if mode is not None: - assert AccessMode.from_literal(mode) == store_like.mode + if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode): + raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode})") await store_like._ensure_open() return StorePath(store_like) elif store_like is None: From 9a1580b7f97667e0d024e5a5898fb38e61977e38 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 16 Sep 2024 16:47:50 -0700 Subject: [PATCH 02/22] move zarr.store to zarr.storage also fix failing ci --- src/zarr/api/asynchronous.py | 40 +++++++++++++++++----- src/zarr/api/synchronous.py | 2 +- src/zarr/core/array.py | 6 ++-- src/zarr/core/group.py | 4 +-- src/zarr/storage/__init__.py | 21 ++++++++++++ src/zarr/{store => storage}/_utils.py | 0 src/zarr/{store => storage}/common.py | 10 +++--- src/zarr/{store => storage}/local.py | 0 src/zarr/{store => storage}/memory.py | 2 +- src/zarr/{store => storage}/remote.py | 2 +- src/zarr/{store => storage}/zip.py | 0 src/zarr/store/__init__.py | 15 -------- src/zarr/testing/buffer.py | 2 +- src/zarr/testing/store.py | 2 +- src/zarr/testing/strategies.py | 2 +- tests/v3/conftest.py | 4 +-- tests/v3/test_api.py | 7 ++-- tests/v3/test_array.py | 4 +-- tests/v3/test_buffer.py | 4 +-- tests/v3/test_codecs/test_blosc.py | 2 +- tests/v3/test_codecs/test_codecs.py | 20 +++++------ tests/v3/test_codecs/test_endian.py | 2 +- tests/v3/test_codecs/test_gzip.py | 2 +- tests/v3/test_codecs/test_sharding.py | 2 +- tests/v3/test_codecs/test_transpose.py | 2 +- tests/v3/test_codecs/test_zstd.py | 2 +- tests/v3/test_group.py | 4 +-- tests/v3/test_indexing.py | 4 +-- tests/v3/test_store/test_core.py | 6 ++-- tests/v3/test_store/test_local.py | 2 +- tests/v3/test_store/test_memory.py | 2 +- tests/v3/test_store/test_remote.py | 2 +- tests/v3/test_store/test_stateful_store.py | 2 +- tests/v3/test_store/test_zip.py | 2 +- tests/v3/test_v2.py | 2 +- 35 files changed, 109 insertions(+), 76 deletions(-) create mode 100644 src/zarr/storage/__init__.py rename src/zarr/{store => storage}/_utils.py (100%) rename src/zarr/{store => storage}/common.py (97%) rename src/zarr/{store => storage}/local.py (100%) rename src/zarr/{store => storage}/memory.py (99%) rename src/zarr/{store => storage}/remote.py (99%) rename src/zarr/{store => storage}/zip.py (100%) delete mode 100644 src/zarr/store/__init__.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index fe9bc824ec..3bbf908bea 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -7,14 +7,16 @@ import numpy as np import numpy.typing as npt +from zarr.abc.store import Store from zarr.core.array import Array, AsyncArray from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat from zarr.core.config import config from zarr.core.group import AsyncGroup from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata -from zarr.store import ( +from zarr.storage import ( StoreLike, + StorePath, make_store_path, ) @@ -221,15 +223,16 @@ async def open( Return type depends on what exists in the given store. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path try: - return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + return await open_array(store=store_path, zarr_format=zarr_format, **kwargs) except KeyError: - return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + return await open_group(store=store_path, zarr_format=zarr_format, **kwargs) async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: @@ -299,7 +302,14 @@ async def save_array( or _default_zarr_version() ) - store_path = await make_store_path(store, mode="w") + mode = kwargs.pop("mode", None) + if isinstance(store, Store): + if mode is not None: + raise ValueError("mode cannot be specified if store is a Store") + elif mode is None: + mode = cast(AccessModeLiteral, "a") + + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path new = await AsyncArray.create( @@ -453,7 +463,9 @@ async def group( zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store) + mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") + + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -551,6 +563,9 @@ async def open_group( if storage_options is not None: warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) + if mode is not None and isinstance(store, Store | StorePath): + raise ValueError("store and mode cannot be specified at the same time") + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -724,7 +739,13 @@ async def create( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "a")) + mode = kwargs.pop("mode", None) + if isinstance(store, Store | StorePath): + if mode is not None: + raise ValueError("mode cannot be set when store is already initialized") + elif mode is None: + mode = cast(AccessModeLiteral, "r" if read_only else "a") + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -896,8 +917,11 @@ async def open_array( The opened array. """ - store_path = await make_store_path(store) - if path is not None: + mode = kwargs.pop("mode", None) + store_path = await make_store_path(store, mode=mode) + if ( + path is not None + ): # FIXME: apply path before opening store in w or risk deleting existing data store_path = store_path / path zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 93a33b8d3f..8c05f873c6 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from zarr.core.buffer import NDArrayLike from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat - from zarr.store import StoreLike + from zarr.storage import StoreLike __all__ = [ "consolidate_metadata", diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 18ff09c75d..eea3eaace0 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -59,8 +59,8 @@ from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.registry import get_pipeline_class -from zarr.store import StoreLike, StorePath, make_store_path -from zarr.store.common import ( +from zarr.storage import StoreLike, StorePath, make_store_path +from zarr.storage.common import ( ensure_no_existing_node, ) @@ -158,7 +158,7 @@ async def create( dtype = np.dtype(dtype) if chunks: _chunks = normalize_chunks(chunks, shape, dtype.itemsize) - if chunk_shape: + else: _chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize) if zarr_format == 3: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index cfa3781b7b..579c66c0b2 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -28,8 +28,8 @@ ) from zarr.core.config import config from zarr.core.sync import SyncMixin, sync -from zarr.store import StoreLike, StorePath, make_store_path -from zarr.store.common import ensure_no_existing_node +from zarr.storage import StoreLike, StorePath, make_store_path +from zarr.storage.common import ensure_no_existing_node if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable, Iterator diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py new file mode 100644 index 0000000000..216dcbac95 --- /dev/null +++ b/src/zarr/storage/__init__.py @@ -0,0 +1,21 @@ +from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore +from zarr.storage.remote import RemoteStore +from zarr.storage.zip import ZipStore + +# alias for backwards compatibility +FSStore = RemoteStore +DirectoryStore = LocalStore + +__all__ = [ + "DirectoryStore", + "FSStore", + "StorePath", + "StoreLike", + "make_store_path", + "RemoteStore", + "LocalStore", + "MemoryStore", + "ZipStore", +] diff --git a/src/zarr/store/_utils.py b/src/zarr/storage/_utils.py similarity index 100% rename from src/zarr/store/_utils.py rename to src/zarr/storage/_utils.py diff --git a/src/zarr/store/common.py b/src/zarr/storage/common.py similarity index 97% rename from src/zarr/store/common.py rename to src/zarr/storage/common.py index f7a8ca5cac..a27e794a2d 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/storage/common.py @@ -8,8 +8,8 @@ from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype @@ -79,11 +79,13 @@ async def make_store_path( ) -> StorePath: if isinstance(store_like, StorePath): if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode): - raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.store.mode})") + raise ValueError( + f"mode mismatch (mode={mode} != store.mode={store_like.store.mode.str})" + ) return store_like elif isinstance(store_like, Store): if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode): - raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode})") + raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode.str})") await store_like._ensure_open() return StorePath(store_like) elif store_like is None: diff --git a/src/zarr/store/local.py b/src/zarr/storage/local.py similarity index 100% rename from src/zarr/store/local.py rename to src/zarr/storage/local.py diff --git a/src/zarr/store/memory.py b/src/zarr/storage/memory.py similarity index 99% rename from src/zarr/store/memory.py rename to src/zarr/storage/memory.py index 13e289f374..bf56e2d95f 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/storage/memory.py @@ -6,7 +6,7 @@ from zarr.abc.store import Store from zarr.core.buffer import Buffer, gpu from zarr.core.common import concurrent_map -from zarr.store._utils import _normalize_interval_index +from zarr.storage._utils import _normalize_interval_index if TYPE_CHECKING: from collections.abc import AsyncGenerator, MutableMapping diff --git a/src/zarr/store/remote.py b/src/zarr/storage/remote.py similarity index 99% rename from src/zarr/store/remote.py rename to src/zarr/storage/remote.py index e3e2ba3447..e532f3ac0c 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/storage/remote.py @@ -5,7 +5,7 @@ import fsspec from zarr.abc.store import Store -from zarr.store.common import _dereference_path +from zarr.storage.common import _dereference_path if TYPE_CHECKING: from collections.abc import AsyncGenerator diff --git a/src/zarr/store/zip.py b/src/zarr/storage/zip.py similarity index 100% rename from src/zarr/store/zip.py rename to src/zarr/storage/zip.py diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py deleted file mode 100644 index 47bbccd66e..0000000000 --- a/src/zarr/store/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from zarr.store.common import StoreLike, StorePath, make_store_path -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore -from zarr.store.remote import RemoteStore -from zarr.store.zip import ZipStore - -__all__ = [ - "StorePath", - "StoreLike", - "make_store_path", - "RemoteStore", - "LocalStore", - "MemoryStore", - "ZipStore", -] diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index 9d640d2c64..e4affa59ac 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -7,7 +7,7 @@ import numpy.typing as npt from zarr.core.buffer import Buffer, BufferPrototype, cpu -from zarr.store import MemoryStore +from zarr.storage import MemoryStore if TYPE_CHECKING: from collections.abc import Iterable diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index a08b6960db..7ff73135d6 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -6,7 +6,7 @@ import zarr.api.asynchronous from zarr.abc.store import AccessMode, Store from zarr.core.buffer import Buffer, default_buffer_prototype -from zarr.store._utils import _normalize_interval_index +from zarr.storage._utils import _normalize_interval_index from zarr.testing.utils import assert_bytes_equal __all__ = ["StoreTests"] diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 83de3d92ce..4f02ba9e57 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -9,7 +9,7 @@ from zarr.core.array import Array from zarr.core.group import Group -from zarr.store import MemoryStore, StoreLike +from zarr.storage import MemoryStore, StoreLike # Copied from Xarray _attr_keys = st.text(st.characters(), min_size=1) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 41cd359346..db462a021b 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -10,8 +10,8 @@ from hypothesis import HealthCheck, Verbosity, settings from zarr import AsyncGroup, config -from zarr.store import LocalStore, MemoryStore, StorePath, ZipStore -from zarr.store.remote import RemoteStore +from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore +from zarr.storage.remote import RemoteStore if TYPE_CHECKING: from collections.abc import Generator, Iterator diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index ddfab587cc..a75077e1c7 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -9,7 +9,7 @@ from zarr.abc.store import Store from zarr.api.synchronous import create, load, open, open_group, save, save_array, save_group from zarr.core.common import ZarrFormat -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore def test_create_array(memory_store: Store) -> None: @@ -42,8 +42,9 @@ async def test_open_array(memory_store: MemoryStore) -> None: assert z.shape == (100,) # open array, overwrite - store._store_dict = {} - z = open(store=store, shape=200, mode="w") # mode="w" + # store._store_dict = {} + store = MemoryStore(mode="w") + z = open(store=store, shape=200) assert isinstance(z, Array) assert z.shape == (200,) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 11be51682c..cf938e3375 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -7,8 +7,8 @@ from zarr import Array, AsyncArray, Group from zarr.core.common import ZarrFormat from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.store import LocalStore, MemoryStore -from zarr.store.common import StorePath +from zarr.storage import LocalStore, MemoryStore +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index 5a313dc1ab..883a17a87c 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -13,8 +13,8 @@ from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu -from zarr.store.common import StorePath -from zarr.store.memory import MemoryStore +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, StoreExpectingTestBuffer, diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py index 4c569055b7..386101035b 100644 --- a/tests/v3/test_codecs/test_blosc.py +++ b/tests/v3/test_codecs/test_blosc.py @@ -7,7 +7,7 @@ from zarr.abc.store import Store from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 57103d17c2..7a9cd311e5 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -17,7 +17,7 @@ ) from zarr.core.buffer import default_buffer_prototype from zarr.core.indexing import Selection, morton_order_iter -from zarr.store import StorePath +from zarr.storage import StorePath from zarr.testing.utils import assert_bytes_equal if TYPE_CHECKING: @@ -375,15 +375,15 @@ async def test_dimension_names(store: Store) -> None: @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_invalid_metadata(store: Store) -> None: - spath = StorePath(store, "invalid_metadata") - with pytest.raises(ValueError): - Array.create( - spath, - shape=(16, 16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - ) + # spath = StorePath(store, "invalid_metadata") + # with pytest.raises(ValueError): + # Array.create( + # spath, + # shape=(16, 16, 16), + # chunk_shape=(16, 16), # this is now allowed (like in v2) + # dtype=np.dtype("uint8"), + # fill_value=0, + # ) spath2 = StorePath(store, "invalid_endian") with pytest.raises(TypeError): Array.create( diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py index 3c36c90b81..cc7abcb272 100644 --- a/tests/v3/test_codecs/test_endian.py +++ b/tests/v3/test_codecs/test_endian.py @@ -8,7 +8,7 @@ from zarr.abc.store import Store from zarr.codecs import BytesCodec from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from zarr.testing.utils import assert_bytes_equal from .test_codecs import _AsyncArrayProxy diff --git a/tests/v3/test_codecs/test_gzip.py b/tests/v3/test_codecs/test_gzip.py index 6495f8236c..91a7852d3c 100644 --- a/tests/v3/test_codecs/test_gzip.py +++ b/tests/v3/test_codecs/test_gzip.py @@ -4,7 +4,7 @@ from zarr import Array from zarr.abc.store import Store from zarr.codecs import BytesCodec, GzipCodec -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index bd8aab5e03..20bf0b60fe 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -15,7 +15,7 @@ TransposeCodec, ) from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py index c42c56034a..315746321f 100644 --- a/tests/v3/test_codecs/test_transpose.py +++ b/tests/v3/test_codecs/test_transpose.py @@ -9,7 +9,7 @@ from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec from zarr.core.buffer import default_buffer_prototype from zarr.core.common import MemoryOrder -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from .test_codecs import _AsyncArrayProxy diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py index 0726e5944c..e95e2355d7 100644 --- a/tests/v3/test_codecs/test_zstd.py +++ b/tests/v3/test_codecs/test_zstd.py @@ -4,7 +4,7 @@ from zarr import Array from zarr.abc.store import Store from zarr.codecs import BytesCodec, ZstdCodec -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 94b839a186..1dda4f1ce3 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -13,8 +13,8 @@ from zarr.core.group import GroupMetadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.store import LocalStore, StorePath -from zarr.store.common import make_store_path +from zarr.storage import LocalStore, StorePath +from zarr.storage.common import make_store_path from .conftest import parse_store diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index efb11f36a1..b1bf229146 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -19,8 +19,8 @@ replace_ellipsis, ) from zarr.registry import get_ndbuffer_class -from zarr.store.common import StorePath -from zarr.store.memory import MemoryStore +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore if TYPE_CHECKING: from collections.abc import Iterator diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index c65d91f9d0..c62ba19bb2 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -2,9 +2,9 @@ import pytest -from zarr.store.common import make_store_path -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore +from zarr.storage.common import make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore async def test_make_store_path(tmpdir: str) -> None: diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index 59cae22de3..f34e84863f 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -3,7 +3,7 @@ import pytest from zarr.core.buffer import Buffer, cpu -from zarr.store.local import LocalStore +from zarr.storage.local import LocalStore from zarr.testing.store import StoreTests diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 04d17eb240..6eebc4df39 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -5,7 +5,7 @@ import pytest from zarr.core.buffer import Buffer, cpu -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore from zarr.testing.store import StoreTests diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index afa991209f..13f993c9c5 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -8,7 +8,7 @@ from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.sync import sync -from zarr.store import RemoteStore +from zarr.storage import RemoteStore from zarr.testing.store import StoreTests s3fs = pytest.importorskip("s3fs") diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py index 1ecbd87cc1..be36961b0c 100644 --- a/tests/v3/test_store/test_stateful_store.py +++ b/tests/v3/test_store/test_stateful_store.py @@ -14,7 +14,7 @@ import zarr from zarr.abc.store import AccessMode, Store from zarr.core.buffer import BufferPrototype, cpu, default_buffer_prototype -from zarr.store import MemoryStore +from zarr.storage import MemoryStore from zarr.testing.strategies import key_ranges, paths diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index 7c332e9a2e..31df03eaba 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -10,7 +10,7 @@ import zarr from zarr.abc.store import AccessMode from zarr.core.buffer import Buffer, cpu, default_buffer_prototype -from zarr.store.zip import ZipStore +from zarr.storage.zip import ZipStore from zarr.testing.store import StoreTests if TYPE_CHECKING: diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py index 9ddde68e23..7909acab30 100644 --- a/tests/v3/test_v2.py +++ b/tests/v3/test_v2.py @@ -4,7 +4,7 @@ import pytest from zarr import Array -from zarr.store import MemoryStore, StorePath +from zarr.storage import MemoryStore, StorePath @pytest.fixture From 0d89912bec3e796e19f719baf3552c95aa01572f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 16 Sep 2024 19:36:38 -0700 Subject: [PATCH 03/22] make chunks a tuple --- src/zarr/core/chunk_grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 4dc97da144..91827daaf9 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -120,7 +120,7 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl # handle dask-style chunks (iterable of iterables) if all(isinstance(c, (tuple | list)) for c in chunks): # take first chunk size for each dimension - chunks = ( + chunks = tuple( c[0] for c in chunks ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) From e534279a06c98fdd8f36342bf9ff5a7929291e52 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 17 Sep 2024 08:46:42 -0700 Subject: [PATCH 04/22] Apply suggestions from code review --- src/zarr/api/asynchronous.py | 6 +++--- tests/v3/test_codecs/test_codecs.py | 9 --------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 3bbf908bea..37ab9eb69f 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -303,9 +303,9 @@ async def save_array( ) mode = kwargs.pop("mode", None) - if isinstance(store, Store): + if isinstance(store, Store | StorePath): if mode is not None: - raise ValueError("mode cannot be specified if store is a Store") + raise ValueError("mode cannot be set when store is already initialized") elif mode is None: mode = cast(AccessModeLiteral, "a") @@ -564,7 +564,7 @@ async def open_group( warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) if mode is not None and isinstance(store, Store | StorePath): - raise ValueError("store and mode cannot be specified at the same time") + raise ValueError("mode cannot be set when store is already initialized") store_path = await make_store_path(store, mode=mode) if path is not None: diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 7a9cd311e5..bef13c56f1 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -375,15 +375,6 @@ async def test_dimension_names(store: Store) -> None: @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_invalid_metadata(store: Store) -> None: - # spath = StorePath(store, "invalid_metadata") - # with pytest.raises(ValueError): - # Array.create( - # spath, - # shape=(16, 16, 16), - # chunk_shape=(16, 16), # this is now allowed (like in v2) - # dtype=np.dtype("uint8"), - # fill_value=0, - # ) spath2 = StorePath(store, "invalid_endian") with pytest.raises(TypeError): Array.create( From 93b61fca2f7075b47d7bfc70976e24caccdd113e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 23 Sep 2024 12:09:46 -0700 Subject: [PATCH 05/22] more merge conflict resolution --- src/zarr/storage/common.py | 8 +++++++- tests/v3/test_attributes.py | 4 ++-- tests/v3/test_store/test_core.py | 2 +- tests/v3/test_sync.py | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 83f3956c06..58efa3f46f 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -82,7 +82,7 @@ async def make_store_path( mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: - from zarr.store.remote import RemoteStore # circular import + from zarr.storage.remote import RemoteStore # circular import used_storage_options = False @@ -91,10 +91,14 @@ async def make_store_path( raise ValueError( f"mode mismatch (mode={mode} != store.mode={store_like.store.mode.str})" ) + if storage_options: + raise TypeError("storage_options passed but store has already been initialized") return store_like elif isinstance(store_like, Store): if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode): raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode.str})") + if storage_options: + raise TypeError("storage_options passed but store has already been initialized") await store_like._ensure_open() result = StorePath(store_like) elif store_like is None: @@ -116,6 +120,8 @@ async def make_store_path( elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. + if mode is None: + mode = "r" result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode)) else: msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable] diff --git a/tests/v3/test_attributes.py b/tests/v3/test_attributes.py index 65b6a02e8d..c4d37ebec5 100644 --- a/tests/v3/test_attributes.py +++ b/tests/v3/test_attributes.py @@ -1,10 +1,10 @@ import zarr.core import zarr.core.attributes -import zarr.store +import zarr.storage def test_put() -> None: - store = zarr.store.MemoryStore({}, mode="w") + store = zarr.storage.MemoryStore({}, mode="w") attrs = zarr.core.attributes.Attributes( zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) ) diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index e0fb12c680..b2a8292ea9 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -59,7 +59,7 @@ async def test_make_store_path_fsspec(monkeypatch) -> None: ) async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> None: with pytest.raises(TypeError, match="storage_options"): - await make_store_path(store_like, storage_options={"foo": "bar"}, mode="w") + await make_store_path(store_like, storage_options={"foo": "bar"}) async def test_unsupported() -> None: diff --git a/tests/v3/test_sync.py b/tests/v3/test_sync.py index 864c9e01cb..081e65f3d2 100644 --- a/tests/v3/test_sync.py +++ b/tests/v3/test_sync.py @@ -6,7 +6,7 @@ import zarr from zarr.core.sync import SyncError, SyncMixin, _get_lock, _get_loop, sync -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore @pytest.fixture(params=[True, False]) From fb6752d586e14960c0dd0545c1553e0b0e51c4f5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 29 Sep 2024 14:51:43 -0700 Subject: [PATCH 06/22] fixups --- src/zarr/core/array.py | 14 ++++++++++++-- src/zarr/core/group.py | 11 +++++++++-- src/zarr/storage/logging.py | 4 ++-- src/zarr/storage/memory.py | 4 ++-- tests/v3/test_array.py | 5 ++--- tests/v3/test_group.py | 11 ++++------- 6 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 0f529ddc87..539b791446 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2385,8 +2385,18 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - required_parts = node.store_path.path.split("/")[:-1] - parents = [] + print("path", node.store_path.path) + + if "/" in node.store_path.path: + required_parts = node.store_path.path.split("/")[:-1] + else: + required_parts = [] + parents = [ + AsyncGroup( + metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), + store_path=StorePath(store=node.store_path.store, path=""), + ) + ] for i, part in enumerate(required_parts): path = "/".join(required_parts[:i] + [part]) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 63dfc75adc..96e72fd25e 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -700,6 +700,10 @@ async def _members( "Object at %s is not recognized as a component of a Zarr hierarchy.", key ) + async def keys(self) -> AsyncGenerator[str, None]: + async for key, _ in self.members(): + yield key + async def contains(self, member: str) -> bool: # TODO: this can be made more efficient. try: @@ -823,10 +827,10 @@ def __delitem__(self, key: str) -> None: self._sync(self._async_group.delitem(key)) def __iter__(self) -> Iterator[str]: - raise NotImplementedError + yield from self.keys() def __len__(self) -> int: - raise NotImplementedError + return self.nmembers() def __setitem__(self, key: str, value: Any) -> None: """__setitem__ is not supported in v3""" @@ -906,6 +910,9 @@ def members(self, max_depth: int | None = 0) -> tuple[tuple[str, Array | Group], return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members) + def keys(self) -> Generator[str, None]: + yield from self._sync_iter(self._async_group.keys()) + def __contains__(self, member: str) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index a9113aabe4..52c7c7b84d 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -150,9 +150,9 @@ async def set(self, key: str, value: Buffer) -> None: with self.log(): return await self._store.set(key=key, value=value) - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: with self.log(): - return await self._store.set_if_not_exists(key=key, value=default) + return await self._store.set_if_not_exists(key=key, value=value) async def delete(self, key: str) -> None: with self.log(): diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 8dcdd14bce..24ea7e0040 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -101,10 +101,10 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None else: self._store_dict[key] = value - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() await self._ensure_open() - self._store_dict.setdefault(key, default) + self._store_dict.setdefault(key, value) async def delete(self, key: str) -> None: self._check_writable() diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 7ffb62491b..5778f7e8fa 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -92,10 +92,9 @@ async def test_create_creates_parents( expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: - expected.append("a/b/c/d/.zarray") - expected.append("a/b/c/d/.zattrs") + expected.extend([".zattrs", ".zgroup", "a/b/c/d/.zarray", "a/b/c/d/.zattrs"]) else: - expected.append("a/b/c/d/zarr.json") + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 7ddc7c54b8..63c5e036fe 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -74,10 +74,9 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: - expected.append("a/b/c/d/.zgroup") - expected.append("a/b/c/d/.zattrs") + expected.extend([".zgroup", ".zattrs", "a/b/c/d/.zgroup", "a/b/c/d/.zattrs"]) else: - expected.append("a/b/c/d/zarr.json") + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) @@ -311,8 +310,7 @@ def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - list(group) + assert list(group) == [] def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: @@ -321,8 +319,7 @@ def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - len(group) + assert len(group) == 0 def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: From 0b1dedc8c0f6955e99af601f669a45af120c186f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 29 Sep 2024 15:04:33 -0700 Subject: [PATCH 07/22] fixup zipstore --- src/zarr/storage/zip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 116d6de83a..14d0a4362c 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -191,12 +191,12 @@ async def set(self, key: str, value: Buffer) -> None: async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() with self._lock: members = self._zf.namelist() if key not in members: - self._set(key, default) + self._set(key, value) async def delete(self, key: str) -> None: raise NotImplementedError From 322918a20d559f59027334138e48a90c85ffbaa4 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 29 Sep 2024 15:24:58 -0700 Subject: [PATCH 08/22] Apply suggestions from code review --- src/zarr/api/asynchronous.py | 12 +++++------- src/zarr/core/array.py | 1 - 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index ccf85dca83..456ed174a9 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -228,7 +228,7 @@ async def open( """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -323,7 +323,7 @@ async def save_array( ) mode = kwargs.pop("mode", None) - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path new = await AsyncArray.create( @@ -502,7 +502,7 @@ async def group( mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -780,7 +780,7 @@ async def create( if not isinstance(store, Store | StorePath): mode = "a" - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -957,9 +957,7 @@ async def open_array( mode = kwargs.pop("mode", None) store_path = await make_store_path(store, mode=mode) - if ( - path is not None - ): # FIXME: apply path before opening store in w or risk deleting existing data + if path is not None: store_path = store_path / path zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 539b791446..9d055f4981 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2385,7 +2385,6 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - print("path", node.store_path.path) if "/" in node.store_path.path: required_parts = node.store_path.path.split("/")[:-1] From a95d54ad7e3c16adcb2bb2ba2db8acd471477dc4 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 30 Sep 2024 13:12:08 -0700 Subject: [PATCH 09/22] Apply suggestions from code review --- src/zarr/api/asynchronous.py | 2 +- src/zarr/core/array.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 456ed174a9..62a973257c 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -598,7 +598,7 @@ async def open_group( if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9d055f4981..8167f3266d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -197,7 +197,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types + chunk_shape: ChunkCoords | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -207,7 +207,7 @@ async def create( codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only - chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types + chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, filters: list[dict[str, JSON]] | None = None, From 128eb5332b8665d38d109449f4589f8726debb7b Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 13:28:48 -0700 Subject: [PATCH 10/22] add test --- src/zarr/core/array.py | 1 - tests/v3/test_chunk_grids.py | 38 +++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 8167f3266d..c15b4201be 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2385,7 +2385,6 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - if "/" in node.store_path.path: required_parts = node.store_path.path.split("/")[:-1] else: diff --git a/tests/v3/test_chunk_grids.py b/tests/v3/test_chunk_grids.py index 12166bd210..4c69c483ae 100644 --- a/tests/v3/test_chunk_grids.py +++ b/tests/v3/test_chunk_grids.py @@ -1,7 +1,9 @@ +from typing import Any + import numpy as np import pytest -from zarr.core.chunk_grids import _guess_chunks +from zarr.core.chunk_grids import _guess_chunks, normalize_chunks @pytest.mark.parametrize( @@ -16,3 +18,37 @@ def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: assert chunk_size < (64 * 1024 * 1024) # doesn't make any sense to allow chunks to have zero length dimension assert all(0 < c <= max(s, 1) for c, s in zip(chunks, shape, strict=False)) + + +@pytest.mark.parametrize( + ("chunks", "shape", "typesize", "expected"), + [ + ((10,), (100,), 1, (10,)), + ([10], (100,), 1, (10,)), + (10, (100,), 1, (10,)), + ((10, 10), (100, 10), 1, (10, 10)), + (10, (100, 10), 1, (10, 10)), + ((10, None), (100, 10), 1, (10, 10)), + (30, (100, 20, 10), 1, (30, 30, 30)), + ((30,), (100, 20, 10), 1, (30, 20, 10)), + ((30, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, None, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)), + # auto chunking + (None, (100,), 1, (100,)), + (-1, (100,), 1, (100,)), + ((30, -1, None), (100, 20, 10), 1, (30, 20, 10)), + ], +) +def test_normalize_chunks( + chunks: Any, shape: tuple[int, ...], typesize: int, expected: tuple[int, ...] +) -> None: + assert expected == normalize_chunks(chunks, shape, typesize) + + +def test_normalize_chunks_errors() -> None: + with pytest.raises(ValueError): + normalize_chunks("foo", (100,), 1) + with pytest.raises(ValueError): + normalize_chunks((100, 10), (100,), 1) From 54ab9efa63667615afe5d24a7a119305f2ad054e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 14:56:50 -0700 Subject: [PATCH 11/22] extend test --- tests/v3/test_group.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 63c5e036fe..51605dd7ef 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -62,6 +62,14 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) + + # test that root group node was created + root = await zarr.api.asynchronous.open_group( + store=store, + ) + agroup = await root.getitem("a") + assert agroup.attrs == {"key": "value"} + # create a child node with a couple intermediates await zarr.api.asynchronous.open_group(store=store, path="a/b/c/d", zarr_format=zarr_format) parts = ["a", "a/b", "a/b/c"] From 77f293882eca6b4193a5d7645fcc0058d12934a0 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 15:44:49 -0700 Subject: [PATCH 12/22] clean up parents --- src/zarr/core/array.py | 11 +++++++---- src/zarr/core/group.py | 3 +++ tests/v3/test_group.py | 4 +--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index c15b4201be..f8c14fe389 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3,6 +3,7 @@ import json from asyncio import gather from dataclasses import dataclass, field, replace +from logging import getLogger from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np @@ -80,6 +81,8 @@ # Array and AsyncArray are defined in the base ``zarr`` namespace __all__ = ["create_codec_pipeline", "parse_array_metadata"] +logger = getLogger(__name__) + def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata: if isinstance(data, ArrayV2Metadata | ArrayV3Metadata): @@ -636,6 +639,8 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups. parents = _build_parents(self) + logger.debug("Ensure parents: %s", parents) + for parent in parents: awaitables.extend( [ @@ -2385,11 +2390,9 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - if "/" in node.store_path.path: - required_parts = node.store_path.path.split("/")[:-1] - else: - required_parts = [] + required_parts = node.store_path.path.split("/")[:-1] parents = [ + # the root group AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), store_path=StorePath(store=node.store_path.store, path=""), diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 96e72fd25e..18caea3fd4 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -836,6 +836,9 @@ def __setitem__(self, key: str, value: Any) -> None: """__setitem__ is not supported in v3""" raise NotImplementedError + def __repr__(self) -> str: + return f"" + async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: new_metadata = replace(self.metadata, attributes=new_attributes) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 51605dd7ef..106663c6dc 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -237,9 +237,7 @@ def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> if not exists_ok: with pytest.raises(ContainsGroupError): - group = Group.from_store( - store, attributes=attributes, exists_ok=exists_ok, zarr_format=zarr_format - ) + _ = Group.from_store(store, exists_ok=exists_ok, zarr_format=zarr_format) def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: From 2295d76a5de869d9dee259589ee7622e773a9d1a Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 16:06:18 -0700 Subject: [PATCH 13/22] debug race condition --- src/zarr/core/array.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f8c14fe389..67489f8acc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -651,7 +651,10 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F ] ) - await gather(*awaitables) + for awaitable in awaitables: + await awaitable + + # await gather(*awaitables) async def _set_selection( self, From 5879d67ed4ac55c71d9b95ae7c26c36e9b06ae2f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 16:19:20 -0700 Subject: [PATCH 14/22] more debug --- src/zarr/core/array.py | 18 ++++++++++-------- tests/v3/test_group.py | 5 +++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 67489f8acc..5251b309d3 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -651,10 +651,7 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F ] ) - for awaitable in awaitables: - await awaitable - - # await gather(*awaitables) + await gather(*awaitables) async def _set_selection( self, @@ -2393,21 +2390,26 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - required_parts = node.store_path.path.split("/")[:-1] + store = node.store_path.store + path = node.store_path.path + if not path: + return [] + + required_parts = path.split("/")[:-1] parents = [ # the root group AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), - store_path=StorePath(store=node.store_path.store, path=""), + store_path=StorePath(store=store, path=""), ) ] for i, part in enumerate(required_parts): - path = "/".join(required_parts[:i] + [part]) + p = "/".join(required_parts[:i] + [part]) parents.append( AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), - store_path=StorePath(store=node.store_path.store, path=path), + store_path=StorePath(store=store, path=p), ) ) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 106663c6dc..15b9658e43 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -62,6 +62,11 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) + objs = {x async for x in store.list()} + if zarr_format == 2: + assert objs == {".zgroup", ".zattrs", "a/.zgroup", "a/.zattrs"} + else: + assert objs == {"zarr.json", "a/zarr.json"} # test that root group node was created root = await zarr.api.asynchronous.open_group( From f54f6f2931b4ff905f4d9491cc95ab0038a303f4 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 21:02:56 -0700 Subject: [PATCH 15/22] document storage classes and some developer apis --- src/zarr/abc/store.py | 94 ++++++++++++++++++++++++++- src/zarr/storage/common.py | 123 ++++++++++++++++++++++++++++++++++++ src/zarr/storage/local.py | 74 ++++++---------------- src/zarr/storage/logging.py | 25 ++++++++ src/zarr/storage/memory.py | 32 ++++++---- src/zarr/storage/remote.py | 48 +++++++------- src/zarr/storage/zip.py | 23 ++++--- 7 files changed, 312 insertions(+), 107 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 920eaa7995..1b60fa934c 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -20,6 +20,8 @@ class AccessMode(NamedTuple): + """Access mode flags.""" + str: AccessModeLiteral readonly: bool overwrite: bool @@ -28,6 +30,24 @@ class AccessMode(NamedTuple): @classmethod def from_literal(cls, mode: AccessModeLiteral) -> Self: + """ + Create an AccessMode instance from a literal. + + Parameters + ---------- + mode : AccessModeLiteral + One of 'r', 'r+', 'w', 'w-', 'a'. + + Returns + ------- + AccessMode + The created instance. + + Raises + ------ + ValueError + If mode is not one of 'r', 'r+', 'w', 'w-', 'a'. + """ if mode in ("r", "r+", "a", "w", "w-"): return cls( str=mode, @@ -40,6 +60,17 @@ def from_literal(cls, mode: AccessModeLiteral) -> Self: class Store(ABC): + """ + Abstract base class for Zarr stores. + + Attributes + ---------- + _mode : AccessMode + Access mode flags. + _is_open : bool + Whether the store is open. + """ + _mode: AccessMode _is_open: bool @@ -49,6 +80,21 @@ def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> @classmethod async def open(cls, *args: Any, **kwargs: Any) -> Self: + """ + Create and open the store. + + Parameters + ---------- + *args : Any + Positional arguments to pass to the store constructor. + **kwargs : Any + Keyword arguments to pass to the store constructor. + + Returns + ------- + store + The opened store instance. + """ store = cls(*args, **kwargs) await store._open() return store @@ -67,6 +113,20 @@ def __exit__( self.close() async def _open(self) -> None: + """ + Open the store. + + Notes + ----- + * When `mode='w'` and the store already exists, it will be cleared. + + Raises + ------ + ValueError + If the store is already open. + FileExistsError + If `mode='w-'` and the store already exists. + """ if self._is_open: raise ValueError("store is already open") if not await self.empty(): @@ -79,14 +139,31 @@ async def _open(self) -> None: self._is_open = True async def _ensure_open(self) -> None: + """Open the store if it is not already open.""" if not self._is_open: await self._open() @abstractmethod - async def empty(self) -> bool: ... + async def empty(self) -> bool: + """ + Check if the store is empty. + + Returns + ------- + bool + True if the store is empty, False otherwise. + """ + ... @abstractmethod - async def clear(self) -> None: ... + async def clear(self) -> None: + """ + Clear the store. + + Remove all keys and values from the store. + + """ + ... @abstractmethod def with_mode(self, mode: AccessModeLiteral) -> Self: @@ -119,6 +196,7 @@ def mode(self) -> AccessMode: return self._mode def _check_writable(self) -> None: + """Raise an exception if the store is not writable.""" if self.mode.readonly: raise ValueError("store mode does not support writing") @@ -342,6 +420,18 @@ async def set_if_not_exists(self, default: Buffer) -> None: ... async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None: + """Set or delete a value in a byte setter + + Parameters + ---------- + byte_setter : ByteSetter + value : Buffer | None + + Notes + ----- + If value is None, the key will be deleted. + + """ if value is None: await byte_setter.delete() else: diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 977fe4ba2b..6f2d0b0a9a 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -27,6 +27,17 @@ def _dereference_path(root: str, path: str) -> str: class StorePath: + """ + Path-like interface for a Store. + + Parameters + ---------- + store : Store + The store to use. + path : str + The path within the store. + """ + store: Store path: str @@ -39,25 +50,80 @@ async def get( prototype: BufferPrototype | None = None, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + """ + Read bytes from the store. + + Parameters + ---------- + prototype : BufferPrototype, optional + The buffer prototype to use when reading the bytes. + byte_range : ByteRangeRequest, optional + The range of bytes to read. + + Returns + ------- + buffer : Buffer or None + The read bytes, or None if the key does not exist. + """ if prototype is None: prototype = default_buffer_prototype() return await self.store.get(self.path, prototype=prototype, byte_range=byte_range) async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: + """ + Write bytes to the store. + + Parameters + ---------- + value : Buffer + The buffer to write. + byte_range : ByteRangeRequest, optional + The range of bytes to write. If None, the entire buffer is written. + + Raises + ------ + NotImplementedError + If `byte_range` is not None, because Store.set does not support partial writes yet. + """ if byte_range is not None: raise NotImplementedError("Store.set does not have partial writes yet") await self.store.set(self.path, value) async def delete(self) -> None: + """ + Delete the key from the store. + + Raises + ------ + NotImplementedError + If the store does not support deletion. + """ await self.store.delete(self.path) async def set_if_not_exists(self, default: Buffer) -> None: + """ + Store a key to ``value`` if the key is not already present. + + Parameters + ---------- + default : Buffer + The buffer to store if the key is not already present. + """ await self.store.set_if_not_exists(self.path, default) async def exists(self) -> bool: + """ + Check if the key exists in the store. + + Returns + ------- + exists : bool + True if the key exists in the store, False otherwise. + """ return await self.store.exists(self.path) def __truediv__(self, other: str) -> StorePath: + """combine this store path with another path""" return self.__class__(self.store, _dereference_path(self.path, other)) def __str__(self) -> str: @@ -67,6 +133,19 @@ def __repr__(self) -> str: return f"StorePath({self.store.__class__.__name__}, {str(self)!r})" def __eq__(self, other: object) -> bool: + """ + Check if two StorePath objects are equal. + + Returns + ------- + equals : bool + True if the two objects are equal, False otherwise. + + Notes + ----- + Two StorePath objects are considered equal if their stores are equal + and their paths are equal. + """ try: return self.store == other.store and self.path == other.path # type: ignore[attr-defined, no-any-return] except Exception: @@ -83,6 +162,50 @@ async def make_store_path( mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: + """ + Convert a `StoreLike` object into a StorePath object. + + This function takes a `StoreLike` object and returns a `StorePath` object. The + `StoreLike` object can be a `Store`, `StorePath`, `Path`, `str`, or `dict[str, Buffer]`. + If the `StoreLike` object is a Store or `StorePath`, it is converted to a + `StorePath` object. If the `StoreLike` object is a Path or str, it is converted + to a LocalStore object and then to a `StorePath` object. If the `StoreLike` + object is a dict[str, Buffer], it is converted to a `MemoryStore` object and + then to a `StorePath` object. + + If the `StoreLike` object is None, a `MemoryStore` object is created and + converted to a `StorePath` object. + + If the `StoreLike` object is a str and starts with a protocol, it is + converted to a RemoteStore object and then to a `StorePath` object. + + If the `StoreLike` object is a dict[str, Buffer] and the mode is not None, + the `MemoryStore` object is created with the given mode. + + If the `StoreLike` object is a str and starts with a protocol, the + RemoteStore object is created with the given mode and storage options. + + Parameters + ---------- + store_like : StoreLike | None + The object to convert to a `StorePath` object. + mode : AccessModeLiteral | None, optional + The mode to use when creating the `StorePath` object. If None, the + default mode is 'r'. + storage_options : dict[str, Any] | None, optional + The storage options to use when creating the `RemoteStore` object. If + None, the default storage options are used. + + Returns + ------- + StorePath + The converted StorePath object. + + Raises + ------ + TypeError + If the StoreLike object is not one of the supported types. + """ from zarr.storage.remote import RemoteStore # circular import used_storage_options = False diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index 0dc5c79e78..375f01bc68 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -20,19 +20,6 @@ def _get( path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None ) -> Buffer: - """ - Fetch a contiguous region of bytes from a file. - - Parameters - ---------- - path: Path - The file to read bytes from. - byte_range: tuple[int, int | None] | None = None - The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. - If `byte_range` is a tuple, the first value specifies the index of the first byte to read, - and the second value specifies the total number of bytes to read. If the total value is - `None`, then the entire file after the first byte will be read. - """ if byte_range is not None: if byte_range[0] is None: start = 0 @@ -79,6 +66,25 @@ def _put( class LocalStore(Store): + """ + Local file system store. + + Parameters + ---------- + root : str or Path + Directory to use as root of store. + mode : str + Mode in which to open the store. Either 'r', 'r+', 'a', 'w', 'w-'. + + Attributes + ---------- + supports_writes + supports_deletes + supports_partial_writes + supports_listing + root + """ + supports_writes: bool = True supports_deletes: bool = True supports_partial_writes: bool = True @@ -143,17 +149,6 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: - """ - Read byte ranges from multiple keys. - - Parameters - ---------- - key_ranges: List[Tuple[str, Tuple[int, int]]] - A list of (key, (start, length)) tuples. The first element of the tuple is the name of - the key in storage to fetch bytes from. The second element the tuple defines the byte - range to retrieve. These values are arguments to `get`, as this method wraps - concurrent invocation of `get`. - """ args = [] for key, byte_range in key_ranges: assert isinstance(key, str) @@ -204,49 +199,18 @@ async def exists(self, key: str) -> bool: return await to_thread(path.is_file) async def list(self) -> AsyncGenerator[str, None]: - """Retrieve all keys in the store. - - Returns - ------- - AsyncGenerator[str, None] - """ to_strip = str(self.root) + "/" for p in list(self.root.rglob("*")): if p.is_file(): yield str(p).replace(to_strip, "") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ to_strip = os.path.join(str(self.root / prefix)) for p in (self.root / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys and prefixes with a given prefix and which do not contain the character - “/” after the given prefix. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ - base = self.root / prefix to_strip = str(base) + "/" diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 52c7c7b84d..281ae7d7de 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -18,6 +18,26 @@ class LoggingStore(Store): + """ + Store wrapper that logs all calls to the wrapped store. + + Parameters + ---------- + store: Store + Store to wrap + log_level: str + Log level + log_handler: logging.Handler + Log handler + + Attributes + ---------- + _store: Store + Wrapped store + counter: dict + Counter of number of times each method has been called + """ + _store: Store counter: defaultdict[str, int] @@ -58,6 +78,11 @@ def _default_handler(self) -> logging.Handler: @contextmanager def log(self) -> Generator[None, None, None]: + """context manager to log method calls + + Each call to the wrapped store is logged to the configured logger and added to + the counter dict. + """ method = inspect.stack()[2].function op = f"{type(self._store).__name__}.{method}" self.logger.info(f"Calling {op}") diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 24ea7e0040..74c51f3575 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -14,9 +14,25 @@ from zarr.core.common import AccessModeLiteral -# TODO: this store could easily be extended to wrap any MutableMapping store from v2 -# When that is done, the `MemoryStore` will just be a store that wraps a dict. class MemoryStore(Store): + """ + In-memory store for testing purposes. + + Parameters + ---------- + store_dict : dict + Initial data + mode : str + Access mode + + Attributes + ---------- + supports_writes + supports_deletes + supports_partial_writes + supports_listing + """ + supports_writes: bool = True supports_deletes: bool = True supports_partial_writes: bool = True @@ -126,18 +142,6 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: yield key.removeprefix(prefix) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ if prefix.endswith("/"): prefix = prefix[:-1] diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 9ff779ac77..664f0c593e 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -25,6 +25,30 @@ class RemoteStore(Store): + """ + A remote Store based on FSSpec + + Parameters + ---------- + fs: AsyncFileSystem + The Async FSSpec filesystem to use with this store. + mode: AccessModeLiteral + The access mode to use. + path: str + The root path of the store. + allowed_exceptions: tuple[type[Exception], ...] + When fetching data, these cases will be deemed to correspond to missing + + Attributes + ---------- + fs + allowed_exceptions + supports_writes + supports_deletes + supports_partial_writes + supports_listing + """ + # based on FSSpec supports_writes: bool = True supports_deletes: bool = True @@ -41,17 +65,6 @@ def __init__( path: str = "/", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> None: - """ - Parameters - ---------- - url: root of the datastore. In fsspec notation, this is usually like "protocol://path/to". - Can also be a upath.UPath instance/ - allowed_exceptions: when fetching data, these cases will be deemed to correspond to missing - keys, rather than some other IO failure - storage_options: passed on to fsspec to make the filesystem instance. If url is a UPath, - this must not be used. - - """ super().__init__(mode=mode) self.fs = fs self.path = path @@ -232,19 +245,6 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: yield onefile.removeprefix(self.path).removeprefix("/") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ - find_str = f"{self.path}/{prefix}" for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): yield onefile.removeprefix(find_str) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 14d0a4362c..f5328dd7fe 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -35,6 +35,17 @@ class ZipStore(Store): One of 'r' to read an existing file, 'w' to truncate and write a new file, 'a' to append to an existing file, or 'x' to exclusively create and write a new file. + + Attributes + ---------- + allowed_exceptions + supports_writes + supports_deletes + supports_partial_writes + supports_listing + path + compression + allowZip64 """ supports_writes: bool = True @@ -216,18 +227,6 @@ async def list(self) -> AsyncGenerator[str, None]: yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ async for key in self.list(): if key.startswith(prefix): yield key.removeprefix(prefix) From 3940d22fb86cd17aa9eb48ddcdad9b03ac15a010 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 1 Oct 2024 07:43:27 -0700 Subject: [PATCH 16/22] Update src/zarr/core/array.py --- src/zarr/core/array.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 5251b309d3..9a78297c6f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -639,8 +639,6 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups. parents = _build_parents(self) - logger.debug("Ensure parents: %s", parents) - for parent in parents: awaitables.extend( [ From 4e93aa0a8188cdd3832f3864bbfbed5d3c29d79b Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 10 Oct 2024 16:43:22 -0700 Subject: [PATCH 17/22] inherit docstrings from baseclass --- src/zarr/core/common.py | 30 ++++++++++++++++++++++++++++++ src/zarr/storage/__init__.py | 2 ++ src/zarr/storage/local.py | 3 ++- src/zarr/storage/logging.py | 2 ++ src/zarr/storage/memory.py | 3 ++- src/zarr/storage/remote.py | 2 ++ src/zarr/storage/zip.py | 2 ++ 7 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index efce0f98f2..e39485b175 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -151,3 +151,33 @@ def parse_order(data: Any) -> Literal["C", "F"]: if data in ("C", "F"): return cast(Literal["C", "F"], data) raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") + + +def _inherit_docstrings(cls: type[Any]) -> type[Any]: + """ + Inherit docstrings from base class + + Iterate over the methods of the class and if a method is missing a docstring, + try to inherit one from a base class (ABC). + + Parameters + ---------- + cls : object + the class to inherit docstrings from + + Returns + ------- + cls + the class with updated docstrings + """ + # Iterate over the methods of the class + for name, method in cls.__dict__.items(): + # Skip if it's not a callable (method or function) + if callable(method): + # Get the corresponding method from the base class (ABC) + for base in cls.__bases__: + base_method = getattr(base, name, None) + if base_method and not method.__doc__: + method.__doc__ = base_method.__doc__ + break + return cls diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py index 47f70bcc9e..6703aa2723 100644 --- a/src/zarr/storage/__init__.py +++ b/src/zarr/storage/__init__.py @@ -1,11 +1,13 @@ from zarr.storage.common import StoreLike, StorePath, make_store_path from zarr.storage.local import LocalStore +from zarr.storage.logging import LoggingStore from zarr.storage.memory import MemoryStore from zarr.storage.remote import RemoteStore from zarr.storage.zip import ZipStore __all__ = [ "LocalStore", + "LoggingStore", "MemoryStore", "RemoteStore", "StoreLike", diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index 85e12954cf..baf34ef19d 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -9,7 +9,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer -from zarr.core.common import concurrent_map +from zarr.core.common import _inherit_docstrings, concurrent_map if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable @@ -66,6 +66,7 @@ def _put( return f.write(view) +@_inherit_docstrings class LocalStore(Store): """ Local file system store. diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index ad1457977b..0238033ce5 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -9,6 +9,7 @@ from zarr.abc.store import AccessMode, ByteRangeRequest, Store from zarr.core.buffer import Buffer +from zarr.core.common import _inherit_docstrings if TYPE_CHECKING: from collections.abc import AsyncGenerator, Generator, Iterable @@ -17,6 +18,7 @@ from zarr.core.common import AccessModeLiteral +@_inherit_docstrings class LoggingStore(Store): """ Store wrapper that logs all calls to the wrapped store. diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 74c51f3575..ce4f0af533 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -4,7 +4,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, gpu -from zarr.core.common import concurrent_map +from zarr.core.common import _inherit_docstrings, concurrent_map from zarr.storage._utils import _normalize_interval_index if TYPE_CHECKING: @@ -14,6 +14,7 @@ from zarr.core.common import AccessModeLiteral +@_inherit_docstrings class MemoryStore(Store): """ In-memory store for testing purposes. diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 2a72e35399..25494a67fe 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -6,6 +6,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer +from zarr.core.common import _inherit_docstrings from zarr.storage.common import _dereference_path if TYPE_CHECKING: @@ -24,6 +25,7 @@ ) +@_inherit_docstrings class RemoteStore(Store): """ A remote Store based on FSSpec diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 8fbbe17305..9dc5677d7a 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -9,6 +9,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype +from zarr.core.common import _inherit_docstrings if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable @@ -16,6 +17,7 @@ ZipStoreAccessModeLiteral = Literal["r", "w", "a"] +@_inherit_docstrings class ZipStore(Store): """ Storage class using a ZIP file. From 5477d327e37aea74be45fc260414f5307a69dd19 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 10 Oct 2024 21:35:55 -0700 Subject: [PATCH 18/22] fix sphinx warning --- src/zarr/abc/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 4f4e55e866..261e56dd01 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -269,7 +269,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: Store a key to ``value`` if the key is not already present. Parameters - ----------- + ---------- key : str value : Buffer """ From 728d4f7218cc3a9f0ff8062eea6a5a15d6bc1d59 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 11 Oct 2024 10:05:03 -0700 Subject: [PATCH 19/22] # docstring inherited --- src/zarr/core/common.py | 30 ---------------------- src/zarr/storage/local.py | 16 ++++++++++-- src/zarr/storage/logging.py | 15 +++++++++-- src/zarr/storage/memory.py | 20 ++++++++++++--- src/zarr/storage/remote.py | 51 +++++++++++++++++++++++++++++++++++-- src/zarr/storage/zip.py | 15 +++++++++-- 6 files changed, 106 insertions(+), 41 deletions(-) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index e39485b175..efce0f98f2 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -151,33 +151,3 @@ def parse_order(data: Any) -> Literal["C", "F"]: if data in ("C", "F"): return cast(Literal["C", "F"], data) raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") - - -def _inherit_docstrings(cls: type[Any]) -> type[Any]: - """ - Inherit docstrings from base class - - Iterate over the methods of the class and if a method is missing a docstring, - try to inherit one from a base class (ABC). - - Parameters - ---------- - cls : object - the class to inherit docstrings from - - Returns - ------- - cls - the class with updated docstrings - """ - # Iterate over the methods of the class - for name, method in cls.__dict__.items(): - # Skip if it's not a callable (method or function) - if callable(method): - # Get the corresponding method from the base class (ABC) - for base in cls.__bases__: - base_method = getattr(base, name, None) - if base_method and not method.__doc__: - method.__doc__ = base_method.__doc__ - break - return cls diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index baf34ef19d..e5f3d6a56c 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -9,7 +9,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer -from zarr.core.common import _inherit_docstrings, concurrent_map +from zarr.core.common import concurrent_map if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable @@ -66,7 +66,6 @@ def _put( return f.write(view) -@_inherit_docstrings class LocalStore(Store): """ Local file system store. @@ -102,11 +101,13 @@ def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None: self.root = root async def clear(self) -> None: + # docstring inherited self._check_writable() shutil.rmtree(self.root) self.root.mkdir() async def empty(self) -> bool: + # docstring inherited try: with os.scandir(self.root) as it: for entry in it: @@ -119,6 +120,7 @@ async def empty(self) -> bool: return True def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited return type(self)(root=self.root, mode=mode) def __str__(self) -> str: @@ -136,6 +138,7 @@ async def get( prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: + # docstring inherited if not self._is_open: await self._open() assert isinstance(key, str) @@ -151,6 +154,7 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited args = [] for key, byte_range in key_ranges: assert isinstance(key, str) @@ -159,9 +163,11 @@ async def get_partial_values( return await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit async def set(self, key: str, value: Buffer) -> None: + # docstring inherited return await self._set(key, value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited try: return await self._set(key, value, exclusive=True) except FileExistsError: @@ -180,6 +186,7 @@ async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None: async def set_partial_values( self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] ) -> None: + # docstring inherited self._check_writable() args = [] for key, start, value in key_start_values: @@ -189,6 +196,7 @@ async def set_partial_values( await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit async def delete(self, key: str) -> None: + # docstring inherited self._check_writable() path = self.root / key if path.is_dir(): # TODO: support deleting directories? shutil.rmtree? @@ -197,22 +205,26 @@ async def delete(self, key: str) -> None: await asyncio.to_thread(path.unlink, True) # Q: we may want to raise if path is missing async def exists(self, key: str) -> bool: + # docstring inherited path = self.root / key return await asyncio.to_thread(path.is_file) async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited to_strip = str(self.root) + "/" for p in list(self.root.rglob("*")): if p.is_file(): yield str(p).replace(to_strip, "") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited to_strip = os.path.join(str(self.root / prefix)) for p in (self.root / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited base = self.root / prefix to_strip = str(base) + "/" diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 0238033ce5..2e0436b048 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -9,7 +9,6 @@ from zarr.abc.store import AccessMode, ByteRangeRequest, Store from zarr.core.buffer import Buffer -from zarr.core.common import _inherit_docstrings if TYPE_CHECKING: from collections.abc import AsyncGenerator, Generator, Iterable @@ -18,7 +17,6 @@ from zarr.core.common import AccessModeLiteral -@_inherit_docstrings class LoggingStore(Store): """ Store wrapper that logs all calls to the wrapped store. @@ -133,10 +131,12 @@ async def _ensure_open(self) -> None: return await self._store._ensure_open() async def empty(self) -> bool: + # docstring inherited with self.log(): return await self._store.empty() async def clear(self) -> None: + # docstring inherited with self.log(): return await self._store.clear() @@ -156,6 +156,7 @@ async def get( prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: + # docstring inherited with self.log(): return await self._store.get(key=key, prototype=prototype, byte_range=byte_range) @@ -164,47 +165,57 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited with self.log(): return await self._store.get_partial_values(prototype=prototype, key_ranges=key_ranges) async def exists(self, key: str) -> bool: + # docstring inherited with self.log(): return await self._store.exists(key) async def set(self, key: str, value: Buffer) -> None: + # docstring inherited with self.log(): return await self._store.set(key=key, value=value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited with self.log(): return await self._store.set_if_not_exists(key=key, value=value) async def delete(self, key: str) -> None: + # docstring inherited with self.log(): return await self._store.delete(key=key) async def set_partial_values( self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] ) -> None: + # docstring inherited with self.log(): return await self._store.set_partial_values(key_start_values=key_start_values) async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited with self.log(): async for key in self._store.list(): yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited with self.log(): async for key in self._store.list_prefix(prefix=prefix): yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited with self.log(): async for key in self._store.list_dir(prefix=prefix): yield key def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited with self.log(): return type(self)( self._store.with_mode(mode), diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index ce4f0af533..673c2a75d5 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -4,7 +4,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, gpu -from zarr.core.common import _inherit_docstrings, concurrent_map +from zarr.core.common import concurrent_map from zarr.storage._utils import _normalize_interval_index if TYPE_CHECKING: @@ -14,7 +14,6 @@ from zarr.core.common import AccessModeLiteral -@_inherit_docstrings class MemoryStore(Store): """ In-memory store for testing purposes. @@ -53,12 +52,15 @@ def __init__( self._store_dict = store_dict async def empty(self) -> bool: + # docstring inherited return not self._store_dict async def clear(self) -> None: + # docstring inherited self._store_dict.clear() def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited return type(self)(store_dict=self._store_dict, mode=mode) def __str__(self) -> str: @@ -80,6 +82,7 @@ async def get( prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: + # docstring inherited if not self._is_open: await self._open() assert isinstance(key, str) @@ -95,6 +98,8 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited + # All the key-ranges arguments goes with the same prototype async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: return await self.get(key, prototype=prototype, byte_range=byte_range) @@ -102,9 +107,11 @@ async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: return await concurrent_map(key_ranges, _get, limit=None) async def exists(self, key: str) -> bool: + # docstring inherited return key in self._store_dict async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + # docstring inherited self._check_writable() await self._ensure_open() assert isinstance(key, str) @@ -119,30 +126,36 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None self._store_dict[key] = value async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited self._check_writable() await self._ensure_open() self._store_dict.setdefault(key, value) async def delete(self, key: str) -> None: + # docstring inherited self._check_writable() try: del self._store_dict[key] except KeyError: - pass # Q(JH): why not raise? + pass async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: + # docstring inherited raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited for key in self._store_dict: yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited for key in self._store_dict: if key.startswith(prefix): yield key.removeprefix(prefix) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited if prefix.endswith("/"): prefix = prefix[:-1] @@ -217,6 +230,7 @@ def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: return cls(gpu_store_dict) async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + # docstring inherited self._check_writable() assert isinstance(key, str) if not isinstance(value, Buffer): diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 25494a67fe..9782a98772 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -6,7 +6,6 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer -from zarr.core.common import _inherit_docstrings from zarr.storage.common import _dereference_path if TYPE_CHECKING: @@ -25,7 +24,6 @@ ) -@_inherit_docstrings class RemoteStore(Store): """ A remote Store based on FSSpec @@ -82,6 +80,23 @@ def from_upath( mode: AccessModeLiteral = "r", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> RemoteStore: + """ + Create a RemoteStore from an upath object. + + Parameters + ---------- + upath : UPath + The upath to the root of the store. + mode : str, optional + The mode of the store. Defaults to "r". + allowed_exceptions : tuple, optional + The exceptions that are allowed to be raised when accessing the + store. Defaults to ALLOWED_EXCEPTIONS. + + Returns + ------- + RemoteStore + """ return cls( fs=upath.fs, path=upath.path.rstrip("/"), @@ -97,10 +112,30 @@ def from_url( mode: AccessModeLiteral = "r", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> RemoteStore: + """ + Create a RemoteStore from a URL. + + Parameters + ---------- + url : str + The URL to the root of the store. + storage_options : dict, optional + The options to pass to fsspec when creating the filesystem. + mode : str, optional + The mode of the store. Defaults to "r". + allowed_exceptions : tuple, optional + The exceptions that are allowed to be raised when accessing the + store. Defaults to ALLOWED_EXCEPTIONS. + + Returns + ------- + RemoteStore + """ fs, path = fsspec.url_to_fs(url, **storage_options) return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions) async def clear(self) -> None: + # docstring inherited try: for subpath in await self.fs._find(self.path, withdirs=True): if subpath != self.path: @@ -109,11 +144,14 @@ async def clear(self) -> None: pass async def empty(self) -> bool: + # docstring inherited + # TODO: it would be nice if we didn't have to list all keys here # it should be possible to stop after the first key is discovered return not await self.fs._ls(self.path) def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited return type(self)( fs=self.fs, mode=mode, @@ -138,6 +176,7 @@ async def get( prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + # docstring inherited if not self._is_open: await self._open() path = _dereference_path(self.path, key) @@ -176,6 +215,7 @@ async def set( value: Buffer, byte_range: tuple[int, int] | None = None, ) -> None: + # docstring inherited if not self._is_open: await self._open() self._check_writable() @@ -186,6 +226,7 @@ async def set( await self.fs._pipe_file(path, value.to_bytes()) async def delete(self, key: str) -> None: + # docstring inherited self._check_writable() path = _dereference_path(self.path, key) try: @@ -196,6 +237,7 @@ async def delete(self, key: str) -> None: pass async def exists(self, key: str) -> bool: + # docstring inherited path = _dereference_path(self.path, key) exists: bool = await self.fs._exists(path) return exists @@ -205,6 +247,7 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited if key_ranges: paths, starts, stops = zip( *( @@ -232,14 +275,17 @@ async def get_partial_values( async def set_partial_values( self, key_start_values: Iterable[tuple[str, int, BytesLike]] ) -> None: + # docstring inherited raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited allfiles = await self.fs._find(self.path, detail=False, withdirs=False) for onefile in (a.replace(self.path + "/", "") for a in allfiles): yield onefile async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited prefix = f"{self.path}/{prefix.rstrip('/')}" try: allfiles = await self.fs._ls(prefix, detail=False) @@ -249,6 +295,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: yield onefile.removeprefix(self.path).removeprefix("/") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited find_str = f"{self.path}/{prefix}" for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): yield onefile.removeprefix(find_str) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 9dc5677d7a..c9cb579586 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -9,7 +9,6 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype -from zarr.core.common import _inherit_docstrings if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable @@ -17,7 +16,6 @@ ZipStoreAccessModeLiteral = Literal["r", "w", "a"] -@_inherit_docstrings class ZipStore(Store): """ Storage class using a ZIP file. @@ -108,11 +106,13 @@ def __setstate__(self, state: Any) -> None: self._sync_open() def close(self) -> None: + # docstring inherited super().close() with self._lock: self._zf.close() async def clear(self) -> None: + # docstring inherited with self._lock: self._check_writable() self._zf.close() @@ -122,10 +122,12 @@ async def clear(self) -> None: ) async def empty(self) -> bool: + # docstring inherited with self._lock: return not self._zf.namelist() def with_mode(self, mode: ZipStoreAccessModeLiteral) -> Self: # type: ignore[override] + # docstring inherited raise NotImplementedError("ZipStore cannot be reopened with a new mode.") def __str__(self) -> str: @@ -143,6 +145,7 @@ def _get( prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + # docstring inherited try: with self._zf.open(key) as f: # will raise KeyError if byte_range is None: @@ -166,6 +169,7 @@ async def get( prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + # docstring inherited assert isinstance(key, str) with self._lock: @@ -176,6 +180,7 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited out = [] with self._lock: for key, byte_range in key_ranges: @@ -194,6 +199,7 @@ def _set(self, key: str, value: Buffer) -> None: self._zf.writestr(keyinfo, value.to_bytes()) async def set(self, key: str, value: Buffer) -> None: + # docstring inherited self._check_writable() assert isinstance(key, str) if not isinstance(value, Buffer): @@ -212,9 +218,11 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._set(key, value) async def delete(self, key: str) -> None: + # docstring inherited raise NotImplementedError async def exists(self, key: str) -> bool: + # docstring inherited with self._lock: try: self._zf.getinfo(key) @@ -224,16 +232,19 @@ async def exists(self, key: str) -> bool: return True async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited with self._lock: for key in self._zf.namelist(): yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited async for key in self.list(): if key.startswith(prefix): yield key.removeprefix(prefix) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited if prefix.endswith("/"): prefix = prefix[:-1] From 3f7290f51c4cf88be0b74e9a77d7392d378b99f1 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 11 Oct 2024 14:07:16 -0700 Subject: [PATCH 20/22] add storage guide --- docs/guide/storage.rst | 101 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docs/guide/storage.rst diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst new file mode 100644 index 0000000000..8438e98e27 --- /dev/null +++ b/docs/guide/storage.rst @@ -0,0 +1,101 @@ +Storage +======= + +Zarr-Python supports multiple storage backends, including: local file systems, +Zip files, remote stores via ``fspec`` (S3, HTTP, etc.), and in-memory stores. In +Zarr-Python 3, stores must implement the abstract store API from +:class:`zarr.abc.store.Store`. + +.. note:: + Unlike Zarr-Python 2 where the store interface was built around a generic ``MutableMapping`` + API, Zarr-Python 3 utilizes a custom store API that utilizes Python's AsyncIO library. + +Implicit Store Creation +----------------------- + +In most cases, it is not required to create a ``Store`` object explicitly. Passing a string +to Zarr's top level API will result in the store being created automatically. + +.. code-block:: python + + >>> import zarr + >>> zarr.open("data/foo/bar", mode="r") # implicitly creates a LocalStore + + >>> zarr.open("s3://foo/bar", mode="r") # implicitly creates a RemoteStore + + >>> data = {} + >>> zarr.open(data, mode="w") # implicitly creates a MemoryStore + + +Explicit Store Creation +----------------------- + +In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four +built-in store: :class:`zarr.store.LocalStore`, :class:`zarr.store.RemoteStore`, +:class:`zarr.store.ZipStore`, and :class:`zarr.store.MemoryStore`. + +Local Store +~~~~~~~~~~~ + +The :class:`zarr.store.LocalStore` stores data in a nested set of directories on a local +filesystem. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.LocalStore("data/foo/bar", mode="r") + >>> zarr.open(store=store) + + +Zip Store +~~~~~~~~~ + +The :class:`zarr.store.ZipStore` stores the contents of a Zarr hierarchy in a single +Zip file. The `Zip Store specification_` is currently in draft form. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.ZipStore("data.zip", mode="w") + >>> zarr.open(store=store, shape=(2,)) + >> import zarr + >>> store = zarr.storage.RemoteStore("gs://foo/bar", mode="r") + >>> zarr.open(store=store) + shape=(10, 20) dtype=float32> + +Memory Store +~~~~~~~~~~~~ + +The :class:`zarr.store.RemoteStore` a in-memory store that allows for serialization of +Zarr data (metadata and chunks) to a dictionary. + +.. code-block:: python + + >>> import zarr + >>> data = {} + >>> store = zarr.storage.MemoryStore(data, mode="w") + >>> zarr.open(store=store, shape=(2, )) + + +Developing custom stores +------------------------ + +Zarr-Python :class:`zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base +Class includes all of the methods needed to be a fully operational store in Zarr Python. +Zarr also provides a test harness for custom stores: :class:`zarr.testing.store.StoreTests`. + +.. _Zip Store Specification: https://github.com/zarr-developers/zarr-specs/pull/311 +.. _Fsspec: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#consolidated-metadata \ No newline at end of file From 35a6428b22c30cd6664839cbb06fb59d4446e116 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 11 Oct 2024 14:12:59 -0700 Subject: [PATCH 21/22] add missing file --- docs/guide/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 106c35ce8d..f841dbb85d 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -4,4 +4,5 @@ Guide .. toctree:: :maxdepth: 1 + storage consolidated_metadata From a78461e130daf8d99bdaf7d7fa4f0792e6ee8042 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 11 Oct 2024 14:15:54 -0700 Subject: [PATCH 22/22] update links --- docs/guide/consolidated_metadata.rst | 6 +++--- docs/guide/storage.rst | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/guide/consolidated_metadata.rst b/docs/guide/consolidated_metadata.rst index 480a855831..5010d32481 100644 --- a/docs/guide/consolidated_metadata.rst +++ b/docs/guide/consolidated_metadata.rst @@ -12,8 +12,8 @@ Usage If consolidated metadata is present in a Zarr Group's metadata then it is used by default. The initial read to open the group will need to communicate with -the store (reading from a file for a :class:`zarr.store.LocalStore`, making a -network request for a :class:`zarr.store.RemoteStore`). After that, any subsequent +the store (reading from a file for a :class:`zarr.storage.LocalStore`, making a +network request for a :class:`zarr.storage.RemoteStore`). After that, any subsequent metadata reads get child Group or Array nodes will *not* require reads from the store. In Python, the consolidated metadata is available on the ``.consolidated_metadata`` @@ -22,7 +22,7 @@ attribute of the ``GroupMetadata`` object. .. code-block:: python >>> import zarr - >>> store = zarr.store.MemoryStore({}, mode="w") + >>> store = zarr.storage.MemoryStore({}, mode="w") >>> group = zarr.open_group(store=store) >>> group.create_array(shape=(1,), name="a") >>> group.create_array(shape=(2, 2), name="b") diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst index 8438e98e27..dfda553c43 100644 --- a/docs/guide/storage.rst +++ b/docs/guide/storage.rst @@ -31,13 +31,13 @@ Explicit Store Creation ----------------------- In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four -built-in store: :class:`zarr.store.LocalStore`, :class:`zarr.store.RemoteStore`, -:class:`zarr.store.ZipStore`, and :class:`zarr.store.MemoryStore`. +built-in store: :class:`zarr.storage.LocalStore`, :class:`zarr.storage.RemoteStore`, +:class:`zarr.storage.ZipStore`, and :class:`zarr.storage.MemoryStore`. Local Store ~~~~~~~~~~~ -The :class:`zarr.store.LocalStore` stores data in a nested set of directories on a local +The :class:`zarr.storage.LocalStore` stores data in a nested set of directories on a local filesystem. .. code-block:: python @@ -50,7 +50,7 @@ filesystem. Zip Store ~~~~~~~~~ -The :class:`zarr.store.ZipStore` stores the contents of a Zarr hierarchy in a single +The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single Zip file. The `Zip Store specification_` is currently in draft form. .. code-block:: python @@ -63,10 +63,10 @@ Zip file. The `Zip Store specification_` is currently in draft form. Remote Store ~~~~~~~~~~~~ -The :class:`zarr.store.RemoteStore` stores the contents of a Zarr hierarchy in following the same +The :class:`zarr.storage.RemoteStore` stores the contents of a Zarr hierarchy in following the same logical layout as the ``LocalStore``, except the store is assumed to be on a remote storage system such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). The -:class:`zarr.store.RemoteStore` is backed by `Fsspec_` and can support any Fsspec backend +:class:`zarr.storage.RemoteStore` is backed by `Fsspec_` and can support any Fsspec backend that implements the `AbstractFileSystem` API, .. code-block:: python @@ -79,7 +79,7 @@ that implements the `AbstractFileSystem` API, Memory Store ~~~~~~~~~~~~ -The :class:`zarr.store.RemoteStore` a in-memory store that allows for serialization of +The :class:`zarr.storage.RemoteStore` a in-memory store that allows for serialization of Zarr data (metadata and chunks) to a dictionary. .. code-block:: python @@ -98,4 +98,4 @@ Class includes all of the methods needed to be a fully operational store in Zarr Zarr also provides a test harness for custom stores: :class:`zarr.testing.store.StoreTests`. .. _Zip Store Specification: https://github.com/zarr-developers/zarr-specs/pull/311 -.. _Fsspec: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#consolidated-metadata \ No newline at end of file +.. _Fsspec: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#consolidated-metadata