diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 00b81a3aad..70ac9adc17 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -11,6 +11,7 @@ class AccessMode(NamedTuple): + str: AccessModeLiteral readonly: bool overwrite: bool create: bool @@ -20,6 +21,7 @@ class AccessMode(NamedTuple): def from_literal(cls, mode: AccessModeLiteral) -> Self: if mode in ("r", "r+", "a", "w", "w-"): return cls( + str=mode, readonly=mode == "r", overwrite=mode == "w", create=mode in ("a", "w", "w-"), @@ -42,6 +44,14 @@ async def open(cls, *args: Any, **kwargs: Any) -> Self: await store._open() return store + def __enter__(self) -> Self: + """Enter a context manager that will close the store upon exiting.""" + return self + + def __exit__(self, *args: Any) -> None: + """Close the store.""" + self.close() + async def _open(self) -> None: if self._is_open: raise ValueError("store is already open") @@ -143,6 +153,12 @@ async def set(self, key: str, value: Buffer) -> None: """ ... + @property + @abstractmethod + def supports_deletes(self) -> bool: + """Does the store support deletes?""" + ... 
+ @abstractmethod async def delete(self, key: str) -> None: """Remove a key from the store @@ -221,7 +237,6 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: def close(self) -> None: """Close the store.""" self._is_open = False - pass @runtime_checkable diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py index 3a9e8201e8..47bbccd66e 100644 --- a/src/zarr/store/__init__.py +++ b/src/zarr/store/__init__.py @@ -2,5 +2,14 @@ from zarr.store.local import LocalStore from zarr.store.memory import MemoryStore from zarr.store.remote import RemoteStore +from zarr.store.zip import ZipStore -__all__ = ["StorePath", "StoreLike", "make_store_path", "RemoteStore", "LocalStore", "MemoryStore"] +__all__ = [ + "StorePath", + "StoreLike", + "make_store_path", + "RemoteStore", + "LocalStore", + "MemoryStore", + "ZipStore", +] diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index b69b86dc4c..5fd48c2db0 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -73,6 +73,7 @@ def _put( class LocalStore(Store): supports_writes: bool = True + supports_deletes: bool = True supports_partial_writes: bool = True supports_listing: bool = True diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 4f2627bf67..89e7ced31e 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -18,6 +18,7 @@ # When that is done, the `MemoryStore` will just be a store that wraps a dict. 
class ZipStore(Store):
    """
    Storage class using a ZIP file.

    Parameters
    ----------
    path : string
        Location of file.
    compression : integer, optional
        Compression method to use when writing to the archive.
    allowZip64 : bool, optional
        If True (the default) will create ZIP files that use the ZIP64
        extensions when the zipfile is larger than 2 GiB. If False
        will raise an exception when the ZIP file would require ZIP64
        extensions.
    mode : string, optional
        One of 'r' to read an existing file, 'w' to truncate and write a new
        file, or 'a' to append to an existing file.

    Notes
    -----
    Keys cannot be deleted and partial writes are not implemented:
    ``delete`` and ``set_partial_values`` raise ``NotImplementedError``.
    """

    supports_writes: bool = True
    supports_deletes: bool = False
    supports_partial_writes: bool = False
    supports_listing: bool = True

    path: Path
    compression: int
    allowZip64: bool

    # Both are created by `_open`, not by `__init__`.
    _zf: zipfile.ZipFile
    _lock: threading.RLock

    def __init__(
        self,
        path: Path | str,
        *,
        mode: ZipStoreAccessModeLiteral = "r",
        compression: int = zipfile.ZIP_STORED,
        allowZip64: bool = True,
    ):
        super().__init__(mode=mode)

        # Path() is a no-op for Path inputs and rejects non path-like values
        # with a TypeError even when assertions are disabled (-O).
        self.path = Path(path)  # root?

        self._zmode = mode
        self.compression = compression
        self.allowZip64 = allowZip64

    async def _open(self) -> None:
        """Create the lock and open the archive with the mode given at construction.

        Raises
        ------
        ValueError
            If the store is already open.
        """
        if self._is_open:
            raise ValueError("store is already open")

        self._lock = threading.RLock()

        self._zf = zipfile.ZipFile(
            self.path,
            mode=self._zmode,
            compression=self.compression,
            allowZip64=self.allowZip64,
        )

        self._is_open = True

    def close(self) -> None:
        """Close the store and, if it was ever opened, the underlying archive."""
        super().close()
        # `_zf` and `_lock` only exist after `_open`; closing a store that was
        # constructed but never opened must not raise AttributeError.
        zf = getattr(self, "_zf", None)
        if zf is None:
            return
        with self._lock:
            zf.close()

    async def clear(self) -> None:
        """Remove every key by deleting the archive file and recreating it empty."""
        with self._lock:
            self._check_writable()
            self._zf.close()
            os.remove(self.path)
            # Always reopen in "w": the file was just removed, so there is
            # nothing to read or append to.
            self._zf = zipfile.ZipFile(
                self.path, mode="w", compression=self.compression, allowZip64=self.allowZip64
            )

    async def empty(self) -> bool:
        """Return True if the archive contains no entries."""
        with self._lock:
            return not self._zf.namelist()

    def __str__(self) -> str:
        return f"zip://{self.path}"

    def __repr__(self) -> str:
        return f"ZipStore({str(self)!r})"

    def __eq__(self, other: object) -> bool:
        return isinstance(other, type(self)) and self.path == other.path

    def _get(
        self,
        key: str,
        prototype: BufferPrototype,
        byte_range: tuple[int | None, int | None] | None = None,
    ) -> Buffer | None:
        """Read ``key`` from the archive; callers are expected to hold the lock.

        Parameters
        ----------
        key : str
            Name of the archive member.
        prototype : BufferPrototype
            Supplies the buffer class used to wrap the bytes read.
        byte_range : tuple of (start, length), optional
            A falsy ``start`` (None or 0) reads from the beginning; a negative
            ``start`` is taken relative to the end of the member. A falsy
            ``length`` (None or 0) reads through to the end of the member.

        Returns
        -------
        Buffer or None
            The requested bytes, or None if ``key`` is not in the archive.
        """
        try:
            with self._zf.open(key) as f:  # will raise KeyError
                if byte_range is None:
                    return prototype.buffer.from_bytes(f.read())
                start, length = byte_range
                if start:
                    whence = os.SEEK_END if start < 0 else os.SEEK_SET
                    f.seek(start, whence)
                if length:
                    return prototype.buffer.from_bytes(f.read(length))
                return prototype.buffer.from_bytes(f.read())
        except KeyError:
            return None

    async def get(
        self,
        key: str,
        prototype: BufferPrototype,
        byte_range: tuple[int | None, int | None] | None = None,
    ) -> Buffer | None:
        """Return the value stored under ``key`` (optionally a byte range), or None."""
        assert isinstance(key, str)

        with self._lock:
            return self._get(key, prototype=prototype, byte_range=byte_range)

    async def get_partial_values(
        self,
        prototype: BufferPrototype,
        key_ranges: list[tuple[str, tuple[int | None, int | None]]],
    ) -> list[Buffer | None]:
        """Read several ``(key, byte_range)`` pairs under a single lock acquisition.

        The result has one entry per request, in request order; missing keys
        produce None.
        """
        with self._lock:
            return [
                self._get(key, prototype=prototype, byte_range=byte_range)
                for key, byte_range in key_ranges
            ]

    def _set(self, key: str, value: Buffer) -> None:
        """Write ``value`` as a new archive entry named ``key``."""
        # generally, this should be called inside a lock
        keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime()[:6])
        keyinfo.compress_type = self.compression
        # ZipInfo stores names with "/" separators on every platform, so test
        # for "/" rather than os.sep (which is "\\" on Windows and would never
        # match). endswith() is also safe for an empty name.
        if keyinfo.filename.endswith("/"):
            keyinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
            keyinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            keyinfo.external_attr = 0o644 << 16  # ?rw-r--r--
        self._zf.writestr(keyinfo, value.to_bytes())

    async def set(self, key: str, value: Buffer) -> None:
        """Store ``value`` under ``key``.

        Raises
        ------
        TypeError
            If ``value`` is not a ``Buffer``.
        """
        self._check_writable()
        assert isinstance(key, str)
        if not isinstance(value, Buffer):
            raise TypeError("ZipStore.set(): `value` must be a Buffer instance")
        with self._lock:
            self._set(key, value)

    async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None:
        """Not supported (``supports_partial_writes`` is False)."""
        raise NotImplementedError

    async def delete(self, key: str) -> None:
        """Not supported (``supports_deletes`` is False)."""
        raise NotImplementedError

    async def exists(self, key: str) -> bool:
        """Return True if the archive has an entry named ``key``."""
        with self._lock:
            try:
                self._zf.getinfo(key)
            except KeyError:
                return False
            else:
                return True

    async def list(self) -> AsyncGenerator[str, None]:
        """Yield every entry name in the archive, in archive order."""
        # Snapshot under the lock, then yield outside it so the lock is never
        # held while this generator is suspended.
        with self._lock:
            keys = self._zf.namelist()
        for key in keys:
            yield key

    async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
        """Yield every entry name that starts with ``prefix``."""
        async for key in self.list():
            if key.startswith(prefix):
                yield key

    async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
        """Yield the distinct names found directly below ``prefix``.

        One trailing "/" on ``prefix`` is ignored; an empty prefix lists the
        top level. Names are yielded once each, in first-seen archive order.
        """
        if prefix.endswith("/"):
            prefix = prefix[:-1]

        with self._lock:
            keys = self._zf.namelist()

        if prefix == "":
            children = (key.split("/")[0] for key in keys)
        else:
            stem = prefix + "/"
            children = (
                key.removeprefix(stem).split("/")[0] for key in keys if key.startswith(stem)
            )

        # dict.fromkeys de-duplicates while keeping a deterministic order.
        for child in dict.fromkeys(children):
            yield child
-> Store: param = request.param @@ -73,7 +80,7 @@ async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: @dataclass class AsyncGroupRequest: zarr_format: ZarrFormat - store: Literal["local", "remote", "memory"] + store: Literal["local", "remote", "memory", "zip"] attributes: dict[str, Any] = field(default_factory=dict) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index fb726757dc..cd20ab6e58 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -10,7 +10,7 @@ from zarr.store.common import StorePath -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) @@ -59,7 +59,7 @@ def test_array_creation_existing_node( ) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) def test_array_name_properties_no_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat @@ -70,7 +70,7 @@ def test_array_name_properties_no_group( assert arr.basename is None -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) def test_array_name_properties_with_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index e5f66224e9..ffe1db67a9 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -19,7 +19,7 @@ from .test_codecs import _AsyncArrayProxy, order_from_dim -@pytest.mark.parametrize("store", ("local", "memory"), 
indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize( "array_fixture", @@ -71,7 +71,7 @@ def test_sharding( @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize( "array_fixture", [ @@ -121,7 +121,7 @@ def test_sharding_partial( indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_sharding_partial_read( store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation ) -> None: @@ -158,7 +158,7 @@ def test_sharding_partial_read( indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_sharding_partial_overwrite( store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation ) -> None: @@ -209,7 +209,7 @@ def test_sharding_partial_overwrite( "inner_index_location", ["start", "end"], ) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_nested_sharding( store: Store, array_fixture: np.ndarray, @@ -242,7 +242,7 @@ def test_nested_sharding( assert np.array_equal(data, read_data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_open_sharding(store: Store) -> None: path = 
"open_sharding" spath = StorePath(store, path) @@ -267,7 +267,7 @@ def test_open_sharding(store: Store) -> None: assert a.metadata == b.metadata -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_write_partial_sharded_chunks(store: Store) -> None: data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) spath = StorePath(store) @@ -291,8 +291,10 @@ def test_write_partial_sharded_chunks(store: Store) -> None: assert np.array_equal(a[0:16, 0:16], data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) async def test_delete_empty_shards(store: Store) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") path = "delete_empty_shards" spath = StorePath(store, path) a = await AsyncArray.create( diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index de09d1b0c1..67a2e4a3dc 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -7,6 +7,7 @@ import zarr.api.asynchronous from zarr import Array, AsyncArray, AsyncGroup, Group +from zarr.abc.store import Store from zarr.api.synchronous import open_group from zarr.core.buffer import default_buffer_prototype from zarr.core.common import ZarrFormat @@ -22,10 +23,10 @@ from _pytest.compat import LEGACY_PATH -@pytest.fixture(params=["local", "memory"]) -async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> LocalStore | MemoryStore: +@pytest.fixture(params=["local", "memory", "zip"]) +async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: result = await parse_store(request.param, str(tmpdir)) - if not isinstance(result, MemoryStore | LocalStore): + if not isinstance(result, Store): raise TypeError("Wrong store class returned by test fixture! 
got " + result + " instead") return result @@ -46,7 +47,7 @@ def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: return cast(ZarrFormat, result) -def test_group_init(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: """ Test that initializing a group from an asyncgroup works. """ @@ -55,7 +56,7 @@ def test_group_init(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> assert group._async_group == agroup -def test_group_name_properties(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic properties of groups """ @@ -75,7 +76,7 @@ def test_group_name_properties(store: LocalStore | MemoryStore, zarr_format: Zar assert bar.basename == "bar" -def test_group_members(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_members(store: Store, zarr_format: ZarrFormat) -> None: """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. @@ -133,7 +134,7 @@ def test_group_members(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) members_observed = group.members(max_depth=-1) -def test_group(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic Group routines. """ @@ -175,9 +176,7 @@ def test_group(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -def test_group_create( - store: MemoryStore | LocalStore, exists_ok: bool, zarr_format: ZarrFormat -) -> None: +def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> None: """ Test that `Group.create` works as expected. 
""" @@ -193,9 +192,7 @@ def test_group_create( ) -def test_group_open( - store: MemoryStore | LocalStore, zarr_format: ZarrFormat, exists_ok: bool -) -> None: +def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: """ Test the `Group.open` method. """ @@ -227,7 +224,7 @@ def test_group_open( assert group_created_again.store_path == spath -def test_group_getitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_getitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__getitem__` method. """ @@ -242,10 +239,12 @@ def test_group_getitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) group["nope"] -def test_group_delitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_delitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__delitem__` method. """ + if not store.supports_deletes: + pytest.skip("store does not support deletes") group = Group.create(store, zarr_format=zarr_format) subgroup = group.create_group(name="subgroup") @@ -263,7 +262,7 @@ def test_group_delitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) group["subarray"] -def test_group_iter(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__iter__` method. """ @@ -273,7 +272,7 @@ def test_group_iter(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> [x for x in group] # type: ignore -def test_group_len(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__len__` method. 
""" @@ -283,7 +282,7 @@ def test_group_len(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> len(group) # type: ignore -def test_group_setitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__setitem__` method. """ @@ -292,7 +291,7 @@ def test_group_setitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) group["key"] = 10 -def test_group_contains(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__contains__` method """ @@ -302,7 +301,7 @@ def test_group_contains(store: MemoryStore | LocalStore, zarr_format: ZarrFormat assert "foo" in group -def test_group_subgroups(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_subgroups(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group` methods for accessing subgroups, namely `Group.group_keys` and `Group.groups` """ @@ -317,7 +316,7 @@ def test_group_subgroups(store: MemoryStore | LocalStore, zarr_format: ZarrForma assert all(a in subgroups_observed for a in subgroups_expected) -def test_group_subarrays(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_subarrays(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group` methods for accessing subgroups, namely `Group.group_keys` and `Group.groups` """ @@ -332,7 +331,7 @@ def test_group_subarrays(store: MemoryStore | LocalStore, zarr_format: ZarrForma assert all(a in subarrays_observed for a in subarrays_expected) -def test_group_update_attributes(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes` """ @@ -344,9 +343,7 @@ def test_group_update_attributes(store: MemoryStore | 
LocalStore, zarr_format: Z assert new_group.attrs == new_attrs -async def test_group_update_attributes_async( - store: MemoryStore | LocalStore, zarr_format: ZarrFormat -) -> None: +async def test_group_update_attributes_async(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes_async` """ @@ -360,7 +357,7 @@ async def test_group_update_attributes_async( @pytest.mark.parametrize("method", ["create_array", "array"]) def test_group_create_array( - store: MemoryStore | LocalStore, + store: Store, zarr_format: ZarrFormat, exists_ok: bool, method: Literal["create_array", "array"], @@ -393,12 +390,12 @@ def test_group_create_array( assert np.array_equal(array[:], data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_group_creation_existing_node( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, exists_ok: bool, extant_node: Literal["array", "group"], @@ -441,7 +438,7 @@ def test_group_creation_existing_node( async def test_asyncgroup_create( - store: MemoryStore | LocalStore, + store: Store, exists_ok: bool, zarr_format: ZarrFormat, ) -> None: @@ -482,14 +479,14 @@ async def test_asyncgroup_create( ) -async def test_asyncgroup_attrs(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: attributes = {"foo": 100} agroup = await AsyncGroup.create(store, zarr_format=zarr_format, attributes=attributes) assert agroup.attrs == agroup.metadata.attributes == attributes -async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_info(store: Store, zarr_format: ZarrFormat) -> 
None: agroup = await AsyncGroup.create( # noqa store, zarr_format=zarr_format, @@ -499,7 +496,7 @@ async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: Zar async def test_asyncgroup_open( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, ) -> None: """ @@ -520,7 +517,7 @@ async def test_asyncgroup_open( async def test_asyncgroup_open_wrong_format( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, ) -> None: _ = await AsyncGroup.create(store=store, exists_ok=False, zarr_format=zarr_format) @@ -546,7 +543,7 @@ async def test_asyncgroup_open_wrong_format( {"zarr_format": 2, "attributes": {"foo": 100}}, ), ) -def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]) -> None: +def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None: """ Test that we can create an AsyncGroup from a dict """ @@ -561,7 +558,7 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this with a declarative API where we model a full hierarchy -async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. 
@@ -583,7 +580,10 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: await agroup.getitem("foo") -async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) array_name = "sub_array" _ = await agroup.create_array( @@ -613,7 +613,7 @@ async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: async def test_asyncgroup_create_group( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, ) -> None: agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) @@ -629,7 +629,7 @@ async def test_asyncgroup_create_group( async def test_asyncgroup_create_array( - store: LocalStore | MemoryStore, zarr_format: ZarrFormat, exists_ok: bool + store: Store, zarr_format: ZarrFormat, exists_ok: bool ) -> None: """ Test that the AsyncGroup.create_array method works correctly. We ensure that array properties @@ -667,9 +667,7 @@ async def test_asyncgroup_create_array( assert subnode.metadata.zarr_format == zarr_format -async def test_asyncgroup_update_attributes( - store: LocalStore | MemoryStore, zarr_format: ZarrFormat -) -> None: +async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test that the AsyncGroup.update_attributes method works correctly. 
class TestZipStore(StoreTests[ZipStore, cpu.Buffer]):
    """Run the shared ``StoreTests`` suite against ``ZipStore``, plus ZIP-specific cases."""

    store_cls = ZipStore
    buffer_cls = cpu.Buffer

    @pytest.fixture(scope="function")
    def store_kwargs(self, tmp_path: os.PathLike[str]) -> dict[str, str | bool]:
        """Return constructor kwargs for a write-mode store at a fresh path.

        The archive lives under pytest's per-test ``tmp_path`` so that it is
        cleaned up by pytest instead of accumulating in the system temp
        directory.
        """
        return {"path": os.path.join(tmp_path, "zarr.zip"), "mode": "w"}

    def get(self, store: ZipStore, key: str) -> Buffer:
        """Read ``key`` straight from the store, bypassing the async API."""
        return store._get(key, prototype=default_buffer_prototype())

    def set(self, store: ZipStore, key: str, value: Buffer) -> None:
        """Write ``key`` straight into the store, bypassing the async API."""
        return store._set(key, value)

    def test_store_mode(self, store: ZipStore, store_kwargs: dict[str, Any]) -> None:
        assert store.mode == AccessMode.from_literal(store_kwargs["mode"])
        assert not store.mode.readonly

    async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None:
        # we need to create the zipfile in write mode before switching to read mode
        store = await self.store_cls.open(**store_kwargs)
        store.close()

        kwargs = {**store_kwargs, "mode": "r"}
        store = await self.store_cls.open(**kwargs)
        assert store.mode == AccessMode.from_literal("r")
        assert store.mode.readonly

        # set
        with pytest.raises(ValueError):
            await store.set("foo", cpu.Buffer.from_bytes(b"bar"))

    def test_store_repr(self, store: ZipStore) -> None:
        assert str(store) == f"zip://{store.path!s}"

    def test_store_supports_writes(self, store: ZipStore) -> None:
        assert store.supports_writes

    def test_store_supports_partial_writes(self, store: ZipStore) -> None:
        assert store.supports_partial_writes is False

    def test_store_supports_listing(self, store: ZipStore) -> None:
        assert store.supports_listing

    def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]:
        # Intentionally empty: ZipStore.delete raises NotImplementedError, so
        # there is nothing to exercise here. Presumably this overrides a
        # delete test inherited from StoreTests -- confirm against that class.
        pass

    def test_api_integration(self, store: ZipStore) -> None:
        """Exercise the store through the high-level group/array API."""
        root = zarr.open_group(store=store)

        data = np.arange(10000, dtype=np.uint16).reshape(100, 100)
        z = root.create_array(
            shape=data.shape, chunks=(10, 10), name="foo", dtype=np.uint16, fill_value=99
        )
        z[:] = data

        assert np.array_equal(data, z[:])

        # you can overwrite existing chunks but zipfile will issue a warning
        with pytest.warns(UserWarning, match="Duplicate name: 'foo/c/0/0'"):
            z[0, 0] = 100

        # TODO: assigning an entire chunk to fill value ends up deleting the chunk which is not supported
        # a work around will be needed here.
        with pytest.raises(NotImplementedError):
            z[0:10, 0:10] = 99

        bar = root.create_group("bar", attributes={"hello": "world"})
        assert "hello" in dict(bar.attrs)

        # keys cannot be deleted
        with pytest.raises(NotImplementedError):
            del root["bar"]

        store.close()