diff --git a/doc/reference/index.rst b/doc/reference/index.rst index d6cf97b4..2caef31c 100644 --- a/doc/reference/index.rst +++ b/doc/reference/index.rst @@ -4,6 +4,7 @@ API Reference .. toctree:: :maxdepth: 2 + storage top_level classes array_operations diff --git a/doc/reference/storage.rst b/doc/reference/storage.rst new file mode 100644 index 00000000..9bcbb9fa --- /dev/null +++ b/doc/reference/storage.rst @@ -0,0 +1,34 @@ +Dataclasses +=========== + +Dataclasses for setting the compression, decompression +and storage parameters. All their parameters are optional. + +.. currentmodule:: blosc2 + +CParams +------- + +.. autosummary:: + :toctree: autofiles/storage + :nosignatures: + + CParams + +DParams +------- + +.. autosummary:: + :toctree: autofiles/storage + :nosignatures: + + DParams + +Storage +------- + +.. autosummary:: + :toctree: autofiles/storage + :nosignatures: + + Storage diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index 489e8577..a3197573 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -188,9 +188,26 @@ class Tuner(Enum): unpack_tensor, ) +# Internal Blosc threading # Get CPU info cpu_info = get_cpu_info() +nthreads = ncores = cpu_info.get("count", 1) +"""Number of threads to be used in compression/decompression. +""" +# Protection against too many threads +nthreads = min(nthreads, 32) +# Experiments say that, when using a large number of threads, it is better to not use them all +nthreads -= nthreads // 8 +# This import must be before ndarray and schunk +from .storage import ( + CParams, + cparams_dflts, + DParams, + dparams_dflts, + Storage, + storage_dflts, +) from .ndarray import ( # noqa: I001 NDArray, @@ -243,60 +260,7 @@ class Tuner(Enum): """ The blosc2 version + date. """ -# Internal Blosc threading -nthreads = ncores = cpu_info.get("count", 1) -"""Number of threads to be used in compression/decompression. -""" -# Protection against too many threads -nthreads = min(nthreads, 32) -# Experiments say that, when using a large number of threads, it is better to not use them all -nthreads -= nthreads // 8 -set_nthreads(nthreads) - -# Set the number of threads for NumExpr -numexpr.set_num_threads(nthreads) - -# Defaults for compression params -cparams_dflts = { - "codec": Codec.ZSTD, - "codec_meta": 0, - "clevel": 1, - "use_dict": False, - "typesize": 8, - "nthreads": nthreads, - "blocksize": 0, - "splitmode": SplitMode.ALWAYS_SPLIT, - "schunk": None, - "filters": [ - Filter.NOFILTER, - Filter.NOFILTER, - Filter.NOFILTER, - Filter.NOFILTER, - Filter.NOFILTER, - Filter.SHUFFLE, - ], - "filters_meta": [0, 0, 0, 0, 0, 0], - "prefilter": None, - "preparams": None, - "tuner": Tuner.STUNE, - "instr_codec": False, -} -""" -Compression params defaults. -""" - -# Defaults for decompression params -dparams_dflts = {"nthreads": nthreads, "schunk": None, "postfilter": None, "postparams": None} -""" -Decompression params defaults. -""" -# Default for storage -storage_dflts = {"contiguous": False, "urlpath": None, "cparams": None, "dparams": None, "io": None} -""" -Storage params defaults. This is meant only for :ref:`SChunk ` or :ref:`NDArray `. -""" -_disable_overloaded_equal = False # Delayed imports for avoiding overwriting of python builtins from .ndarray import ( @@ -341,7 +305,9 @@ class Tuner(Enum): "__version__", "compress", "decompress", + "CParams", "cparams_dflts", + "DParams", "dparams_dflts", "storage_dflts", "set_compressor", @@ -373,6 +339,7 @@ class Tuner(Enum): "compress2", "decompress2", "SChunk", + "Storage", "open", "remove_urlpath", "nthreads", diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx index 3f6f2d73..fb5c1d16 100644 --- a/src/blosc2/blosc2_ext.pyx +++ b/src/blosc2/blosc2_ext.pyx @@ -716,14 +716,16 @@ cdef _check_dparams(blosc2_dparams* dparams, blosc2_cparams* cparams=NULL): cdef create_cparams_from_kwargs(blosc2_cparams *cparams, kwargs): if "compcode" in kwargs: - raise NameError("`compcode` has been renamed to `codec`. Please go update your code.") + raise NameError("`compcode` has been renamed to `codec`. Please go update your code.") + if "shuffle" in kwargs: + raise NameError("`shuffle` has been substituted by `filters`. Please go update your code.") codec = kwargs.get('codec', blosc2.cparams_dflts['codec']) cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value cparams.compcode_meta = kwargs.get('codec_meta', blosc2.cparams_dflts['codec_meta']) cparams.clevel = kwargs.get('clevel', blosc2.cparams_dflts['clevel']) cparams.use_dict = kwargs.get('use_dict', blosc2.cparams_dflts['use_dict']) cparams.typesize = typesize = kwargs.get('typesize', blosc2.cparams_dflts['typesize']) - cparams.nthreads = kwargs.get('nthreads', blosc2.cparams_dflts['nthreads']) + cparams.nthreads = kwargs.get('nthreads', blosc2.nthreads) cparams.blocksize = kwargs.get('blocksize', blosc2.cparams_dflts['blocksize']) splitmode = kwargs.get('splitmode', blosc2.cparams_dflts['splitmode']) cparams.splitmode = splitmode.value @@ -804,7 +806,7 @@ def compress2(src, **kwargs): return dest[:size] cdef create_dparams_from_kwargs(blosc2_dparams *dparams, kwargs, blosc2_cparams* cparams=NULL): - dparams.nthreads = kwargs.get('nthreads', blosc2.dparams_dflts['nthreads']) + dparams.nthreads = kwargs.get('nthreads', blosc2.nthreads) dparams.schunk = NULL dparams.postfilter = NULL dparams.postparams = NULL @@ -927,7 +929,7 @@ cdef class SChunk: self._urlpath = urlpath.encode() if isinstance(urlpath, str) else urlpath kwargs["urlpath"] = self._urlpath - self.mode = kwargs.get("mode", "a") + self.mode = blosc2.Storage().mode if kwargs.get("mode", None) is None else kwargs.get("mode") self.mmap_mode = kwargs.get("mmap_mode") self.initial_mapping_size = kwargs.get("initial_mapping_size") if self.mmap_mode is not None: @@ -1067,16 +1069,6 @@ cdef class SChunk: else: # User codec codec = self.schunk.storage.cparams.compcode - cparams_dict = { - "codec": codec, - "codec_meta": self.schunk.storage.cparams.compcode_meta, - "clevel": self.schunk.storage.cparams.clevel, - "use_dict": self.schunk.storage.cparams.use_dict, - "typesize": self.schunk.storage.cparams.typesize, - "nthreads": self.schunk.storage.cparams.nthreads, - "blocksize": self.schunk.storage.cparams.blocksize, - "splitmode": blosc2.SplitMode(self.schunk.storage.cparams.splitmode) - } filters = [0] * BLOSC2_MAX_FILTERS filters_meta = [0] * BLOSC2_MAX_FILTERS @@ -1087,42 +1079,50 @@ cdef class SChunk: # User filter filters[i] = self.schunk.filters[i] filters_meta[i] = self.schunk.filters_meta[i] - cparams_dict["filters"] = filters - cparams_dict["filters_meta"] = filters_meta - return cparams_dict - def update_cparams(self, cparams_dict): + cparams = blosc2.CParams( + codec=codec, + codec_meta=self.schunk.storage.cparams.compcode_meta, + clevel=self.schunk.storage.cparams.clevel, + use_dict=bool(self.schunk.storage.cparams.use_dict), + typesize=self.schunk.storage.cparams.typesize, + nthreads=self.schunk.storage.cparams.nthreads, + blocksize=self.schunk.storage.cparams.blocksize, + splitmode=blosc2.SplitMode(self.schunk.storage.cparams.splitmode), + tuner=blosc2.Tuner(self.schunk.storage.cparams.tuner_id), + filters=filters, + filters_meta=filters_meta, + ) + + return cparams + + def update_cparams(self, new_cparams): cdef blosc2_cparams* cparams = self.schunk.storage.cparams - codec = cparams_dict.get('codec', None) - if codec is None: - cparams.compcode = cparams.compcode - else: - cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value - cparams.compcode_meta = cparams_dict.get('codec_meta', cparams.compcode_meta) - cparams.clevel = cparams_dict.get('clevel', cparams.clevel) - cparams.use_dict = cparams_dict.get('use_dict', cparams.use_dict) - cparams.typesize = cparams_dict.get('typesize', cparams.typesize) - cparams.nthreads = cparams_dict.get('nthreads', cparams.nthreads) - cparams.blocksize = cparams_dict.get('blocksize', cparams.blocksize) - splitmode = cparams_dict.get('splitmode', None) - cparams.splitmode = cparams.splitmode if splitmode is None else splitmode.value - - filters = cparams_dict.get('filters', None) - if filters is not None: - for i, filter in enumerate(filters): - cparams.filters[i] = filter.value if isinstance(filter, Enum) else filter - for i in range(len(filters), BLOSC2_MAX_FILTERS): - cparams.filters[i] = 0 - - filters_meta = cparams_dict.get('filters_meta', None) + codec = new_cparams.codec + cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value + cparams.compcode_meta = new_cparams.codec_meta + cparams.clevel = new_cparams.clevel + cparams.use_dict = new_cparams.use_dict + cparams.typesize = new_cparams.typesize + cparams.nthreads = new_cparams.nthreads + cparams.blocksize = new_cparams.blocksize + cparams.splitmode = new_cparams.splitmode.value + cparams.tuner_id = new_cparams.tuner.value + + filters = new_cparams.filters + for i, filter in enumerate(filters): + cparams.filters[i] = filter.value if isinstance(filter, Enum) else filter + for i in range(len(filters), BLOSC2_MAX_FILTERS): + cparams.filters[i] = 0 + + filters_meta = new_cparams.filters_meta cdef int8_t meta_value - if filters_meta is not None: - for i, meta in enumerate(filters_meta): - # We still may want to encode negative values - meta_value = meta if meta < 0 else meta - cparams.filters_meta[i] = meta_value - for i in range(len(filters_meta), BLOSC2_MAX_FILTERS): - cparams.filters_meta[i] = 0 + for i, meta in enumerate(filters_meta): + # We still may want to encode negative values + meta_value = meta if meta < 0 else meta + cparams.filters_meta[i] = meta_value + for i in range(len(filters_meta), BLOSC2_MAX_FILTERS): + cparams.filters_meta[i] = 0 _check_cparams(cparams) @@ -1140,12 +1140,11 @@ cdef class SChunk: self.schunk.filters_meta = self.schunk.storage.cparams.filters_meta def get_dparams(self): - dparams_dict = {"nthreads": self.schunk.storage.dparams.nthreads} - return dparams_dict + return blosc2.DParams(nthreads=self.schunk.storage.dparams.nthreads) - def update_dparams(self, dparams_dict): + def update_dparams(self, new_dparams): cdef blosc2_dparams* dparams = self.schunk.storage.dparams - dparams.nthreads = dparams_dict.get('nthreads', dparams.nthreads) + dparams.nthreads = new_dparams.nthreads _check_dparams(dparams, self.schunk.storage.cparams) @@ -1964,17 +1963,17 @@ def open(urlpath, mode, offset, **kwargs): res = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) if cparams is not None: - res.schunk.cparams = cparams + res.schunk.cparams = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) if dparams is not None: - res.schunk.dparams = dparams + res.schunk.dparams = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) res.schunk.mode = mode else: res = blosc2.SChunk(_schunk=PyCapsule_New(schunk, "blosc2_schunk*", NULL), mode=mode, **kwargs) if cparams is not None: - res.cparams = cparams + res.cparams = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) if dparams is not None: - res.dparams = dparams + res.dparams = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) return res diff --git a/src/blosc2/c2array.py b/src/blosc2/c2array.py index 23a68def..73c6958e 100644 --- a/src/blosc2/c2array.py +++ b/src/blosc2/c2array.py @@ -198,6 +198,22 @@ def __init__(self, path: str, /, urlbase: str = None, auth_token: str = None): ------- out: C2Array + Examples + -------- + >>> import blosc2 + >>> import pathlib + >>> urlbase = "https://demo.caterva2.net/" + >>> root = "example" + >>> path = pathlib.Path(f"{root}/dir1/ds-3d.b2nd").as_posix() + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> remote_array.shape + (3, 4, 5) + >>> remote_array.chunks + (2, 3, 4) + >>> remote_array.blocks + (2, 2, 2) + >>> remote_array.dtype + float32 """ if path.startswith("/"): raise ValueError("The path should start with a root name, not a slash") @@ -235,6 +251,21 @@ def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray: ------- out: numpy.ndarray A numpy.ndarray containing the data slice. + + Examples + -------- + >>> import pathlib + >>> import blosc2 + >>> urlbase = "https://demo.caterva2.net/" + >>> root = "example" + >>> path = pathlib.Path(f"{root}/dir1/ds-2d.b2nd").as_posix() + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> data_slice = remote_array[3:5, 1:4] + >>> data_slice.shape + (2, 3) + >>> data_slice[:] + [[61 62 63] + [81 82 83]] """ slice_ = slice_to_string(slice_) return fetch_data(self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token) @@ -252,6 +283,30 @@ def get_chunk(self, nchunk: int) -> bytes: ------- out: bytes The requested compressed chunk. + + Examples + -------- + >>> import pathlib + >>> import numpy as np + >>> import blosc2 + >>> urlbase = "https://demo.caterva2.net/" + >>> root = "example" + >>> path = pathlib.Path(f"{root}/dir1/ds-3d.b2nd").as_posix() + >>> a = blosc2.C2Array(path, urlbase) + >>> b = blosc2.C2Array(path, urlbase) + >>> c = a + b + >>> # Get the compressed chunk from array 'a' for index 0 + >>> chunk_index = 0 + >>> compressed_chunk = c.get_chunk(chunk_index) + >>> f"Size of chunk {chunk_index} from a: {len(compressed_chunk)} bytes" + Size of chunk 0 from a: 160 bytes + >>> # Decompress the chunk and convert it to a NumPy array + >>> decompressed_chunk = blosc2.decompress(compressed_chunk) + >>> chunk_np_array = np.frombuffer(decompressed_chunk, dtype=a.dtype) + >>> chunk_np_array + Content of chunk 0 as NumPy array: + [ 0. 2. 10. 12. 40. 42. 50. 52. 4. 6. 14. 16. 44. 46. 54. 56. 20. 22. + 0. 0. 60. 62. 0. 0. 24. 26. 0. 0. 64. 66. 0. 0.] """ url = _sub_url(self.urlbase, f"api/chunk/{self.path}") params = {"nchunk": nchunk} diff --git a/src/blosc2/core.py b/src/blosc2/core.py index 977a0a81..3544e237 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ -18,6 +18,7 @@ import platform import sys from collections.abc import Callable +from dataclasses import asdict import cpuinfo import numpy as np @@ -55,10 +56,10 @@ def _check_codec(codec): def compress( src: object, - typesize: int = None, - clevel: int = 9, + typesize: int = 8, + clevel: int = 1, filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, + codec: blosc2.Codec = blosc2.Codec.ZSTD, _ignore_multiple_size: bool = False, ) -> str | bytes: """Compress src, with a given type size. @@ -893,8 +894,9 @@ def set_nthreads(nthreads: int) -> int: -------- :attr:`~blosc2.nthreads` """ + rc = blosc2_ext.set_nthreads(nthreads) blosc2.nthreads = nthreads - return blosc2_ext.set_nthreads(nthreads) + return rc def compressor_list(plugins: bool = False) -> list: @@ -1393,34 +1395,15 @@ def compress2(src: object, **kwargs: dict) -> str | bytes: Other Parameters ---------------- kwargs: dict, optional + Compression parameters. The default values are in :class:`blosc2.CParams`. Keyword arguments supported: - codec: :class:`Codec` - The compressor code. Default is :py:obj:`Codec.BLOSCLZ `. - codec_meta: int - The metadata for the compressor code, 0 by default. - clevel: int - The compression level from 0 (no compression) to 9 - (maximum compression). Default: 5. - use_dict: bool - Use dicts or not when compressing - (only for :py:obj:`blosc2.Codec.ZSTD `). By default `False`. - typesize: int from 1 to 255 - The data type size. Default: 8. - nthreads: int - The number of threads to use internally (1 by default). - blocksize: int - The requested size of the compressed blocks. If 0 (the default) - blosc2 chooses it automatically. - splitmode: :class:`SplitMode` - The split mode for the blocks. - The default value is :py:obj:`SplitMode.FORWARD_COMPAT_SPLIT `. - filters: :class:`Filter` list - The sequence of filters. Default: {0, 0, 0, 0, 0, :py:obj:`Filter.SHUFFLE `}. - filters_meta: list - The metadata for filters. Default: `{0, 0, 0, 0, 0, 0}`. - tuner: :class:`Tuner` - The tuner to use. Default: :py:obj:`Tuner.STUNE `. + cparams: :class:`blosc2.CParams` or dict + All the compression parameters that you want to use as + a :class:`blosc2.CParams` or dict instance. + others: Any + If `cparams` is not passed, all the parameters of a :class:`blosc2.CParams` + can be passed as keyword arguments. Returns ------- @@ -1434,6 +1417,15 @@ def compress2(src: object, **kwargs: dict) -> str | bytes: If an internal error occurred, probably because some parameter is not a valid parameter. """ + if kwargs is not None: + if 'cparams' in kwargs: + if len(kwargs) > 1: + raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams") + if isinstance(kwargs.get('cparams'), blosc2.CParams): + kwargs = asdict(kwargs.get('cparams')) + else: + kwargs = kwargs.get('cparams') + return blosc2_ext.compress2(src, **kwargs) @@ -1456,10 +1448,15 @@ def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) -> Other Parameters ---------------- kwargs: dict, optional + Decompression parameters. The default values are in :class:`blosc2.DParams`. Keyword arguments supported: - nthreads: int - The number of threads to use internally (1 by default). + dparams: :class:`blosc2.DParams` or dict + All the decompression parameters that you want to use as + a :class:`blosc2.DParams` or dict instance. + others: Any + If `dparams` is not passed, all the parameters of a :class:`blosc2.DParams` + can be passed as keyword arguments. Returns ------- @@ -1481,6 +1478,15 @@ def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) -> If the length of :paramref:`src` is smaller than the minimum. If :paramref:`dst` is not None and its length is 0. """ + if kwargs is not None: + if 'dparams' in kwargs: + if len(kwargs) > 1: + raise AttributeError("Cannot pass both dparams and other kwargs already included in DParams") + if isinstance(kwargs.get('dparams'), blosc2.DParams): + kwargs = asdict(kwargs.get('dparams')) + else: + kwargs = kwargs.get('dparams') + return blosc2_ext.decompress2(src, dst, **kwargs) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 0ca42856..78f1d5b1 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1884,7 +1884,7 @@ def eval(self, item=None, **kwargs): aux = np.empty(res_eval.shape, res_eval.dtype) res_eval[...] = aux res_eval.schunk.remove_prefilter(self.func.__name__) - res_eval.schunk.cparams["nthreads"] = self._cnthreads + res_eval.schunk.cparams.nthreads = self._cnthreads return res_eval else: diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index 0f9f094d..bccf9284 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -18,6 +18,7 @@ import ndindex import numpy as np +from dataclasses import asdict import blosc2 from blosc2 import SpecialValue, blosc2_ext, compute_chunks_blocks @@ -1288,13 +1289,15 @@ def copy(self, dtype: np.dtype = None, **kwargs: dict) -> NDArray: """ if dtype is None: dtype = self.dtype - kwargs["cparams"] = kwargs.get("cparams", self.schunk.cparams).copy() - kwargs["dparams"] = kwargs.get("dparams", self.schunk.dparams).copy() + kwargs["cparams"] = kwargs.get("cparams").copy() if isinstance(kwargs.get("cparams"), dict) \ + else asdict(self.schunk.cparams) + kwargs["dparams"] = kwargs.get("dparams").copy() if isinstance(kwargs.get("dparams"), dict) \ + else asdict(self.schunk.dparams) if "meta" not in kwargs: # Copy metalayers as well meta_dict = {meta: self.schunk.meta[meta] for meta in self.schunk.meta} kwargs["meta"] = meta_dict - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) return super().copy(dtype, **kwargs) @@ -1369,7 +1372,7 @@ def slice(self, key: int | slice | Sequence[slice], **kwargs: dict) -> NDArray: >>> print(type(c)) """ - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) key, mask = process_key(key, self.shape) start, stop, step = get_ndarray_start_stop(self.ndim, key, self.shape) key = (start, stop) @@ -2328,7 +2331,7 @@ def empty(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict) dtype('int32') """ shape = _check_shape(shape) - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) @@ -2361,7 +2364,7 @@ def uninit(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict dtype('float64') """ shape = _check_shape(shape) - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) @@ -2394,7 +2397,7 @@ def nans(shape: int | tuple | list, dtype: np.dtype = np.float64, **kwargs: dict dtype('float64') """ shape = _check_shape(shape) - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) @@ -2433,7 +2436,7 @@ def zeros(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict) dtype('float64') """ shape = _check_shape(shape) - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) @@ -2486,7 +2489,7 @@ def full(shape: int | tuple | list, fill_value: bytes | int | float | bool, dtyp if dtype is None: dtype = np.dtype(type(fill_value)) shape = _check_shape(shape) - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) @@ -2533,7 +2536,7 @@ def frombuffer( >>> a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype) """ shape = _check_shape(shape) - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) @@ -2596,7 +2599,7 @@ def asarray(array: np.ndarray | blosc2.C2Array, **kwargs: dict | list) -> NDArra >>> # Create a NDArray from a NumPy array >>> nda = blosc2.asarray(a) """ - _check_ndarray_kwargs(**kwargs) + kwargs = _check_ndarray_kwargs(**kwargs) chunks = kwargs.pop("chunks", None) blocks = kwargs.pop("blocks", None) # Use the chunks and blocks from the array if they are not passed @@ -2647,6 +2650,16 @@ def asarray(array: np.ndarray | blosc2.C2Array, **kwargs: dict | list) -> NDArra def _check_ndarray_kwargs(**kwargs): + if "storage" in kwargs: + for key in kwargs: + if key in list(blosc2.Storage.__annotations__): + raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") + storage = kwargs.get("storage") + if isinstance(storage, blosc2.Storage): + kwargs = {**kwargs, **asdict(storage)} + else: + kwargs = {**kwargs, **storage} + supported_keys = [ "chunks", "blocks", @@ -2658,16 +2671,27 @@ def _check_ndarray_kwargs(**kwargs): "mode", "mmap_mode", "initial_mapping_size", + "storage", ] for key in kwargs: if key not in supported_keys: raise KeyError( f"Only {supported_keys} are supported as keyword arguments, and you passed '{key}'" ) - if "cparams" in kwargs and "chunks" in kwargs["cparams"]: - raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead") - if "cparams" in kwargs and "blocks" in kwargs["cparams"]: - raise ValueError("You cannot pass chunks in cparams, use `blocks` argument instead") + + if "cparams" in kwargs: + if isinstance(kwargs["cparams"], blosc2.CParams): + kwargs["cparams"] = asdict(kwargs["cparams"]) + else: + if "chunks" in kwargs["cparams"]: + raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead") + if "blocks" in kwargs["cparams"]: + raise ValueError("You cannot pass chunks in cparams, use `blocks` argument instead") + if "dparams" in kwargs: + if isinstance(kwargs["dparams"], blosc2.DParams): + kwargs["dparams"] = asdict(kwargs["dparams"]) + + return kwargs def get_slice_nchunks(schunk: blosc2.SChunk, diff --git a/src/blosc2/proxy.py b/src/blosc2/proxy.py index 42cbe8e4..1d01c0e0 100644 --- a/src/blosc2/proxy.py +++ b/src/blosc2/proxy.py @@ -7,114 +7,22 @@ ####################################################################### from abc import ABC, abstractmethod -import numpy as np - import blosc2 - - -class ProxyNDSource(ABC): - """ - Base interface for NDim sources in :ref:`Proxy`. - """ - - @property - @abstractmethod - def shape(self) -> tuple: - """ - The shape of the source. - """ - pass - - @property - @abstractmethod - def chunks(self) -> tuple: - """ - The chunk shape of the source. - """ - pass - - @property - @abstractmethod - def blocks(self) -> tuple: - """ - The block shape of the source. - """ - pass - - @property - @abstractmethod - def dtype(self) -> np.dtype: - """ - The dtype of the source. - """ - pass - - @abstractmethod - def get_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self`. - - Parameters - ---------- - nchunk: int - The unidimensional index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - """ - pass - - def aget_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self` in an asynchronous way. - - Parameters - ---------- - nchunk: int - The index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - - Notes - ----- - This method is optional, and only available if the source has an async `aget_chunk` method. - """ - raise NotImplementedError("aget_chunk is only available if the source has an aget_chunk method") +import numpy as np class ProxySource(ABC): """ - Base interface for sources of :ref:`Proxy` that are not NDim objects. - """ - - @property - @abstractmethod - def nbytes(self) -> int: - """ - The total number of bytes in the source. - """ - pass + Base interface for all supported sources in :ref:`Proxy`. - @property - @abstractmethod - def chunksize(self) -> tuple: - """ - The chunksize of the source. - """ - pass + In case the source is multidimensional, the attributes `shape`, `chunks`, + `blocks` and `dtype` are also required when creating the :ref:`Proxy`. - @property - @abstractmethod - def typesize(self) -> int: - """ - The typesize of the source. - """ - pass + In case the source is unidimensional, the attributes `chunksize`, `typesize` + and `nbytes` are required as well when creating the :ref:`Proxy`. + These attributes do not need to be available when opening an already + existing :ref:`Proxy`. + """ @abstractmethod def get_chunk(self, nchunk: int) -> bytes: @@ -124,7 +32,7 @@ def get_chunk(self, nchunk: int) -> bytes: Parameters ---------- nchunk: int - The index of the chunk to retrieve. + The unidimensional index of the chunk to retrieve. Returns ------- @@ -133,42 +41,23 @@ def get_chunk(self, nchunk: int) -> bytes: """ pass - def aget_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self` in an asynchronous way. - - Parameters - ---------- - nchunk: int - The index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - - Notes - ----- - This method is optional, and only available if the source has an async `aget_chunk` method. - """ - raise NotImplementedError("aget_chunk is only available if the source has an aget_chunk method") - class Proxy(blosc2.Operand): """Proxy (with cache support) of an object following the :ref:`ProxySource` interface. - This can be used to cache chunks of a regular data container which follows the - :ref:`ProxySource` or :ref:`ProxyNDSource` interfaces. + This can be used to cache chunks of a regular data container + which follows the :ref:`ProxySource` interface in an urlpath. """ - def __init__(self, src: ProxySource or ProxyNDSource, urlpath: str = None, **kwargs: dict): + def __init__(self, src, urlpath=None, **kwargs): """ - Create a new :ref:`Proxy` to serve like a cache to save accessed chunks locally. + Create a new :ref:`Proxy` to serve like a cache to save accessed + chunks locally. Parameters ---------- - src: :ref:`ProxySource` or :ref:`ProxyNDSource` - The original container. + src: :ref:`ProxySource` + The original container urlpath: str, optional The urlpath where to save the container that will work as a cache. @@ -179,6 +68,7 @@ def __init__(self, src: ProxySource or ProxyNDSource, urlpath: str = None, **kwa vlmeta: dict or None A dictionary with different variable length metalayers. One entry per metalayer: + key: bytes or str The name of the metalayer. value: object @@ -248,17 +138,12 @@ def fetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blosc2.sch >>> data = np.arange(20).reshape(10, 2) >>> ndarray = blosc2.asarray(data) >>> proxy = blosc2.Proxy(ndarray) - >>> full_data = proxy.fetch() - >>> f"Full data cache: {full_data[:]}" - Full data cache: - [[ 0 1][ 2 3][ 4 5] - [ 6 7][ 8 9][10 11] - [12 13][14 15][16 17] - [18 19]] - >>> slice_data = proxy[0:2, :] - >>> f"Slice data cache: {slice_data}" + >>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2))) + >>> f"Slice data cache: {slice_data[:3, :2]}" Slice data cache: - [[0 1][2 3]] + [[0 1] + [2 3] + [4 5]] """ if item is None: # Full realization @@ -296,6 +181,69 @@ async def afetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blo ----- This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource` have an async `aget_chunk` method. + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> import asyncio + >>> from blosc2 import ProxyNDSource + >>> class MyProxySource(ProxyNDSource): + >>> def __init__(self, data): + >>> # If the next source is multidimensional, it must have the attributes: + >>> self.data = data + >>> f"Data shape: {self.shape}, Chunks: {self.chunks}" + Data shape: (4, 5), Chunks: [2, 5] + >>> f"Blocks: {self.blocks}, Dtype: {self.dtype}" + Blocks: [1, 5], Dtype: int64 + >>> @property + >>> def shape(self): + >>> return self.data.shape + >>> @property + >>> def chunks(self): + >>> return self.data.chunks + >>> @property + >>> def blocks(self): + >>> return self.data.blocks + >>> @property + >>> def dtype(self): + >>> return self.data.dtype + >>> # This method must be present + >>> def get_chunk(self, nchunk): + >>> return self.data.get_chunk(nchunk) + >>> # This method is optional + >>> async def aget_chunk(self, nchunk): + >>> await asyncio.sleep(0.1) # Simulate an asynchronous operation + >>> return self.data.get_chunk(nchunk) + >>> data = np.arange(20).reshape(4, 5) + >>> chunks = [2, 5] + >>> blocks = [1, 5] + >>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks) + >>> source = MyProxySource(data) + >>> proxy = blosc2.Proxy(source) + >>> async def fetch_data(): + >>> # Fetch a slice of the data from the proxy asynchronously + >>> slice_data = await proxy.afetch(slice(0, 2)) + >>> # Note that only data fetched is shown, the rest is uninitialized + >>> f"Slice data cache: {slice_data[:]}" + Slice data cache: + [[0 1 2 3 4] + [5 6 7 8 9] + [0 0 0 0 0] + [0 0 0 0 0]] + >>> # Fetch the full data from the proxy asynchronously + >>> full_data = await proxy.afetch() + >>> # Now, all data is shown, meaning the full data has been fetched + >>> f"Full data cache: {full_data[:]}" + Full data cache: + [[ 0 1 2 3 4] + [ 5 6 7 8 9] + [10 11 12 13 14] + [15 16 17 18 19]] + >>> asyncio.run(fetch_data()) + >>> # Using getitem to get a slice of the data + >>> result = proxy[1:2, 1:3] + [[6 7]] """ if not callable(getattr(self.src, "aget_chunk", None)): raise NotImplementedError("afetch is only available if the source has an aget_chunk method") @@ -333,34 +281,36 @@ def __getitem__(self, item: slice | list[slice]) -> np.ndarray: -------- >>> import numpy as np >>> import blosc2 - >>> data = np.arange(100).reshape(10, 10) + >>> data = np.arange(25).reshape(5, 5) >>> ndarray = blosc2.asarray(data) >>> proxy = blosc2.Proxy(ndarray) >>> slice_1 = proxy[0:3, 0:3] >>> f"Slice 1: {slice_1}" Slice 1: [[ 0 1 2] + [ 5 6 7] [10 11 12] - [20 21 22]] - >>> slice_2 = proxy[5:8, 2:5] + >>> slice_2 = proxy[2:5, 2:5] >>> f"Slice 2: {slice_2}" Slice 2: - [[52 53 54] - [62 63 64] - [72 73 74]] + [[12 13 14] + [17 18 19] + [22 23 24]] """ # Populate the cache self.fetch(item) return self._cache[item] @property - def dtype(self) -> np.dtype: - """The dtype of :paramref:`self` or None if the data is unidimensional""" + def dtype(self): + """The dtype of :paramref:`self` or None if the data is unidimensional + """ return self._cache.dtype if isinstance(self._cache, blosc2.NDArray) else None @property - def shape(self) -> tuple[int]: - """The shape of :paramref:`self`""" + def shape(self): + """The shape of :paramref:`self` + """ return self._cache.shape if isinstance(self._cache, blosc2.NDArray) else len(self._cache) def __str__(self): @@ -374,11 +324,21 @@ def vlmeta(self) -> blosc2.schunk.vlmeta: See Also -------- :ref:`SChunk.vlmeta` + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> data = np.arange(100).reshape(10, 10) + >>> ndarray = blosc2.asarray(data) + >>> proxy = blosc2.Proxy(ndarray) + >>> f"VLMeta data: {proxy.vlmeta}" + VLMeta data: """ return self._schunk_cache.vlmeta @property - def fields(self) -> dict: + def fields(self)-> dict: """ Dictionary with the fields of :paramref:`self`. @@ -390,6 +350,26 @@ def fields(self) -> dict: See Also -------- :ref:`NDField` + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> data = np.zeros(16, dtype=[('field1', 'i4'), ('field2', 'f4')]).reshape(4, 4) + >>> ndarray = blosc2.asarray(data) + >>> proxy = blosc2.Proxy(ndarray) + >>> # Get a dictionary of fields from the proxy, where each field can be accessed individually + >>> fields_dict = proxy.fields + >>> for field_name, field_proxy in fields_dict.items(): + >>> f"Field name: {field_name}, Field data: {field_proxy}" + Field name: field1, Field data: + Field name: field2, Field data: + >>> field1_data = fields_dict['field1'][:] + >>> field1_data + [[0 0 0 0] + [0 0 0 0] + [0 0 0 0] + [0 0 0 0]] """ _fields = getattr(self._cache, "fields", None) if _fields is None: diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index c9f7c49f..91ee36fe 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -11,6 +11,7 @@ import pathlib from collections import namedtuple from collections.abc import Mapping, MutableMapping +from dataclasses import asdict from typing import Any, Iterator, NamedTuple import numpy as np @@ -156,95 +157,20 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): Other parameters ---------------- kwargs: dict, optional + Storage parameters. The default values are in :class:`blosc2.Storage`. Keyword arguments supported: - - contiguous: bool, optional - If the chunks are stored contiguously or not. - Default is True when :paramref:`urlpath` is not None; - False otherwise. - urlpath: str | pathlib.Path, optional - If the storage is persistent, the name of the file (when - `contiguous = True`) or the directory (if `contiguous = False`). - If the storage is in-memory, then this field is `None`. - mode: str, optional - Persistence mode: ‘r’ means read only (must exist); - ‘a’ means read/write (create if it doesn’t exist); - ‘w’ means create (overwrite if it exists). - mmap_mode: str, optional - If set, the file will be memory-mapped instead of using the default - I/O functions and the `mode` argument will be ignored. The memory-mapping - modes are similar as used by the - `numpy.memmap `_ - function, but it is possible to extend the file: - - .. list-table:: - :widths: 10 90 - :header-rows: 1 - - * - mode - - description - * - 'r' - - Open an existing file for reading only. - * - 'r+' - - Open an existing file for reading and writing. Use this mode if you want - to append data to an existing schunk file. - * - 'w+' - - Create or overwrite an existing file for reading and writing. Use this - mode if you want to create a new schunk. - * - 'c' - - Open an existing file in copy-on-write mode: all changes affect the data - in memory but changes are not saved to disk. The file on disk is - read-only. On Windows, the size of the mapping cannot change. - - Only contiguous storage can be memory-mapped. Hence, `urlpath` must point to a - file (and not a directory). - - .. note:: - Memory-mapped files are opened once and the file contents remain in (virtual) - memory for the lifetime of the schunk. Using memory-mapped I/O can be faster - than using the default I/O functions depending on the use case. Whereas - reading performance is generally better, writing performance may also be - slower in some cases on certain systems. In any case, memory-mapped files - can be especially beneficial when operating with network file systems - (like NFS). - - This is currently a beta feature (especially write operations) and we - recommend trying it out and reporting any issues you may encounter. - - initial_mapping_size: int, optional - The initial size of the mapping for the memory-mapped file when writes are - allowed (r+ w+, or c mode). Once a file is memory-mapped and extended beyond the - initial mapping size, the file must be remapped which may be expensive. This - parameter allows to decouple the mapping size from the actual file size to early - reserve memory for future writes and avoid remappings. The memory is only - reserved virtually and does not occupy physical memory unless actual writes - happen. Since the virtual address space is large enough, it is ok to be generous - with this parameter (with special consideration on Windows, see note below). - For best performance, set this to the maximum expected size of the compressed - data (see example in :obj:`SChunk.__init__ `). - The size is in bytes. - - Default: 1 GiB. - - .. note:: - On Windows, the size of the mapping is directly coupled to the file size. - When the schunk gets destroyed, the file size will be truncated to the - actual size of the schunk. - - cparams: dict - A dictionary with the compression parameters, which are the same - as those can be used in the :func:`~blosc2.compress2` function. - dparams: dict - A dictionary with the decompression parameters, which are the same - as those that can be used in the :func:`~blosc2.decompress2` - function. - meta: dict or None - A dictionary with different metalayers. One entry per metalayer: - - key: bytes or str - The name of the metalayer. - value: object - The metalayer object that will be serialized using msgpack. + storage: :class:`blosc2.Storage` or dict + All the storage parameters that you want to use as + a :class:`blosc2.Storage` or dict instance. + cparams: :class:`blosc2.CParams` or dict + All the compression parameters that you want to use as + a :class:`blosc2.CParams` or dict instance. + dparams: :class:`blosc2.DParams` or dict + All the decompression parameters that you want to use as + a :class:`blosc2.DParams` or dict instance. + others: Any + If `storage` is not passed, all the parameters of a :class:`blosc2.Storage` + can be passed as keyword arguments. Examples -------- @@ -301,10 +227,26 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): "mmap_mode", "initial_mapping_size", "_is_view", + "storage" ] for kwarg in kwargs: if kwarg not in allowed_kwargs: raise ValueError(f"{kwarg} is not supported as keyword argument") + if kwargs.get("storage") is not None: + if any(key in list(blosc2.Storage.__annotations__) for key in kwargs.keys()): + raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") + storage = kwargs.get("storage") + if isinstance(storage, blosc2.Storage): + kwargs = {**kwargs, **asdict(storage)} + else: + kwargs = {**kwargs, **storage} + + if isinstance(kwargs.get("cparams"), blosc2.CParams): + kwargs["cparams"] = asdict(kwargs.get("cparams")) + + if isinstance(kwargs.get("dparams"), blosc2.DParams): + kwargs["dparams"] = asdict(kwargs.get("dparams")) + urlpath = kwargs.get("urlpath") if "contiguous" not in kwargs: # Make contiguous true for disk, else sparse (for in-memory performance) @@ -345,26 +287,26 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): self._dparams = super().get_dparams() @property - def cparams(self) -> dict: + def cparams(self) -> blosc2.CParams: """ - Dictionary with the compression parameters. + :class:`blosc2.CParams` instance with the compression parameters. """ return self._cparams @cparams.setter - def cparams(self, value): + def cparams(self, value: blosc2.CParams) -> None: super().update_cparams(value) self._cparams = super().get_cparams() @property - def dparams(self) -> dict: + def dparams(self) -> blosc2.DParams: """ - Dictionary with the decompression parameters. + :class:`blosc2.DParams` instance with the decompression parameters. """ return self._dparams @dparams.setter - def dparams(self, value): + def dparams(self, value: blosc2.DParams) -> None: super().update_dparams(value) self._dparams = super().get_dparams() @@ -1395,8 +1337,8 @@ def __dealloc__(self): super().__dealloc__() -@_inherit_doc_parameter(SChunk.__init__, "mmap_mode:", {r"\* - 'w\+'[^*]+": ""}) -@_inherit_doc_parameter(SChunk.__init__, "initial_mapping_size:", {r"r\+ w\+, or c": "r+ or c"}) +@_inherit_doc_parameter(blosc2.Storage, "mmap_mode:", {r"\* - 'w\+'[^*]+": ""}) +@_inherit_doc_parameter(blosc2.Storage, "initial_mapping_size:", {r"r\+ w\+, or c": "r+ or c"}) def open(urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array: """Open a persistent :ref:`SChunk` or :ref:`NDArray` or a remote :ref:`C2Array` diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py new file mode 100644 index 00000000..759b297e --- /dev/null +++ b/src/blosc2/storage.py @@ -0,0 +1,227 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# This source code is licensed under a BSD-style license (found in the +# LICENSE file in the root directory of this source tree) +####################################################################### + +from dataclasses import dataclass, field, asdict, fields +import warnings + +import blosc2 + + +def default_nthreads(): + return blosc2.nthreads + +def default_filters(): + return [blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.SHUFFLE] + + +def default_filters_meta(): + return [0] * 6 + +@dataclass +class CParams: + """Dataclass for hosting the different compression parameters. + + Parameters + ---------- + codec: :class:`Codec` or int + The compressor code. Default is :py:obj:`Codec.ZSTD `. + codec_meta: int + The metadata for the compressor code, 0 by default. + clevel: int + The compression level from 0 (no compression) to 9 + (maximum compression). Default: 1. + use_dict: bool + Use dicts or not when compressing + (only for :py:obj:`blosc2.Codec.ZSTD `). Default: `False`. + typesize: int from 1 to 255 + The data type size. Default: 8. + nthreads: int + The number of threads to use internally. By default, the + value of :py:obj:`blosc2.nthreads` is used. If not set with + :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it. + blocksize: int + The requested size of the compressed blocks. If 0 (the default) + blosc2 chooses it automatically. + splitmode: :class:`SplitMode` + The split mode for the blocks. + The default value is :py:obj:`SplitMode.ALWAYS_SPLIT `. + filters: :class:`Filter` or int list + The sequence of filters. Default: [:py:obj:`Filter.NOFILTER `, + :py:obj:`Filter.NOFILTER `, :py:obj:`Filter.NOFILTER `, :py:obj:`Filter.NOFILTER `, + :py:obj:`Filter.NOFILTER `, :py:obj:`Filter.SHUFFLE `]. + filters_meta: list + The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`. + tuner: :class:`Tuner` + The tuner to use. Default: :py:obj:`Tuner.STUNE `. + """ + codec: blosc2.Codec | int = blosc2.Codec.ZSTD + codec_meta: int = 0 + clevel: int = 1 + use_dict: bool = False + typesize: int = 8 + nthreads: int = field(default_factory=default_nthreads) + blocksize: int = 0 + splitmode: blosc2.SplitMode = blosc2.SplitMode.ALWAYS_SPLIT + filters: list[blosc2.Filter | int] = field(default_factory=default_filters) + filters_meta: list[int] = field(default_factory=default_filters_meta) + tuner: blosc2.Tuner = blosc2.Tuner.STUNE + + def __post_init__(self): + if len(self.filters) > 6: + raise ValueError("Number of filters exceeds 6") + if len(self.filters) < len(self.filters_meta): + self.filters_meta = self.filters_meta[:len(self.filters)] + warnings.warn("Changed `filters_meta` length to match `filters` length") + if len(self.filters) > len(self.filters_meta): + raise ValueError("Number of filters cannot exceed number of filters meta") + + for i in range(len(self.filters)): + if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA: + self.filters_meta[i] = self.typesize + + +@dataclass +class DParams: + """Dataclass for hosting the different decompression parameters. + + Parameters + ---------- + nthreads: int + The number of threads to use internally. By default, the + value of :py:obj:`blosc2.nthreads` is used. If not set with + :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it. + """ + nthreads: int = field(default_factory=default_nthreads) + + +@dataclass +class Storage: + """Dataclass for hosting the different storage parameters. + + Parameters + ---------- + contiguous: bool + If the chunks are stored contiguously or not. + Default is True when :paramref:`urlpath` is not None; + False otherwise. + urlpath: str or pathlib.Path, optional + If the storage is persistent, the name of the file (when + `contiguous = True`) or the directory (if `contiguous = False`). + If the storage is in-memory, then this field is `None`. + mode: str, optional + Persistence mode: ‘r’ means read only (must exist); + ‘a’ means read/write (create if it doesn’t exist); + ‘w’ means create (overwrite if it exists). Default is 'a'. + mmap_mode: str, optional + If set, the file will be memory-mapped instead of using the default + I/O functions and the `mode` argument will be ignored. The memory-mapping + modes are similar as used by the + `numpy.memmap `_ + function, but it is possible to extend the file: + + .. list-table:: + :widths: 10 90 + :header-rows: 1 + + * - mode + - description + * - 'r' + - Open an existing file for reading only. + * - 'r+' + - Open an existing file for reading and writing. Use this mode if you want + to append data to an existing schunk file. + * - 'w+' + - Create or overwrite an existing file for reading and writing. Use this + mode if you want to create a new schunk. + * - 'c' + - Open an existing file in copy-on-write mode: all changes affect the data + in memory but changes are not saved to disk. The file on disk is + read-only. On Windows, the size of the mapping cannot change. + + Only contiguous storage can be memory-mapped. Hence, `urlpath` must point to a + file (and not a directory). + + .. note:: + Memory-mapped files are opened once and the file contents remain in (virtual) + memory for the lifetime of the schunk. Using memory-mapped I/O can be faster + than using the default I/O functions depending on the use case. Whereas + reading performance is generally better, writing performance may also be + slower in some cases on certain systems. In any case, memory-mapped files + can be especially beneficial when operating with network file systems + (like NFS). + + This is currently a beta feature (especially write operations) and we + recommend trying it out and reporting any issues you may encounter. + + initial_mapping_size: int, optional + The initial size of the mapping for the memory-mapped file when writes are + allowed (r+ w+, or c mode). Once a file is memory-mapped and extended beyond the + initial mapping size, the file must be remapped which may be expensive. This + parameter allows to decouple the mapping size from the actual file size to early + reserve memory for future writes and avoid remappings. The memory is only + reserved virtually and does not occupy physical memory unless actual writes + happen. Since the virtual address space is large enough, it is ok to be generous + with this parameter (with special consideration on Windows, see note below). + For best performance, set this to the maximum expected size of the compressed + data (see example in :obj:`SChunk.__init__ `). + The size is in bytes. + + Default: 1 GiB. + + .. note:: + On Windows, the size of the mapping is directly coupled to the file size. + When the schunk gets destroyed, the file size will be truncated to the + actual size of the schunk. + + meta: dict or None + A dictionary with different metalayers. One entry per metalayer: + + key: bytes or str + The name of the metalayer. + value: object + The metalayer object that will be serialized using msgpack. + """ + contiguous: bool = None + urlpath: str = None + mode: str = 'a' + mmap_mode: str = None + initial_mapping_size: int = None + meta: dict = None + + def __post_init__(self): + if self.contiguous is None: + self.contiguous = False if self.urlpath is None else True + # Check for None values + for field in fields(self): + if (getattr(self, field.name) is None and + field.name not in ['urlpath', 'mmap_mode', 'initial_mapping_size', 'meta']): + setattr(self, field.name, getattr(Storage(), field.name)) + warnings.warn("`{name}` field value changed from `None` to `{value}`".format(name=field.name, value=getattr(self, field.name))) + + +# Defaults for compression params +cparams_dflts = asdict(CParams()) +""" +Compression params defaults. +""" + +# Defaults for decompression params +dparams_dflts = asdict(DParams()) +""" +Decompression params defaults. +""" +# Default for storage +storage_dflts = asdict(Storage()) +""" +Storage params defaults. This is meant only for :ref:`SChunk ` or :ref:`NDArray `. +""" diff --git a/tests/ndarray/test_c2array_udf.py b/tests/ndarray/test_c2array_udf.py index d9c7357f..e13f631f 100644 --- a/tests/ndarray/test_c2array_udf.py +++ b/tests/ndarray/test_c2array_udf.py @@ -95,8 +95,7 @@ def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, c2su chunked_eval=chunked_eval, chunks=chunks, blocks=blocks, - urlpath=urlpath, - contiguous=contiguous, + storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous), dparams=dparams, ) lazy_eval = expr[slices] @@ -107,6 +106,6 @@ def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, c2su assert res.schunk.urlpath is None assert res.schunk.contiguous == contiguous # Check dparams after a getitem and an eval - assert res.schunk.dparams["nthreads"] == dparams["nthreads"] + assert res.schunk.dparams.nthreads == dparams["nthreads"] blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_copy.py b/tests/ndarray/test_copy.py index a8d9c18b..5bf773de 100644 --- a/tests/ndarray/test_copy.py +++ b/tests/ndarray/test_copy.py @@ -27,7 +27,7 @@ def test_copy(shape, chunks1, blocks1, chunks2, blocks2, dtype): typesize = dtype.itemsize size = int(np.prod(shape)) buffer = bytes(size * typesize) - cparams1 = {"clevel": 2} + cparams1 = blosc2.CParams(clevel=2) a = blosc2.frombuffer(buffer, shape, dtype=dtype, chunks=chunks1, blocks=blocks1, cparams=cparams1) cparams2 = {"clevel": 5, "filters": [blosc2.Filter.BITSHUFFLE], "filters_meta": [0]} b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams2) @@ -35,9 +35,9 @@ def test_copy(shape, chunks1, blocks1, chunks2, blocks2, dtype): assert a.schunk.dparams == b.schunk.dparams for key in cparams2: if key in ("filters", "filters_meta"): - assert b.schunk.cparams[key][: len(cparams2[key])] == cparams2[key] + assert getattr(b.schunk.cparams, key)[: len(cparams2[key])] == cparams2[key] continue - assert b.schunk.cparams[key] == cparams2[key] + assert getattr(b.schunk.cparams, key) == cparams2[key] assert b.chunks == tuple(chunks2) assert b.blocks == tuple(blocks2) assert a.dtype == b.dtype @@ -63,7 +63,7 @@ def test_copy_numpy(shape, chunks1, blocks1, chunks2, blocks2, dtype): else: nparray = np.arange(size, dtype=dtype).reshape(shape) a = blosc2.asarray(nparray, chunks=chunks1, blocks=blocks1) - cparams = {"clevel": 5, "filters": [blosc2.Filter.BITSHUFFLE], "filters_meta": [0]} + cparams = blosc2.CParams(clevel=5, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0]) b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams) assert b.dtype == nparray.dtype if dtype.kind == "V": diff --git a/tests/ndarray/test_empty.py b/tests/ndarray/test_empty.py index d165de2d..bf405f70 100644 --- a/tests/ndarray/test_empty.py +++ b/tests/ndarray/test_empty.py @@ -65,16 +65,15 @@ def test_empty(shape, chunks, blocks, dtype, cparams, urlpath, contiguous): blosc2.remove_urlpath(urlpath) filters = cparams["filters"] - cparams["filters_meta"] = [0] * len(filters) + storage = blosc2.Storage(urlpath=urlpath, contiguous=contiguous) a = blosc2.empty( shape, chunks=chunks, blocks=blocks, dtype=dtype, - cparams=cparams, + storage=storage, + cparams=blosc2.CParams(**cparams), dparams={"nthreads": 2}, - urlpath=urlpath, - contiguous=contiguous, ) dtype = np.dtype(dtype) @@ -83,10 +82,10 @@ def test_empty(shape, chunks, blocks, dtype, cparams, urlpath, contiguous): assert a.blocks == blocks assert a.dtype == dtype assert a.schunk.typesize == dtype.itemsize - assert a.schunk.cparams["codec"] == cparams["codec"] - assert a.schunk.cparams["clevel"] == cparams["clevel"] - assert a.schunk.cparams["filters"][: len(filters)] == filters - assert a.schunk.dparams["nthreads"] == 2 + assert a.schunk.cparams.codec == cparams["codec"] + assert a.schunk.cparams.clevel == cparams["clevel"] + assert a.schunk.cparams.filters[: len(filters)] == filters + assert a.schunk.dparams.nthreads == 2 blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_full.py b/tests/ndarray/test_full.py index 3734af64..4f7a5e81 100644 --- a/tests/ndarray/test_full.py +++ b/tests/ndarray/test_full.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from dataclasses import asdict import blosc2 @@ -32,7 +33,7 @@ (10, 10), b"sun", None, - {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2}, + blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), {"nthreads": 2}, "full.b2nd", True, @@ -54,7 +55,7 @@ (11, 11), 123456789, None, - {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2}, + blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), {"nthreads": 2}, None, True, @@ -63,6 +64,7 @@ ) def test_full(shape, chunks, blocks, fill_value, cparams, dparams, dtype, urlpath, contiguous): blosc2.remove_urlpath(urlpath) + storage = {"urlpath": urlpath, "contiguous": contiguous} a = blosc2.full( shape, fill_value, @@ -70,11 +72,10 @@ def test_full(shape, chunks, blocks, fill_value, cparams, dparams, dtype, urlpat blocks=blocks, dtype=dtype, cparams=cparams, - dparams=dparams, - urlpath=urlpath, - contiguous=contiguous, + dparams=blosc2.DParams(**dparams), + **storage, ) - assert a.schunk.dparams == dparams + assert asdict(a.schunk.dparams) == dparams if isinstance(fill_value, bytes): dtype = np.dtype(f"S{len(fill_value)}") assert a.dtype == np.dtype(dtype) if dtype is not None else np.dtype(np.uint8) diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index a4b90b47..69fd0022 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -160,7 +160,7 @@ def test_simple_expression(array_fixture): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture expr = a1 + a2 - a3 * a4 nres = ne.evaluate("na1 + na2 - na3 * na4") - res = expr.eval() + res = expr.eval(cparams=blosc2.CParams()) np.testing.assert_allclose(res[:], nres) @@ -171,7 +171,7 @@ def test_proxy_simple_expression(array_fixture): a3 = blosc2.Proxy(a3) expr = a1 + a2 - a3 * a4 nres = ne.evaluate("na1 + na2 - na3 * na4") - res = expr.eval() + res = expr.eval(storage=blosc2.Storage()) np.testing.assert_allclose(res[:], nres) @@ -221,7 +221,7 @@ def test_func_expression(array_fixture): expr = (a1 + a2) * a3 - a4 expr = blosc2.sin(expr) + blosc2.cos(expr) nres = ne.evaluate("sin((na1 + na2) * na3 - na4) + cos((na1 + na2) * na3 - na4)") - res = expr.eval() + res = expr.eval(storage={}) np.testing.assert_allclose(res[:], nres) @@ -250,7 +250,7 @@ def test_comparison_operators(dtype_fixture, compare_expressions, comparison_ope else: expr = eval(f"a1 {comparison_operator} a2", {"a1": a1, "a2": a2}) expr_string = f"na1 {comparison_operator} na2" - res_lazyexpr = expr.eval() + res_lazyexpr = expr.eval(dparams={}) # Evaluate using NumExpr res_numexpr = ne.evaluate(expr_string) # Compare the results @@ -290,7 +290,7 @@ def test_functions(function, dtype_fixture, shape_fixture): a1 = blosc2.asarray(na1, cparams=cparams) # Construct the lazy expression based on the function name expr = blosc2.LazyExpr(new_op=(a1, function, None)) - res_lazyexpr = expr.eval() + res_lazyexpr = expr.eval(cparams={}) # Evaluate using NumExpr expr_string = f"{function}(na1)" res_numexpr = ne.evaluate(expr_string) @@ -384,7 +384,7 @@ def test_abs(shape_fixture, dtype_fixture): na1 = np.linspace(-1, 1, nelems, dtype=dtype_fixture).reshape(shape_fixture) a1 = blosc2.asarray(na1) expr = blosc2.LazyExpr(new_op=(a1, "abs", None)) - res_lazyexpr = expr.eval() + res_lazyexpr = expr.eval(dparams={}) res_np = np.abs(na1) np.testing.assert_allclose(res_lazyexpr[:], res_np) @@ -448,15 +448,15 @@ def test_params(array_fixture): urlpath = "eval_expr.b2nd" blosc2.remove_urlpath(urlpath) - cparams = {"nthreads": 2} + cparams = blosc2.CParams(nthreads=2) dparams = {"nthreads": 4} chunks = tuple(i // 2 for i in nres.shape) blocks = tuple(i // 4 for i in nres.shape) res = expr.eval(urlpath=urlpath, cparams=cparams, dparams=dparams, chunks=chunks, blocks=blocks) np.testing.assert_allclose(res[:], nres) assert res.schunk.urlpath == urlpath - assert res.schunk.cparams["nthreads"] == cparams["nthreads"] - assert res.schunk.dparams["nthreads"] == dparams["nthreads"] + assert res.schunk.cparams.nthreads == cparams.nthreads + assert res.schunk.dparams.nthreads == dparams["nthreads"] assert res.chunks == chunks assert res.blocks == blocks @@ -493,8 +493,8 @@ def test_save(): chunks = tuple(i // 2 for i in nres.shape) blocks = tuple(i // 4 for i in nres.shape) urlpath_eval = "eval_expr.b2nd" - res = expr.eval( - urlpath=urlpath_eval, cparams=cparams, dparams=dparams, mode="w", chunks=chunks, blocks=blocks + res = expr.eval(storage=blosc2.Storage(urlpath=urlpath_eval, mode="w"), + chunks=chunks, blocks=blocks, cparams=cparams, dparams=dparams, ) np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) diff --git a/tests/ndarray/test_lazyudf.py b/tests/ndarray/test_lazyudf.py index ed642ccc..5391e5ae 100644 --- a/tests/ndarray/test_lazyudf.py +++ b/tests/ndarray/test_lazyudf.py @@ -188,7 +188,7 @@ def test_params(chunked_eval): res = expr.eval(urlpath=urlpath2, chunks=(10,)) np.testing.assert_allclose(res[...], npc) assert res.shape == npa.shape - assert res.schunk.cparams["nthreads"] == cparams["nthreads"] + assert res.schunk.cparams.nthreads == cparams["nthreads"] assert res.schunk.urlpath == urlpath2 assert res.chunks == (10,) @@ -243,7 +243,7 @@ def test_getitem(shape, chunks, blocks, slices, urlpath, contiguous, chunked_eva assert res.schunk.urlpath is None assert res.schunk.contiguous == contiguous # Check dparams after a getitem and an eval - assert res.schunk.dparams["nthreads"] == dparams["nthreads"] + assert res.schunk.dparams.nthreads == dparams["nthreads"] lazy_eval = expr[slices] np.testing.assert_allclose(lazy_eval, npc[slices]) @@ -282,8 +282,8 @@ def test_eval_slice(shape, chunks, blocks, slices, urlpath, contiguous, chunked_ np.testing.assert_allclose(res[...], npc[slices]) assert res.schunk.urlpath is None assert res.schunk.contiguous == contiguous - assert res.schunk.dparams["nthreads"] == dparams["nthreads"] - assert res.schunk.cparams["nthreads"] == blosc2.cparams_dflts["nthreads"] + assert res.schunk.dparams.nthreads == dparams["nthreads"] + assert res.schunk.cparams.nthreads == blosc2.cparams_dflts["nthreads"] assert res.shape == npc[slices].shape cparams = {"nthreads": 6} @@ -294,8 +294,8 @@ def test_eval_slice(shape, chunks, blocks, slices, urlpath, contiguous, chunked_ np.testing.assert_allclose(res[...], npc[slices]) assert res.schunk.urlpath == urlpath2 assert res.schunk.contiguous == contiguous - assert res.schunk.dparams["nthreads"] == dparams["nthreads"] - assert res.schunk.cparams["nthreads"] == cparams["nthreads"] + assert res.schunk.dparams.nthreads == dparams["nthreads"] + assert res.schunk.cparams.nthreads == cparams["nthreads"] assert res.shape == npc[slices].shape blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_lossy.py b/tests/ndarray/test_lossy.py index ceced6e9..b0a21fd4 100644 --- a/tests/ndarray/test_lossy.py +++ b/tests/ndarray/test_lossy.py @@ -9,6 +9,7 @@ import numpy as np import pytest +from dataclasses import asdict import blosc2 @@ -18,7 +19,7 @@ ( (32, 18), np.float32, - {"codec": blosc2.Codec.NDLZ, "codec_meta": 4}, + blosc2.CParams(codec=blosc2.Codec.NDLZ, codec_meta=4), None, False, ), @@ -60,14 +61,15 @@ ], ) def test_lossy(shape, cparams, dtype, urlpath, contiguous): - if cparams.get("codec") == blosc2.Codec.NDLZ: + cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams) + if cparams_dict.get("codec") == blosc2.Codec.NDLZ: dtype = np.uint8 array = np.linspace(0, np.prod(shape), np.prod(shape), dtype=dtype).reshape(shape) a = blosc2.asarray(array, cparams=cparams, urlpath=urlpath, contiguous=contiguous, mode="w") if ( - a.schunk.cparams["codec"] in (blosc2.Codec.ZFP_RATE, blosc2.Codec.ZFP_PREC, blosc2.Codec.ZFP_ACC) - or a.schunk.cparams["filters"][0] == blosc2.Filter.NDMEAN + a.schunk.cparams.codec in (blosc2.Codec.ZFP_RATE, blosc2.Codec.ZFP_PREC, blosc2.Codec.ZFP_ACC) + or a.schunk.cparams.filters[0] == blosc2.Filter.NDMEAN ): _ = a[...] else: diff --git a/tests/ndarray/test_metalayers.py b/tests/ndarray/test_metalayers.py index 4a7aae43..68c6ab48 100644 --- a/tests/ndarray/test_metalayers.py +++ b/tests/ndarray/test_metalayers.py @@ -41,9 +41,8 @@ def test_metalayers(shape, chunks, blocks, urlpath, contiguous, dtype): chunks=chunks, blocks=blocks, dtype=dtype, - urlpath=urlpath, - contiguous=contiguous, - meta={"numpy": numpy_meta, "test": test_meta}, + storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous, + meta={"numpy": numpy_meta, "test": test_meta}), ) assert os.path.exists(urlpath) diff --git a/tests/ndarray/test_mode.py b/tests/ndarray/test_mode.py index 78be1478..3249fac1 100644 --- a/tests/ndarray/test_mode.py +++ b/tests/ndarray/test_mode.py @@ -22,14 +22,14 @@ 3.14, np.float64, {"codec": blosc2.Codec.ZLIB, "clevel": 5, "use_dict": False, "nthreads": 2}, - {"nthreads": 1}, + blosc2.DParams(nthreads=1), False, ), ( (13, 13), 123456789, None, - {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2}, + blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), {"nthreads": 2}, True, ), @@ -45,9 +45,7 @@ def test_mode(shape, fill_value, cparams, dparams, dtype, urlpath, contiguous, m dtype=dtype, cparams=cparams, dparams=dparams, - urlpath=urlpath, - contiguous=contiguous, - mode=mode, + storage={"urlpath": urlpath, "contiguous": contiguous, "mode": mode}, ) _ = blosc2.full( shape, @@ -55,8 +53,7 @@ def test_mode(shape, fill_value, cparams, dparams, dtype, urlpath, contiguous, m dtype=dtype, cparams=cparams, dparams=dparams, - urlpath=urlpath, - contiguous=contiguous, + storage={"urlpath": urlpath, "contiguous": contiguous}, ) a = blosc2.open(urlpath, mode=mode) diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py index 597668ca..17d02938 100644 --- a/tests/ndarray/test_ndarray.py +++ b/tests/ndarray/test_ndarray.py @@ -17,19 +17,19 @@ @pytest.mark.parametrize( "cparams, dparams, nchunks", [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {}, 1), + (blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), blosc2.DParams(), 1), ({"typesize": 4}, {"nthreads": 4}, 1), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "typesize": 4}, {}, 5), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 10), + ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "typesize": 4}, blosc2.DParams(), 5), + (blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4), {}, 10), ], ) @pytest.mark.parametrize("copy", [True, False]) def test_ndarray_cframe(contiguous, urlpath, cparams, dparams, nchunks, copy): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + storage = {"contiguous": contiguous, "urlpath": urlpath} blosc2.remove_urlpath(urlpath) data = np.arange(200 * 1000 * nchunks, dtype="int32").reshape(200, 1000, nchunks) - ndarray = blosc2.asarray(data, **storage) + ndarray = blosc2.asarray(data, storage=storage, cparams=cparams, dparams=dparams) cframe = ndarray.to_cframe() ndarray2 = blosc2.ndarray_from_cframe(cframe, copy) diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index 3f56e544..496713c6 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -65,7 +65,7 @@ def test_reduce_bool(array_fixture, reduce_op): @pytest.mark.parametrize("axis", [0, 1, (0, 1), None]) @pytest.mark.parametrize("keepdims", [True, False]) @pytest.mark.parametrize("dtype_out", [np.int16, np.float64]) -@pytest.mark.parametrize("kwargs", [{}, {"cparams": dict(clevel=1, shuffle=blosc2.Filter.BITSHUFFLE)}]) +@pytest.mark.parametrize("kwargs", [{}, {"cparams": blosc2.CParams(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}]) def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwargs): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis: diff --git a/tests/test_bytes_array.py b/tests/test_bytes_array.py index 2b0025b7..8c50b2da 100644 --- a/tests/test_bytes_array.py +++ b/tests/test_bytes_array.py @@ -22,6 +22,6 @@ def test_bytes_array(arr, gil): @pytest.mark.parametrize("data", [bytearray(7241), bytearray(7241) * 7]) def test_bytearray(data): - cdata = blosc2.compress(data) + cdata = blosc2.compress(data, typesize=1) uncomp = blosc2.decompress(cdata) assert data == uncomp diff --git a/tests/test_compress2.py b/tests/test_compress2.py index f9b1851d..af5e6f92 100644 --- a/tests/test_compress2.py +++ b/tests/test_compress2.py @@ -19,34 +19,34 @@ @pytest.mark.parametrize( "obj, cparams, dparams", [ - (random.integers(0, 10, 10), {"codec": blosc2.Codec.LZ4, "clevel": 6}, {}), + (random.integers(0, 10, 10), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}), ( np.arange(10, dtype="float32"), # Select an absolute precision of 10 bits in mantissa - { - "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - "filters_meta": [10, 0], - "typesize": 4, - }, - {"nthreads": 4}, + {'cparams': blosc2.CParams( + filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + filters_meta=[10, 0], + typesize=4 + )}, + {'dparams': blosc2.DParams(nthreads=4)}, ), ( np.arange(10, dtype="float32"), # Do a reduction of precision of 10 bits in mantissa - { - "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - "filters_meta": [-10, 0], - "typesize": 4, - }, + {"cparams": {"filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + "filters_meta": [-10, 0], + "typesize": 4, + }, + }, {"nthreads": 4}, ), ( random.integers(0, 1000, 1000, endpoint=True), - {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, - {}, + {'cparams': blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)}, + {'dparams': blosc2.DParams()}, ), - (np.arange(45, dtype=np.float64), {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}), - (np.arange(50, dtype=np.int64), {"typesize": 4}, blosc2.dparams_dflts), + (np.arange(45, dtype=np.float64), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, {}), + (np.arange(50, dtype=np.int64), {"typesize": 4}, {"dparams": blosc2.dparams_dflts}), ], ) def test_compress2_numpy(obj, cparams, dparams, gil): diff --git a/tests/test_decompress.py b/tests/test_decompress.py index 9560fba4..aa2c9f81 100644 --- a/tests/test_decompress.py +++ b/tests/test_decompress.py @@ -28,7 +28,8 @@ ) def test_decompress_numpy(object, codec, gil): blosc2.set_releasegil(gil) - c = blosc2.compress(object, codec=codec) + typesize = None if hasattr(object, "itemsize") else 1 + c = blosc2.compress(object, typesize=typesize, codec=codec) dest = bytearray(object) blosc2.decompress(c, dst=dest) @@ -59,7 +60,7 @@ def test_decompress_numpy(object, codec, gil): ], ) def test_decompress(object, codec): - c = blosc2.compress(object, codec=codec) + c = blosc2.compress(object, typesize=1, codec=codec) dest = bytearray(object) blosc2.decompress(c, dst=dest) diff --git a/tests/test_iterchunks.py b/tests/test_iterchunks.py index 2ecfdef3..8c1a8bfb 100644 --- a/tests/test_iterchunks.py +++ b/tests/test_iterchunks.py @@ -24,10 +24,10 @@ ], ) def test_iterchunks(contiguous, urlpath, cparams, dparams, nchunks): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **storage) + schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") @@ -54,12 +54,12 @@ def test_iterchunks(contiguous, urlpath, cparams, dparams, nchunks): ], ) def test_iterchunks_pf(contiguous, urlpath, cparams, dparams, nchunks): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) chunkshape = 200 * 1000 data = np.arange(0, nchunks * chunkshape, dtype=np.int32) - schunk = blosc2.SChunk(chunksize=chunkshape * 4, data=data, **storage) + schunk = blosc2.SChunk(chunksize=chunkshape * 4, data=data, **kwargs) @schunk.postfilter(np.int32, np.int32) def postf1(input, output, offset): diff --git a/tests/test_open.py b/tests/test_open.py index 7e9d17ff..839acbc2 100644 --- a/tests/test_open.py +++ b/tests/test_open.py @@ -43,11 +43,11 @@ def test_open(contiguous, urlpath, cparams, dparams, nchunks, chunk_nitems, dtyp if os.name == "nt" and mmap_mode == "c": pytest.skip("Cannot test mmap_mode 'c' on Windows") - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) dtype = np.dtype(dtype) schunk = blosc2.SChunk( - chunksize=chunk_nitems * dtype.itemsize, mmap_mode="w+" if mmap_mode is not None else None, **storage + chunksize=chunk_nitems * dtype.itemsize, mmap_mode="w+" if mmap_mode is not None else None, **kwargs ) for i in range(nchunks): buffer = i * np.arange(chunk_nitems, dtype=dtype) @@ -62,12 +62,12 @@ def test_open(contiguous, urlpath, cparams, dparams, nchunks, chunk_nitems, dtyp cparams2 = cparams cparams2["nthreads"] = 1 schunk_open = blosc2.open(urlpath, mode, mmap_mode=mmap_mode, cparams=cparams2) - assert schunk_open.cparams["nthreads"] == cparams2["nthreads"] + assert schunk_open.cparams.nthreads == cparams2["nthreads"] for key in cparams: if key == "nthreads": continue - assert schunk_open.cparams[key] == cparams[key] + assert getattr(schunk_open.cparams, key) == cparams[key] buffer = np.zeros(chunk_nitems, dtype=dtype) if mode != "r": diff --git a/tests/test_pathlib.py b/tests/test_pathlib.py index 9da3433b..f3c7f785 100644 --- a/tests/test_pathlib.py +++ b/tests/test_pathlib.py @@ -23,12 +23,12 @@ ) def test_schunk_pathlib(mode, mmap_mode, cparams, dparams, nchunks): urlpath = pathlib.Path("b2frame") - storage = {"urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) if mode != "r": chunk_len = 200 * 1000 - schunk = blosc2.SChunk(chunksize=chunk_len * 4, mode=mode, mmap_mode=mmap_mode, **storage) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, mode=mode, mmap_mode=mmap_mode, **kwargs) assert schunk.urlpath == str(urlpath) for i in range(nchunks): diff --git a/tests/test_postfilters.py b/tests/test_postfilters.py index baa4934f..cadd646f 100644 --- a/tests/test_postfilters.py +++ b/tests/test_postfilters.py @@ -67,7 +67,7 @@ def postf2(input, output, offset): def postf3(input, output, offset): output[:] = input <= np.datetime64("1997-12-31") - schunk.dparams = {"nthreads": 1} + schunk.dparams = blosc2.DParams(nthreads=1) post_data = np.empty(chunk_len * nchunks, dtype=output_dtype) schunk.get_slice(0, chunk_len * nchunks, out=post_data) diff --git a/tests/test_prefilters.py b/tests/test_prefilters.py index 818517a4..d0865ce9 100644 --- a/tests/test_prefilters.py +++ b/tests/test_prefilters.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from dataclasses import asdict, replace import blosc2 @@ -104,7 +105,7 @@ def fill_f4(inputs_tuple, output, offset): fill_f4((data, data2, np.pi), res, offset) - new_cparams = {"nthreads": 2} + new_cparams = replace(schunk.cparams, nthreads=2) schunk.cparams = new_cparams pre_data = np.empty(chunk_len * nchunks, dtype=schunk_dtype) @@ -180,7 +181,9 @@ def pref2(input, output, offset): def pref3(input, output, offset): output[:] = input <= np.datetime64("1997-12-31") - schunk.cparams = {"nthreads": 1} + new_cparams = asdict(schunk.cparams) + new_cparams["nthreads"] = 1 + schunk.cparams = blosc2.CParams(**new_cparams) schunk[: nchunks * chunk_len] = data post_data = np.empty(chunk_len * nchunks, dtype=schunk_dtype) diff --git a/tests/test_proxy_schunk.py b/tests/test_proxy_schunk.py index caa233c0..607efaf3 100644 --- a/tests/test_proxy_schunk.py +++ b/tests/test_proxy_schunk.py @@ -22,11 +22,11 @@ ], ) def test_schunk_proxy(contiguous, urlpath, chunksize, nchunks, start, stop): - storage = {"contiguous": contiguous, "cparams": {"typesize": 4}} + kwargs = {"contiguous": contiguous, "cparams": {"typesize": 4}} blosc2.remove_urlpath(urlpath) num_elem = chunksize // 4 * nchunks data = np.arange(num_elem, dtype="int32") - schunk = blosc2.SChunk(chunksize=chunksize, data=data, **storage) + schunk = blosc2.SChunk(chunksize=chunksize, data=data, **kwargs) bytes_obj = data.tobytes() cache = blosc2.Proxy(schunk, urlpath=urlpath) @@ -58,13 +58,13 @@ def test_schunk_proxy(contiguous, urlpath, chunksize, nchunks, start, stop): ], ) def test_open(urlpath, chunksize, nchunks): - storage = {"urlpath": urlpath, "cparams": {"typesize": 4}} + kwargs = {"urlpath": urlpath, "cparams": {"typesize": 4}} proxy_urlpath = "proxy.b2frame" blosc2.remove_urlpath(urlpath) blosc2.remove_urlpath(proxy_urlpath) num_elem = chunksize // 4 * nchunks data = np.arange(num_elem, dtype="int32") - schunk = blosc2.SChunk(chunksize=chunksize, data=data, **storage) + schunk = blosc2.SChunk(chunksize=chunksize, data=data, **kwargs) bytes_obj = data.tobytes() proxy = blosc2.Proxy(schunk, urlpath=proxy_urlpath) del proxy diff --git a/tests/test_python_blosc.py b/tests/test_python_blosc.py index fe06eac3..6d1001c6 100644 --- a/tests/test_python_blosc.py +++ b/tests/test_python_blosc.py @@ -239,7 +239,7 @@ def test_bitshuffle_not_multiple(self): xx = x.tobytes() with pytest.raises(ValueError): blosc2.compress(xx, typesize=8, filter=blosc2.Filter.BITSHUFFLE) - zxx = blosc2.compress(xx, filter=blosc2.Filter.BITSHUFFLE) + zxx = blosc2.compress(xx, typesize=1, filter=blosc2.Filter.BITSHUFFLE) last_xx = blosc2.decompress(zxx)[-3:] assert last_xx == b"\x01\x01\x01" @@ -248,7 +248,7 @@ def test_bitshuffle_leftovers(self): buffer = b" " * 641091 # a buffer that is not divisible by 8 with pytest.raises(ValueError): blosc2.compress(buffer, typesize=8, filter=blosc2.Filter.BITSHUFFLE, clevel=1) - cbuffer = blosc2.compress(buffer, filter=blosc2.Filter.BITSHUFFLE, clevel=1) + cbuffer = blosc2.compress(buffer, typesize=1, filter=blosc2.Filter.BITSHUFFLE, clevel=1) dbuffer = blosc2.decompress(cbuffer) assert buffer == dbuffer diff --git a/tests/test_schunk.py b/tests/test_schunk.py index 26e385e5..db27362e 100644 --- a/tests/test_schunk.py +++ b/tests/test_schunk.py @@ -7,6 +7,7 @@ ####################################################################### import os +from dataclasses import asdict, replace, fields import numpy as np import pytest @@ -37,31 +38,35 @@ @pytest.mark.parametrize( "cparams, dparams, nchunks", [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {}, 0), - ({"typesize": 4}, {"nthreads": 4}, 1), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {}, 5), + (blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), blosc2.DParams(), 0), + ({"typesize": 4}, blosc2.DParams(nthreads=4), 1), + (blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4), {}, 5), ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 10), ], ) def test_schunk_numpy(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nchunks): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) blosc2.remove_urlpath(urlpath) chunk_len = 200 * 1000 if mode != "r": - schunk = blosc2.SChunk(chunksize=chunk_len * 4, mode=mode, mmap_mode=mmap_mode, **storage) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, storage=storage, cparams=cparams, dparams=dparams) + else: with pytest.raises( ValueError, match="not specify a urlpath" if urlpath is None else "does not exist" ): - blosc2.SChunk(chunksize=chunk_len * 4, mode=mode, mmap_mode=mmap_mode, **storage) + blosc2.SChunk(chunksize=chunk_len * 4, storage=storage, cparams=cparams, dparams=dparams) # Create a schunk which we can read later + storage2 = replace(storage, + mode="w" if mmap_mode is None else None, + mmap_mode="w+" if mmap_mode is not None else None) schunk = blosc2.SChunk( chunksize=chunk_len * 4, - mode="w" if mmap_mode is None else None, - mmap_mode="w+" if mmap_mode is not None else None, - **storage, + storage=storage2, + cparams=cparams, + dparams=dparams ) assert schunk.urlpath == urlpath @@ -74,9 +79,10 @@ def test_schunk_numpy(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nc if mode == "r": if urlpath is not None: - schunk = blosc2.SChunk(chunksize=chunk_len * 4, mode=mode, mmap_mode=mmap_mode, **storage) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, **asdict(storage)) else: return + assert schunk.nchunks == nchunks for i in range(nchunks): buffer = i * np.arange(chunk_len, dtype="int32") @@ -132,20 +138,20 @@ def test_schunk_ndarray(tmp_path, mode_write, mode_read, mmap_mode_write, mmap_m @pytest.mark.parametrize( "nbytes, cparams, dparams, nchunks", [ - (7, {"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 5}, {}, 1), - (641091, {"typesize": 3}, {"nthreads": 2}, 1), - (136, {"typesize": 1}, {}, 5), + (7, blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=5), {}, 1), + (641091, {"typesize": 3}, blosc2.DParams(nthreads=2), 1), + (136, blosc2.CParams(typesize=1), blosc2.DParams(), 5), (1232, {"typesize": 8}, blosc2.dparams_dflts, 10), ], ) def test_schunk(contiguous, urlpath, mode, mmap_mode, nbytes, cparams, dparams, nchunks): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} numpy_meta = {b"dtype": str(np.dtype(np.uint8))} test_meta = {b"lorem": 1234} meta = {"numpy": numpy_meta, "test": test_meta} blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=2 * nbytes, meta=meta, mode=mode, mmap_mode=mmap_mode, **storage) + schunk = blosc2.SChunk(chunksize=2 * nbytes, meta=meta, mode=mode, mmap_mode=mmap_mode, **kwargs) assert "numpy" in schunk.meta assert "error" not in schunk.meta @@ -189,27 +195,31 @@ def test_schunk(contiguous, urlpath, mode, mmap_mode, nbytes, cparams, dparams, @pytest.mark.parametrize( "cparams, dparams, nchunks", [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {}, 1), + ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, blosc2.DParams(), 1), ({"typesize": 4}, {"nthreads": 4}, 1), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {}, 5), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 10), + (blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4), {}, 5), + (blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4), blosc2.DParams(), 10), ], ) @pytest.mark.parametrize("copy", [True, False]) def test_schunk_cframe(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nchunks, copy): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, + mode=mode, mmap_mode=mmap_mode) blosc2.remove_urlpath(urlpath) data = np.arange(200 * 1000 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, mode=mode, mmap_mode=mmap_mode, **storage) + schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, **asdict(storage), cparams=cparams, dparams=dparams) cframe = schunk.to_cframe() schunk2 = blosc2.schunk_from_cframe(cframe, copy) + cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams) if not os.getenv("BTUNE_TRADEOFF"): - for key in cparams: + for key in cparams_dict: if key == "nthreads": continue - assert schunk2.cparams[key] == cparams[key] + if key == "blocksize" and cparams_dict[key] == 0: + continue + assert getattr(schunk2.cparams, key) == cparams_dict[key] data2 = np.empty(data.shape, dtype=data.dtype) schunk2.get_slice(out=data2) @@ -228,76 +238,56 @@ def test_schunk_cframe(contiguous, urlpath, mode, mmap_mode, cparams, dparams, n "cparams, dparams, new_cparams, new_dparams", [ ( - {"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, + blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), {}, - {"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, - {"nthreads": 4}, + blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), + blosc2.DParams(nthreads=4), ), ( {"typesize": 4}, - {"nthreads": 4}, - {"codec": blosc2.Codec.ZLIB, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT}, - {"nthreads": 1}, + blosc2.DParams(nthreads=4), + blosc2.CParams(codec=blosc2.Codec.ZLIB, splitmode=blosc2.SplitMode.ALWAYS_SPLIT), + blosc2.DParams(nthreads=1), ), ( {"codec": blosc2.Codec.ZLIB, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT}, {}, - { - "splitmode": blosc2.SplitMode.ALWAYS_SPLIT, - "nthreads": 5, - "typesize": 4, - "filters": [blosc2.Filter.SHUFFLE, blosc2.Filter.TRUNC_PREC], - }, - {"nthreads": 16}, + blosc2.CParams( + splitmode=blosc2.SplitMode.ALWAYS_SPLIT, + nthreads=5, + typesize=4, + filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.TRUNC_PREC], + ), + blosc2.DParams(nthreads=16), ), ( - {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, - {}, - {"filters": [blosc2.Filter.SHUFFLE, blosc2.Filter.TRUNC_PREC]}, - {"nthreads": 3}, + blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4), + blosc2.DParams(), + blosc2.CParams(filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.TRUNC_PREC]), + blosc2.DParams(nthreads=3), ), ], ) def test_schunk_cdparams(cparams, dparams, new_cparams, new_dparams): - storage = {"cparams": cparams, "dparams": dparams} + kwargs = {"cparams": cparams, "dparams": dparams} chunk_len = 200 * 1000 - schunk = blosc2.SChunk(chunksize=chunk_len * 4, **storage) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, **kwargs) # Check cparams have been set correctly - for key in cparams: - assert schunk.cparams[key] == cparams[key] - for key in dparams: - assert schunk.dparams[key] == dparams[key] + cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams) + dparams_dict = dparams if isinstance(dparams, dict) else asdict(dparams) + for key in cparams_dict: + assert getattr(schunk.cparams, key) == cparams_dict[key] + for key in dparams_dict: + assert getattr(schunk.dparams, key) == dparams_dict[key] schunk.cparams = new_cparams schunk.dparams = new_dparams - for key in schunk.cparams: - if key in new_cparams: - if key == "filters": - assert schunk.cparams[key][: len(new_cparams[key])] == new_cparams[key] - else: - assert schunk.cparams[key] == new_cparams[key] - elif key in cparams: - if key == "filters": - assert schunk.cparams[key][: len(cparams[key])] == cparams[key] - else: - assert schunk.cparams[key] == cparams[key] + for field in fields(schunk.cparams): + if field.name in ["filters", "filters_meta"]: + assert getattr(schunk.cparams, field.name)[: len(getattr(new_cparams, field.name))] == getattr(new_cparams, field.name) else: - if key == "filters": - assert schunk.cparams[key][: len(blosc2.cparams_dflts[key])] == blosc2.cparams_dflts[key] - elif key == "filters_meta": - # Exception for testing bytedelta in the last position - assert ( - schunk.cparams[key][: len(blosc2.cparams_dflts[key]) - 1] - == blosc2.cparams_dflts[key][:-1] - ) - else: - assert schunk.cparams[key] == blosc2.cparams_dflts[key] - - if "nthreads" in new_dparams: - assert schunk.dparams["nthreads"] == new_dparams["nthreads"] - elif "nthreads" in dparams: - assert schunk.dparams["nthreads"] == dparams["nthreads"] - else: - assert schunk.dparams["nthreads"] == blosc2.dparams_dflts["nthreads"] + assert getattr(schunk.cparams, field.name) == getattr(new_cparams, field.name) + + assert schunk.dparams.nthreads == new_dparams.nthreads diff --git a/tests/test_schunk_constructor.py b/tests/test_schunk_constructor.py index f04b51d2..dda1e956 100644 --- a/tests/test_schunk_constructor.py +++ b/tests/test_schunk_constructor.py @@ -24,13 +24,13 @@ ], ) def test_schunk_numpy(contiguous, urlpath, cparams, dparams, chunksize): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) num_elem = 20 * 1000 nchunks = num_elem * 4 // chunksize + 1 if num_elem * 4 % chunksize != 0 else num_elem * 4 // chunksize data = np.arange(num_elem, dtype="int32") bytes_obj = data.tobytes() - schunk = blosc2.SChunk(chunksize=chunksize, data=data, **storage) + schunk = blosc2.SChunk(chunksize=chunksize, data=data, **kwargs) # Test properties assert len(schunk) == num_elem assert chunksize == schunk.chunksize @@ -81,14 +81,14 @@ def test_schunk_numpy(contiguous, urlpath, cparams, dparams, chunksize): ], ) def test_schunk(contiguous, urlpath, cparams, dparams, chunksize): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + storage = {"contiguous": contiguous, "urlpath": urlpath} blosc2.remove_urlpath(urlpath) nrep = 1000 nchunks = 5 * nrep // chunksize + 1 if nrep * 5 % chunksize != 0 else 5 * nrep // chunksize buffer = b"1234 " * nrep - schunk = blosc2.SChunk(chunksize=chunksize, data=buffer, **storage) + schunk = blosc2.SChunk(chunksize=chunksize, data=buffer, cparams=cparams, dparams=dparams, **storage) for i in range(nchunks): start = i * chunksize @@ -141,11 +141,11 @@ def test_schunk(contiguous, urlpath, cparams, dparams, chunksize): ], ) def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value, expected_value): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} blosc2.remove_urlpath(urlpath) chunk_len = 200 * 1000 - schunk = blosc2.SChunk(chunksize=chunk_len * 4, **storage) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, **kwargs) if special_value in [blosc2.SpecialValue.ZERO, blosc2.SpecialValue.NAN, blosc2.SpecialValue.UNINIT]: schunk.fill_special(nitems, special_value) else: diff --git a/tests/test_schunk_delete.py b/tests/test_schunk_delete.py index 2a00a7bb..6156c291 100644 --- a/tests/test_schunk_delete.py +++ b/tests/test_schunk_delete.py @@ -26,7 +26,7 @@ ], ) def test_schunk_delete_numpy(contiguous, urlpath, nchunks, ndeletes): - storage = { + kwargs = { "contiguous": contiguous, "urlpath": urlpath, "cparams": {"nthreads": 2}, @@ -34,7 +34,7 @@ def test_schunk_delete_numpy(contiguous, urlpath, nchunks, ndeletes): } blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **storage) + schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") nchunks_ = schunk.append_data(buffer) @@ -72,13 +72,11 @@ def test_schunk_delete(contiguous, urlpath, nchunks, ndeletes): storage = { "contiguous": contiguous, "urlpath": urlpath, - "cparams": {"nthreads": 2}, - "dparams": {"nthreads": 2}, } blosc2.remove_urlpath(urlpath) nbytes = 23401 - schunk = blosc2.SChunk(chunksize=nbytes * 2, **storage) + schunk = blosc2.SChunk(chunksize=nbytes * 2, cparams={"nthreads": 2}, dparams={"nthreads": 2}, **storage) for i in range(nchunks): bytes_obj = b"i " * nbytes nchunks_ = schunk.append_data(bytes_obj) diff --git a/tests/test_schunk_get_slice.py b/tests/test_schunk_get_slice.py index bf563ecf..d136460a 100644 --- a/tests/test_schunk_get_slice.py +++ b/tests/test_schunk_get_slice.py @@ -34,11 +34,11 @@ ], ) def test_schunk_get_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, start, stop): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, mode=mode, **storage) + schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, mode=mode, **kwargs) start_, stop_ = start, stop if start is None: @@ -82,10 +82,8 @@ def test_schunk_get_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, ], ) def test_schunk_getitem_int(cparams, nchunks, elem): - storage = {"cparams": cparams} - data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **storage) + schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, cparams=cparams) sl = data[elem] res = schunk[elem] @@ -93,12 +91,12 @@ def test_schunk_getitem_int(cparams, nchunks, elem): def test_schunk_get_slice_raises(): - storage = {"contiguous": True, "urlpath": "schunk.b2frame", "cparams": {"typesize": 4}, "dparams": {}} - blosc2.remove_urlpath(storage["urlpath"]) + kwargs = {"contiguous": True, "urlpath": "schunk.b2frame", "cparams": {"typesize": 4}, "dparams": {}} + blosc2.remove_urlpath(kwargs["urlpath"]) nchunks = 2 data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **storage) + schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **kwargs) start = 200 * 100 stop = 200 * 100 * nchunks @@ -118,4 +116,4 @@ def test_schunk_get_slice_raises(): stop = start + 4 assert schunk[start:stop] == b"" - blosc2.remove_urlpath(storage["urlpath"]) + blosc2.remove_urlpath(kwargs["urlpath"]) diff --git a/tests/test_schunk_get_slice_nchunks.py b/tests/test_schunk_get_slice_nchunks.py index 15ce2c14..ecce9cf2 100644 --- a/tests/test_schunk_get_slice_nchunks.py +++ b/tests/test_schunk_get_slice_nchunks.py @@ -33,8 +33,8 @@ ], ) def test_schunk_get_slice(contiguous, urlpath, cparams, nchunks, start, stop): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, mode="w", **storage) + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} + schunk = blosc2.SChunk(chunksize=200 * 100 * 4, mode="w", **kwargs) for i in range(nchunks): chunk = np.full(schunk.chunksize // schunk.typesize, i, dtype=np.int32) schunk.append_data(chunk) diff --git a/tests/test_schunk_insert.py b/tests/test_schunk_insert.py index a08f230f..18abc21a 100644 --- a/tests/test_schunk_insert.py +++ b/tests/test_schunk_insert.py @@ -30,15 +30,10 @@ @pytest.mark.parametrize("create_chunk", [True, False]) def test_schunk_insert_numpy(contiguous, urlpath, nchunks, ninserts, copy, create_chunk, gil): blosc2.set_releasegil(gil) - storage = { - "contiguous": contiguous, - "urlpath": urlpath, - "cparams": {"nthreads": 2}, - "dparams": {"nthreads": 2}, - } + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **storage) + schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, storage=storage, cparams={"nthreads": 2}, dparams={"nthreads": 2}) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") nchunks_ = schunk.append_data(buffer) diff --git a/tests/test_schunk_set_slice.py b/tests/test_schunk_set_slice.py index 3f75f48a..f5a26683 100644 --- a/tests/test_schunk_set_slice.py +++ b/tests/test_schunk_set_slice.py @@ -34,11 +34,11 @@ ], ) def test_schunk_set_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, start, stop): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + storage = {"contiguous": contiguous, "urlpath": urlpath, "mode": mode} blosc2.remove_urlpath(urlpath) data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, mode=mode, **storage) + schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, storage=storage, cparams=cparams, dparams=dparams) _start, _stop = start, stop if _start is None: @@ -62,14 +62,14 @@ def test_schunk_set_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, def test_schunk_set_slice_raises(): - storage = {"contiguous": True, "urlpath": "schunk.b2frame", "cparams": {"typesize": 4}, "dparams": {}} - blosc2.remove_urlpath(storage["urlpath"]) + kwargs = {"contiguous": True, "urlpath": "schunk.b2frame", "cparams": {"typesize": 4}, "dparams": {}} + blosc2.remove_urlpath(kwargs["urlpath"]) nchunks = 2 data = np.arange(200 * 100 * nchunks, dtype="int32") - blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **storage) + blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **kwargs) - schunk = blosc2.open(storage["urlpath"], mode="r") + schunk = blosc2.open(kwargs["urlpath"], mode="r") start = 200 * 100 stop = 200 * 100 * nchunks val = 3 * np.arange(start, stop, dtype="int32") @@ -77,7 +77,7 @@ def test_schunk_set_slice_raises(): with pytest.raises(ValueError): schunk[start:stop] = val - schunk = blosc2.open(storage["urlpath"], mode="a") + schunk = blosc2.open(kwargs["urlpath"], mode="a") with pytest.raises(IndexError): schunk[start:stop:2] = val @@ -95,4 +95,4 @@ def test_schunk_set_slice_raises(): with pytest.raises(ValueError): schunk[start:stop] = val - blosc2.remove_urlpath(storage["urlpath"]) + blosc2.remove_urlpath(kwargs["urlpath"]) diff --git a/tests/test_schunk_update.py b/tests/test_schunk_update.py index 533e483a..03a247b0 100644 --- a/tests/test_schunk_update.py +++ b/tests/test_schunk_update.py @@ -29,7 +29,7 @@ @pytest.mark.parametrize("create_chunk", [True, False]) def test_schunk_update_numpy(contiguous, urlpath, nchunks, nupdates, copy, create_chunk, gil): blosc2.set_releasegil(gil) - storage = { + kwargs = { "contiguous": contiguous, "urlpath": urlpath, "cparams": {"nthreads": 2}, @@ -37,7 +37,7 @@ def test_schunk_update_numpy(contiguous, urlpath, nchunks, nupdates, copy, creat } blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **storage) + schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") nchunks_ = schunk.append_data(buffer) @@ -79,7 +79,7 @@ def test_schunk_update_numpy(contiguous, urlpath, nchunks, nupdates, copy, creat @pytest.mark.parametrize("create_chunk", [True, False]) def test_update(contiguous, urlpath, nchunks, nupdates, copy, create_chunk, gil): blosc2.set_releasegil(gil) - storage = { + kwargs = { "contiguous": contiguous, "urlpath": urlpath, "cparams": {"nthreads": 2}, @@ -89,7 +89,7 @@ def test_update(contiguous, urlpath, nchunks, nupdates, copy, create_chunk, gil) blosc2.remove_urlpath(urlpath) nbytes = 23401 - schunk = blosc2.SChunk(chunksize=nbytes * 2, **storage) + schunk = blosc2.SChunk(chunksize=nbytes * 2, **kwargs) for i in range(nchunks): bytes_obj = b"i " * nbytes nchunks_ = schunk.append_data(bytes_obj) diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 00000000..93b64158 --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,186 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# This source code is licensed under a BSD-style license (found in the +# LICENSE file in the root directory of this source tree) +####################################################################### + +from dataclasses import asdict, fields +import numpy as np + +import pytest + +import blosc2 + + +@pytest.mark.parametrize( + "urlpath, contiguous, mode, mmap_mode", + [ + (None, None, "w", None), + (None, False, "a", None), + (None, None, "r", None), + (None, True, "a", None), + ("b2frame", None, "r", None), + ("b2frame", False, "a", None), + ("b2frame", True, "w", None), + ("b2frame", True, "r", "r"), + ("b2frame", None, "w", "w+"), + ], +) +def test_storage_values(contiguous, urlpath, mode, mmap_mode): + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) + if contiguous is None: + if urlpath is not None: + assert storage.contiguous + else: + assert not storage.contiguous + else: + assert storage.contiguous == contiguous + + assert storage.urlpath == urlpath + assert storage.mode == mode + assert storage.mmap_mode == mmap_mode + + +def test_storage_defaults(): + storage = blosc2.Storage() + assert storage.contiguous == False + assert storage.urlpath is None + assert storage.mode == "a" + assert storage.mmap_mode is None + assert storage.initial_mapping_size is None + assert storage.meta is None + + +@pytest.mark.parametrize( + "urlpath, contiguous", + [ + (None, False), + (None, True), + ("b2frame", False), + ("b2frame", True), + ], +) +def test_raises_storage(contiguous, urlpath): + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) + blosc2.remove_urlpath(urlpath) + + for field in fields(blosc2.Storage): + with pytest.raises(AttributeError): + _ = blosc2.SChunk(storage=storage, **{str(field.name): {}}) + with pytest.raises(TypeError): + _ = blosc2.SChunk(**{str(field.name): {}}, **asdict(storage)) + + with pytest.raises(AttributeError): + _ = blosc2.empty((30, 30), storage=storage, **{str(field.name): {}}) + with pytest.raises(TypeError): + _ = blosc2.empty((30, 30), **{str(field.name): {}}, **asdict(storage)) + + +@pytest.mark.parametrize( + "cparams", + [ + blosc2.CParams(codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]), + {"typesize": 4, 'filters': [blosc2.Filter.TRUNC_PREC, blosc2.Filter.DELTA], 'filters_meta': [0, 0]}, + blosc2.CParams(nthreads=5, filters=[blosc2.Filter.BITSHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0] * 3), + {"codec": blosc2.Codec.LZ4HC, "typesize": 4, 'filters': [blosc2.Filter.BYTEDELTA]}, + ], +) +def test_cparams_values(cparams): + schunk = blosc2.SChunk(cparams=cparams) + cparams_dataclass = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) + for field in fields(cparams_dataclass): + if field.name in ['filters', 'filters_meta']: + assert getattr(schunk.cparams, field.name)[:len(getattr(cparams_dataclass, field.name))] == getattr(cparams_dataclass, field.name) + else: + assert getattr(schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) + + array = blosc2.empty((30, 30), np.int32, cparams=cparams) + for field in fields(cparams_dataclass): + if field.name in ['filters', 'filters_meta']: + assert getattr(array.schunk.cparams, field.name)[:len(getattr(cparams_dataclass, field.name))] == getattr(cparams_dataclass, field.name) + elif field.name == 'typesize': + assert getattr(array.schunk.cparams, field.name) == array.dtype.itemsize + elif field.name != 'blocksize': + assert getattr(array.schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) + + blosc2.set_nthreads(10) + schunk = blosc2.SChunk(cparams=cparams) + cparams_dataclass = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) + assert schunk.cparams.nthreads == cparams_dataclass.nthreads + + array = blosc2.empty((30, 30), np.int32, cparams=cparams) + assert array.schunk.cparams.nthreads == cparams_dataclass.nthreads + + +def test_cparams_defaults(): + cparams = blosc2.CParams() + assert cparams.codec == blosc2.Codec.ZSTD + assert cparams.codec_meta == 0 + assert cparams.splitmode == blosc2.SplitMode.ALWAYS_SPLIT + assert cparams.clevel == 1 + assert cparams.typesize == 8 + assert cparams.nthreads == blosc2.nthreads + assert cparams.filters == [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE] + assert cparams.filters_meta == [0] * 6 + assert not cparams.use_dict + assert cparams.blocksize == 0 + assert cparams.tuner == blosc2.Tuner.STUNE + + blosc2.set_nthreads(1) + cparams = blosc2.CParams() + assert cparams.nthreads == blosc2.nthreads + + +def test_raises_cparams(): + cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4) + for field in fields(blosc2.CParams): + with pytest.raises(ValueError): + _ = blosc2.SChunk(cparams=cparams, **{str(field.name): {}}) + with pytest.raises(AttributeError): + _ = blosc2.compress2(b"12345678" * 1000, cparams=cparams, **{str(field.name): {}}) + with pytest.raises(KeyError): + _ = blosc2.empty((10, 10), cparams=cparams, **{str(field.name): {}}) + + +@pytest.mark.parametrize( + "dparams", + [ + (blosc2.DParams()), + (blosc2.DParams(nthreads=2)), + ({}), + ({'nthreads': 2}), + ], +) +def test_dparams_values(dparams): + schunk = blosc2.SChunk(dparams=dparams) + dparams_dataclass = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) + array = blosc2.empty((30, 30), dparams=dparams) + for field in fields(dparams_dataclass): + assert getattr(schunk.dparams, field.name) == getattr(dparams_dataclass, field.name) + assert getattr(array.schunk.dparams, field.name) == getattr(dparams_dataclass, field.name) + + blosc2.set_nthreads(3) + schunk = blosc2.SChunk(dparams=dparams) + dparams_dataclass = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) + array = blosc2.empty((30, 30), dparams=dparams) + assert schunk.dparams.nthreads == dparams_dataclass.nthreads + assert array.schunk.dparams.nthreads == dparams_dataclass.nthreads + +def test_dparams_defaults(): + dparams = blosc2.DParams() + assert dparams.nthreads == blosc2.nthreads + + blosc2.set_nthreads(1) + dparams = blosc2.DParams() + assert dparams.nthreads == blosc2.nthreads + + +def test_raises_dparams(): + dparams = blosc2.DParams() + for field in fields(blosc2.DParams): + with pytest.raises(ValueError): + _ = blosc2.SChunk(dparams=dparams, **{str(field.name): {}}) + with pytest.raises(AttributeError): + _ = blosc2.decompress2(b"12345678" * 1000, dparams=dparams, **{str(field.name): {}}) diff --git a/tests/test_ucodecs.py b/tests/test_ucodecs.py index d4ff8f07..a20e9abe 100644 --- a/tests/test_ucodecs.py +++ b/tests/test_ucodecs.py @@ -40,6 +40,7 @@ def test_ucodecs(contiguous, urlpath, cparams, nchunks, codec_name, id, dtype): chunk_len = 20 * 1000 blocksize = chunk_len * dtype.itemsize / 10 cparams["blocksize"] = blocksize + cparams["typesize"] = dtype.itemsize def encoder1(input, output, meta, schunk): nd_input = input.view(dtype) @@ -71,7 +72,7 @@ def decoder1(input, output, meta, schunk): data=data, contiguous=contiguous, urlpath=urlpath, - cparams=cparams, + cparams=blosc2.CParams(**cparams), dparams=dparams, ) @@ -149,5 +150,5 @@ def test_dynamic_ucodecs_error(cparams, dparams): chunksize=chunk_len * dtype.itemsize, data=data, cparams=cparams, - dparams=dparams, + dparams=blosc2.DParams(**dparams), ) diff --git a/tests/test_ufilters.py b/tests/test_ufilters.py index 27218b26..90b925bb 100644 --- a/tests/test_ufilters.py +++ b/tests/test_ufilters.py @@ -82,7 +82,7 @@ def backward2(input, output, meta, schunk): contiguous=contiguous, urlpath=urlpath, cparams=cparams, - dparams=dparams, + dparams=blosc2.DParams(**dparams), ) out = np.empty(chunk_len * nchunks, dtype=dtype) @@ -129,7 +129,7 @@ def backward(input, output, meta, schunk): _ = blosc2.SChunk( chunksize=chunk_len * dtype.itemsize, data=data, - cparams=cparams, + cparams=blosc2.CParams(**cparams), dparams=dparams, ) diff --git a/tests/test_vlmeta.py b/tests/test_vlmeta.py index ddbe5267..9155cc1f 100644 --- a/tests/test_vlmeta.py +++ b/tests/test_vlmeta.py @@ -21,10 +21,10 @@ ], ) def test_schunk_numpy(contiguous, urlpath, cparams, dparams, nchunks): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **storage) + schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") nchunks_ = schunk.append_data(buffer) @@ -47,10 +47,10 @@ def test_schunk_numpy(contiguous, urlpath, cparams, dparams, nchunks): ], ) def test_schunk(contiguous, urlpath, nbytes, cparams, dparams, nchunks): - storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} + kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=2 * nbytes, **storage) + schunk = blosc2.SChunk(chunksize=2 * nbytes, **kwargs) for i in range(nchunks): bytes_obj = b"i " * nbytes nchunks_ = schunk.append_data(bytes_obj)