From 3967bb569cd2374c9fe2f2c70dd584b9bf4ea681 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:02:42 +0200 Subject: [PATCH 01/16] Apply ruff/isort rule I001 I001 Import block is un-sorted or un-formatted --- src/blosc2/__init__.py | 4 ++-- src/blosc2/lazyexpr.py | 1 - src/blosc2/ndarray.py | 4 +++- src/blosc2/storage.py | 2 +- tests/ndarray/test_full.py | 3 ++- tests/ndarray/test_lossy.py | 3 ++- tests/ndarray/test_proxy.py | 3 ++- tests/ndarray/test_proxy_expr.py | 3 ++- tests/test_prefilters.py | 3 ++- tests/test_schunk.py | 2 +- tests/test_storage.py | 2 +- 11 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index a3197573..724ff8f0 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -200,7 +200,7 @@ class Tuner(Enum): nthreads -= nthreads // 8 # This import must be before ndarray and schunk -from .storage import ( +from .storage import ( # noqa: I001 CParams, cparams_dflts, DParams, @@ -220,10 +220,10 @@ class Tuner(Enum): empty, frombuffer, full, + get_slice_nchunks, nans, uninit, zeros, - get_slice_nchunks, ) from .c2array import c2context, C2Array, URLPath diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 78f1d5b1..14d25a59 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -19,7 +19,6 @@ from enum import Enum from pathlib import Path from queue import Empty, Queue - from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index bccf9284..f41273a2 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -16,15 +16,17 @@ if TYPE_CHECKING: from collections.abc import Sequence +from dataclasses import asdict + import ndindex import numpy as np -from dataclasses import asdict import blosc2 from blosc2 import SpecialValue, blosc2_ext, compute_chunks_blocks from blosc2.info import InfoReporter from blosc2.schunk 
import SChunk + def is_documented_by(original): def wrapper(target): target.__doc__ = original.__doc__ diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py index 759b297e..8b8b24ff 100644 --- a/src/blosc2/storage.py +++ b/src/blosc2/storage.py @@ -6,8 +6,8 @@ # LICENSE file in the root directory of this source tree) ####################################################################### -from dataclasses import dataclass, field, asdict, fields import warnings +from dataclasses import asdict, dataclass, field, fields import blosc2 diff --git a/tests/ndarray/test_full.py b/tests/ndarray/test_full.py index 4f7a5e81..9f7f90af 100644 --- a/tests/ndarray/test_full.py +++ b/tests/ndarray/test_full.py @@ -6,9 +6,10 @@ # LICENSE file in the root directory of this source tree) ####################################################################### +from dataclasses import asdict + import numpy as np import pytest -from dataclasses import asdict import blosc2 diff --git a/tests/ndarray/test_lossy.py b/tests/ndarray/test_lossy.py index b0a21fd4..09caf737 100644 --- a/tests/ndarray/test_lossy.py +++ b/tests/ndarray/test_lossy.py @@ -6,10 +6,11 @@ # LICENSE file in the root directory of this source tree) ####################################################################### +from dataclasses import asdict + import numpy as np import pytest -from dataclasses import asdict import blosc2 diff --git a/tests/ndarray/test_proxy.py b/tests/ndarray/test_proxy.py index 1a44c559..e78ba342 100644 --- a/tests/ndarray/test_proxy.py +++ b/tests/ndarray/test_proxy.py @@ -6,9 +6,10 @@ # LICENSE file in the root directory of this source tree) ####################################################################### -import blosc2 import numpy as np import pytest + +import blosc2 from blosc2.ndarray import get_chunks_idx argnames = "urlpath, shape, chunks, blocks, slices, dtype" diff --git a/tests/ndarray/test_proxy_expr.py b/tests/ndarray/test_proxy_expr.py index a9250ef9..0ce2a110 100644 
--- a/tests/ndarray/test_proxy_expr.py +++ b/tests/ndarray/test_proxy_expr.py @@ -7,11 +7,12 @@ ####################################################################### import pathlib -import blosc2 import numexpr as ne import numpy as np import pytest +import blosc2 + pytestmark = pytest.mark.network ROOT = "b2tests" diff --git a/tests/test_prefilters.py b/tests/test_prefilters.py index d0865ce9..f1fcaa97 100644 --- a/tests/test_prefilters.py +++ b/tests/test_prefilters.py @@ -6,9 +6,10 @@ # LICENSE file in the root directory of this source tree) ####################################################################### +from dataclasses import asdict, replace + import numpy as np import pytest -from dataclasses import asdict, replace import blosc2 diff --git a/tests/test_schunk.py b/tests/test_schunk.py index db27362e..b5198a10 100644 --- a/tests/test_schunk.py +++ b/tests/test_schunk.py @@ -7,7 +7,7 @@ ####################################################################### import os -from dataclasses import asdict, replace, fields +from dataclasses import asdict, fields, replace import numpy as np import pytest diff --git a/tests/test_storage.py b/tests/test_storage.py index 93b64158..cc94b18b 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -7,8 +7,8 @@ ####################################################################### from dataclasses import asdict, fields -import numpy as np +import numpy as np import pytest import blosc2 From b737875e3ff15b44f2df71dc5bb9605a164c2dae Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:08:50 +0200 Subject: [PATCH 02/16] Disable ruff/flake8-bugbear rule B028 B028 No explicit `stacklevel` keyword argument found --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8c23ee26..c2ac3201 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ extend-select = [ "UP", ] 
ignore = [ + "B028", "PT006", "PT011", "RET505", From 63c0d221aba24b9579a727430d2b4cbcb8d2f967 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:10:31 +0200 Subject: [PATCH 03/16] Apply ruff/pycodestyle rule E712 E712 Avoid equality comparisons to `False` --- tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_storage.py b/tests/test_storage.py index cc94b18b..2e7fa6d5 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -45,7 +45,7 @@ def test_storage_values(contiguous, urlpath, mode, mmap_mode): def test_storage_defaults(): storage = blosc2.Storage() - assert storage.contiguous == False + assert storage.contiguous is False assert storage.urlpath is None assert storage.mode == "a" assert storage.mmap_mode is None From 7cc013ec48f87cf3de2f414bc76f61941a1f6486 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:12:19 +0200 Subject: [PATCH 04/16] Apply ruff/pyupgrade rule UP032 UP032 Use f-string instead of `format` call --- src/blosc2/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py index 8b8b24ff..4a9e3ae3 100644 --- a/src/blosc2/storage.py +++ b/src/blosc2/storage.py @@ -206,7 +206,7 @@ def __post_init__(self): if (getattr(self, field.name) is None and field.name not in ['urlpath', 'mmap_mode', 'initial_mapping_size', 'meta']): setattr(self, field.name, getattr(Storage(), field.name)) - warnings.warn("`{name}` field value changed from `None` to `{value}`".format(name=field.name, value=getattr(self, field.name))) + warnings.warn(f"`{field.name}` field value changed from `None` to `{getattr(self, field.name)}`") # Defaults for compression params From 74a0ca7e4d15a935aebef7b13b20bf858a89db9e Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos 
<3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:13:19 +0200 Subject: [PATCH 05/16] Apply ruff/pyupgrade rule UP035 UP035 Import from `collections.abc` instead --- src/blosc2/ndarray.py | 3 ++- src/blosc2/schunk.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index f41273a2..9ff06b45 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -11,7 +11,8 @@ import builtins import math from collections import namedtuple -from typing import TYPE_CHECKING, Iterator, NamedTuple +from collections.abc import Iterator +from typing import TYPE_CHECKING, NamedTuple if TYPE_CHECKING: from collections.abc import Sequence diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index 91ee36fe..bdd022a8 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -10,9 +10,9 @@ import os import pathlib from collections import namedtuple -from collections.abc import Mapping, MutableMapping +from collections.abc import Iterator, Mapping, MutableMapping from dataclasses import asdict -from typing import Any, Iterator, NamedTuple +from typing import Any, NamedTuple import numpy as np from msgpack import packb, unpackb From bcd5c3aa5fa4553a458f83381a9e8598c6e3b0c7 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:16:54 +0200 Subject: [PATCH 06/16] Apply ruff/flake8-type-checking rule TCH003 TCH003 Move standard library import into a type-checking block --- src/blosc2/ndarray.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index 9ff06b45..bc2f6ed0 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -11,11 +11,10 @@ import builtins import math from collections import namedtuple -from collections.abc import Iterator from typing import TYPE_CHECKING, NamedTuple if TYPE_CHECKING: - from collections.abc import 
Sequence + from collections.abc import Iterator, Sequence from dataclasses import asdict From a5da19c45592b5411c4353d66d3a2637600eeb6d Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:19:15 +0200 Subject: [PATCH 07/16] Apply ruff/flake8-simplify rule SIM102 SIM102 Use a single `if` statement instead of nested `if` statements --- src/blosc2/ndarray.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index bc2f6ed0..463cb48a 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -2689,9 +2689,8 @@ def _check_ndarray_kwargs(**kwargs): raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead") if "blocks" in kwargs["cparams"]: raise ValueError("You cannot pass chunks in cparams, use `blocks` argument instead") - if "dparams" in kwargs: - if isinstance(kwargs["dparams"], blosc2.DParams): - kwargs["dparams"] = asdict(kwargs["dparams"]) + if "dparams" in kwargs and isinstance(kwargs["dparams"], blosc2.DParams): + kwargs["dparams"] = asdict(kwargs["dparams"]) return kwargs From 2a53986dd5274439f21970d0c60b7d572352dec2 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:20:50 +0200 Subject: [PATCH 08/16] Apply ruff/flake8-simplify rule SIM118 SIM118 Use `key in dict` instead of `key in dict.keys()` --- src/blosc2/schunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index bdd022a8..0104475e 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -233,7 +233,7 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): if kwarg not in allowed_kwargs: raise ValueError(f"{kwarg} is not supported as keyword argument") if kwargs.get("storage") is not None: - if any(key in list(blosc2.Storage.__annotations__) for key in 
kwargs.keys()): + if any(key in list(blosc2.Storage.__annotations__) for key in kwargs): raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") storage = kwargs.get("storage") if isinstance(storage, blosc2.Storage): From 1262f4b93740c541e4b7f22d44aac8209709651d Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:22:01 +0200 Subject: [PATCH 09/16] Apply ruff/flake8-simplify rule SIM211 SIM211 Use `not ...` instead of `False if ... else True` --- src/blosc2/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py index 4a9e3ae3..788340a5 100644 --- a/src/blosc2/storage.py +++ b/src/blosc2/storage.py @@ -200,7 +200,7 @@ class Storage: def __post_init__(self): if self.contiguous is None: - self.contiguous = False if self.urlpath is None else True + self.contiguous = self.urlpath is not None # Check for None values for field in fields(self): if (getattr(self, field.name) is None and From 3615eeaa35d497b503cadef4a1221a337d077a6f Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 27 Sep 2024 09:26:48 +0200 Subject: [PATCH 10/16] Apply ruff/Pyflakes rule F402 F402 Import shadowed by loop variable --- src/blosc2/storage.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py index 788340a5..a5c2f6bc 100644 --- a/src/blosc2/storage.py +++ b/src/blosc2/storage.py @@ -202,11 +202,11 @@ def __post_init__(self): if self.contiguous is None: self.contiguous = self.urlpath is not None # Check for None values - for field in fields(self): - if (getattr(self, field.name) is None and - field.name not in ['urlpath', 'mmap_mode', 'initial_mapping_size', 'meta']): - setattr(self, field.name, getattr(Storage(), field.name)) - warnings.warn(f"`{field.name}` field value 
changed from `None` to `{getattr(self, field.name)}`") + for f in fields(self): + if (getattr(self, f.name) is None and + f.name not in ['urlpath', 'mmap_mode', 'initial_mapping_size', 'meta']): + setattr(self, f.name, getattr(Storage(), f.name)) + warnings.warn(f"`{f.name}` field value changed from `None` to `{getattr(self, f.name)}`") # Defaults for compression params From 083b6d3b476b8f98a1aece5be46565dbba9c3aa2 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:31:08 +0200 Subject: [PATCH 11/16] Apply ruff/Pyflakes rule F821 F821 Undefined name --- src/blosc2/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/blosc2/core.py b/src/blosc2/core.py index e9905b2c..91690003 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ -29,6 +29,7 @@ if TYPE_CHECKING: from collections.abc import Callable + import tensorflow import torch From 037f1ff48b603f3a895c3d89503798bf6e82cfbf Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:34:16 +0200 Subject: [PATCH 12/16] =?UTF-8?q?Update=20ruff=200.6.2=20=E2=86=92=200.6.7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4c41e677..13a087fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,3 +11,4 @@ repos: hooks: - id: ruff - id: ruff-format + exclude: ^bench/ From ff737709e3df64e18dd64ceae51773e8c5f8b96a Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 27 Sep 2024 09:31:20 +0200 Subject: [PATCH 13/16] Apply preview rules Prepare for future versions by applying `ruff check --preview`. 
--- doc/getting_started/tutorials/04.reductions.ipynb | 2 +- pyproject.toml | 3 +++ src/blosc2/core.py | 2 +- src/blosc2/lazyexpr.py | 6 +++--- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/getting_started/tutorials/04.reductions.ipynb b/doc/getting_started/tutorials/04.reductions.ipynb index b862e297..f81ffc82 100644 --- a/doc/getting_started/tutorials/04.reductions.ipynb +++ b/doc/getting_started/tutorials/04.reductions.ipynb @@ -188,7 +188,7 @@ "outputs": [], "source": [ "def plot_meas(meas_np, meas, chunks):\n", - " fig, ax = plt.subplots()\n", + " _fig, ax = plt.subplots()\n", "\n", " # Define the groups and bar width\n", " groups = meas_np[\"time\"].keys()\n", diff --git a/pyproject.toml b/pyproject.toml index c2ac3201..d3672327 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,3 +102,6 @@ ignore = [ "RET508", "SIM108", ] + +[tool.ruff.lint.extend-per-file-ignores] +"tests/**" = ["F841"] diff --git a/src/blosc2/core.py b/src/blosc2/core.py index 91690003..611f113f 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ -1085,7 +1085,7 @@ def print_versions(): for clib in sorted(clib_versions.keys()): print(f" {clib}: {clib_versions[clib]}") print(f"Python version: {sys.version}") - (sysname, nodename, release, version, machine, processor) = platform.uname() + (sysname, _nodename, release, version, machine, processor) = platform.uname() print(f"Platform: {sysname}-{release}-{machine} ({version})") if sysname == "Linux": distro = os_release_pretty_name() diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 14d25a59..f1d41f8c 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -539,7 +539,7 @@ def fill_chunk_operands( if nchunk == 0: # Initialize the iterator for reading the chunks arr = operands["o0"] - chunks_idx, nchunks = get_chunks_idx(arr.shape, arr.chunks) + chunks_idx, _ = get_chunks_idx(arr.shape, arr.chunks) info = (reduc, aligned, low_mem, chunks_idx) iter_chunks = 
read_nchunk(list(operands.values()), info) # Run the asynchronous file reading function from a synchronous context @@ -1162,7 +1162,7 @@ def chunked_eval(expression: str | Callable[[tuple, np.ndarray, tuple[int]], Non if where: # Make the where arguments part of the operands operands = {**operands, **where} - shape, _, _, fast_path = validate_inputs(operands, out) + _, _, _, fast_path = validate_inputs(operands, out) # Activate last read cache for NDField instances for op in operands: @@ -1364,7 +1364,7 @@ def get_chunk(self, nchunk): shape = out.shape chunks = out.chunks # Calculate the shape of the (chunk) slice_ (specially at the end of the array) - chunks_idx, nchunks = get_chunks_idx(shape, chunks) + chunks_idx, _ = get_chunks_idx(shape, chunks) coords = tuple(np.unravel_index(nchunk, chunks_idx)) slice_ = tuple( slice(c * s, min((c + 1) * s, shape[i])) From 0bec280690f56d325264d02c4815649064aae8ea Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 27 Sep 2024 09:42:11 +0200 Subject: [PATCH 14/16] A round of `ruff format` --- src/blosc2/core.py | 75 ++++++++++------ src/blosc2/lazyexpr.py | 38 ++++++-- src/blosc2/ndarray.py | 147 ++++++++++++++++++++----------- src/blosc2/schunk.py | 42 +++++---- src/blosc2/storage.py | 31 ++++--- tests/ndarray/test_lazyexpr.py | 8 +- tests/ndarray/test_metalayers.py | 5 +- tests/ndarray/test_proxy.py | 21 +++-- tests/ndarray/test_reductions.py | 5 +- tests/test_compress2.py | 38 ++++---- tests/test_schunk.py | 24 ++--- tests/test_schunk_constructor.py | 23 ++--- tests/test_schunk_insert.py | 4 +- tests/test_schunk_set_slice.py | 4 +- tests/test_storage.py | 27 +++--- 15 files changed, 314 insertions(+), 178 deletions(-) diff --git a/src/blosc2/core.py b/src/blosc2/core.py index 611f113f..1a4c4df3 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ -133,8 +133,9 @@ def compress( return blosc2_ext.compress(src, typesize, clevel, filter, codec) -def 
decompress(src: object, dst: object | bytearray = None, - as_bytearray: bool = False) -> str | bytes | bytearray | None: +def decompress( + src: object, dst: object | bytearray = None, as_bytearray: bool = False +) -> str | bytes | bytearray | None: """Decompresses a bytes-like compressed object. Parameters @@ -202,8 +203,12 @@ def decompress(src: object, dst: object | bytearray = None, return blosc2_ext.decompress(src, dst, as_bytearray) -def pack(obj: object, clevel: int = 9, filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ) -> str | bytes: +def pack( + obj: object, + clevel: int = 9, + filter: blosc2.Filter = blosc2.Filter.SHUFFLE, + codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, +) -> str | bytes: """Pack (compress) a Python object. Parameters @@ -321,8 +326,12 @@ def unpack(packed_object: str | bytes, **kwargs: dict) -> object: return obj -def pack_array(arr: np.ndarray, clevel: int = 9, filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ) -> str | bytes: +def pack_array( + arr: np.ndarray, + clevel: int = 9, + filter: blosc2.Filter = blosc2.Filter.SHUFFLE, + codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, +) -> str | bytes: """Pack (compress) a NumPy array. It is equivalent to the pack function. Parameters @@ -591,8 +600,9 @@ def load_array(urlpath: str, dparams: dict = None) -> np.ndarray: return load_tensor(urlpath, dparams=dparams) -def pack_tensor(tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, chunksize: int = None, - **kwargs: dict) -> bytes | int: +def pack_tensor( + tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, chunksize: int = None, **kwargs: dict +) -> bytes | int: """Pack (compress) a TensorFlow or PyTorch tensor or a NumPy array. 
Parameters @@ -719,8 +729,12 @@ def unpack_tensor(cframe: bytes) -> tensorflow.Tensor | torch.Tensor | np.ndarra return _unpack_tensor(schunk) -def save_tensor(tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, urlpath: str, chunksize: int = None, - **kwargs: dict) -> int: +def save_tensor( + tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, + urlpath: str, + chunksize: int = None, + **kwargs: dict, +) -> int: """Save a serialized PyTorch or TensorFlow tensor or NumPy array in `urlpath`. Parameters @@ -1271,8 +1285,11 @@ def compute_partition(nitems, maxshape, minpart=None): def compute_chunks_blocks( - shape: tuple[int] | list, chunks: tuple | list | None = None, blocks: tuple | list | None = None, - dtype: np.dtype = np.uint8, **kwargs: dict + shape: tuple[int] | list, + chunks: tuple | list | None = None, + blocks: tuple | list | None = None, + dtype: np.dtype = np.uint8, + **kwargs: dict, ) -> tuple[(int, int)]: """ Compute educated guesses for chunks and blocks of a :ref:`NDArray`. @@ -1421,13 +1438,13 @@ def compress2(src: object, **kwargs: dict) -> str | bytes: If an internal error occurred, probably because some parameter is not a valid parameter. """ - if kwargs is not None and 'cparams' in kwargs: + if kwargs is not None and "cparams" in kwargs: if len(kwargs) > 1: raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams") - if isinstance(kwargs.get('cparams'), blosc2.CParams): - kwargs = asdict(kwargs.get('cparams')) + if isinstance(kwargs.get("cparams"), blosc2.CParams): + kwargs = asdict(kwargs.get("cparams")) else: - kwargs = kwargs.get('cparams') + kwargs = kwargs.get("cparams") return blosc2_ext.compress2(src, **kwargs) @@ -1481,13 +1498,13 @@ def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) -> If the length of :paramref:`src` is smaller than the minimum. If :paramref:`dst` is not None and its length is 0. 
""" - if kwargs is not None and 'dparams' in kwargs: + if kwargs is not None and "dparams" in kwargs: if len(kwargs) > 1: raise AttributeError("Cannot pass both dparams and other kwargs already included in DParams") - if isinstance(kwargs.get('dparams'), blosc2.DParams): - kwargs = asdict(kwargs.get('dparams')) + if isinstance(kwargs.get("dparams"), blosc2.DParams): + kwargs = asdict(kwargs.get("dparams")) else: - kwargs = kwargs.get('dparams') + kwargs = kwargs.get("dparams") return blosc2_ext.decompress2(src, dst, **kwargs) @@ -1582,11 +1599,11 @@ def ndarray_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.NDArr def register_codec( - codec_name: str, - id: int, - encoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, - decoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, - version: int = 1 + codec_name: str, + id: int, + encoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, + decoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, + version: int = 1, ) -> None: """Register a user defined codec. @@ -1664,10 +1681,10 @@ def decoder1(input, output, meta, schunk): def register_filter( - id: int, - forward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, - backward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, - name: str = None + id: int, + forward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, + backward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, + name: str = None, ) -> None: """Register an user defined filter. 
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index f1d41f8c..f2b31771 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -610,7 +610,10 @@ def fill_chunk_operands( def fast_eval( - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, getitem: bool, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands using a fast path. @@ -721,7 +724,11 @@ def fast_eval( def slices_eval( - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, getitem: bool, _slice=None, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + _slice=None, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands. @@ -896,7 +903,11 @@ def slices_eval( def reduce_slices( - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, reduce_args, _slice=None, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + reduce_args, + _slice=None, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands. @@ -1131,7 +1142,9 @@ def convert_none_out(dtype, reduce_op, reduced_shape): return out -def chunked_eval(expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=None, **kwargs): +def chunked_eval( + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=None, **kwargs +): """ Evaluate the expression in chunks of operands. 
@@ -1942,8 +1955,13 @@ def _open_lazyarray(array): return expr -def lazyudf(func: Callable[[tuple, np.ndarray, tuple[int]], None], inputs: tuple | list, - dtype: np.dtype, chunked_eval: bool = True, **kwargs: dict) -> LazyUDF: +def lazyudf( + func: Callable[[tuple, np.ndarray, tuple[int]], None], + inputs: tuple | list, + dtype: np.dtype, + chunked_eval: bool = True, + **kwargs: dict, +) -> LazyUDF: """ Get a LazyUDF from a python user-defined function. @@ -2002,8 +2020,12 @@ def lazyudf(func: Callable[[tuple, np.ndarray, tuple[int]], None], inputs: tuple return LazyUDF(func, inputs, dtype, chunked_eval, **kwargs) -def lazyexpr(expression: str | bytes | LazyExpr, operands: dict = None, - out: blosc2.NDArray | np.ndarray = None, where: tuple | list = None) -> LazyExpr: +def lazyexpr( + expression: str | bytes | LazyExpr, + operands: dict = None, + out: blosc2.NDArray | np.ndarray = None, + where: tuple | list = None, +) -> LazyExpr: """ Get a LazyExpr from an expression. diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index 463cb48a..8d503076 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -34,6 +34,7 @@ def wrapper(target): return wrapper + def make_key_hashable(key): if isinstance(key, slice): return (key.start, key.stop, key.step) @@ -151,9 +152,13 @@ def _check_allowed_dtypes( ) -def sum(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def sum( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the sum of array elements over a given axis. 
@@ -204,9 +209,13 @@ def sum(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.sum(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) -def mean(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def mean( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the arithmetic mean along the specified axis. @@ -251,9 +260,14 @@ def mean(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int return ndarr.mean(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) -def std(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, ddof: int = 0, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | bool: +def std( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + ddof: int = 0, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | bool: """ Return the standard deviation along the specified axis. 
@@ -305,9 +319,14 @@ def std(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.std(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) -def var(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, ddof: int = 0, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | bool: +def var( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + ddof: int = 0, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | bool: """ Return the variance along the specified axis. @@ -360,9 +379,13 @@ def var(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.var(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) -def prod(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def prod( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the product of array elements over a given axis. 
@@ -414,9 +437,12 @@ def prod(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int return ndarr.prod(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) -def min(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def min( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the minimum along a given axis. @@ -459,9 +485,12 @@ def min(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.min(axis=axis, keepdims=keepdims, **kwargs) -def max(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def max( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the maximum along a given axis. @@ -510,9 +539,12 @@ def max(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.max(axis=axis, keepdims=keepdims, **kwargs) -def any(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | bool: +def any( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | bool: """ Test whether any array element along a given axis evaluates to True. 
@@ -559,9 +591,12 @@ def any(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.any(axis=axis, keepdims=keepdims, **kwargs) -def all(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | bool: +def all( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | bool: """ Test whether all array elements along a given axis evaluate to True. @@ -981,7 +1016,9 @@ def blocksize(self) -> int: """ return self._schunk.blocksize - def __getitem__(self, key: int | slice | Sequence[slice] | blosc2.LazyExpr | str) -> np.ndarray | blosc2.LazyExpr: + def __getitem__( + self, key: int | slice | Sequence[slice] | blosc2.LazyExpr | str + ) -> np.ndarray | blosc2.LazyExpr: """Get a (multidimensional) slice as specified in key. Parameters @@ -1144,16 +1181,19 @@ def get_chunk(self, nchunk: int) -> bytes: """ return self.schunk.get_chunk(nchunk) - def iterchunks_info(self) -> Iterator[ - NamedTuple("info", - nchunk = int, - coords = tuple, - cratio = float, - special = blosc2.SpecialValue, - repeated_value = bytes | None, - lazychunk = bytes - ) - ]: + def iterchunks_info( + self, + ) -> Iterator[ + NamedTuple( + "info", + nchunk=int, + coords=tuple, + cratio=float, + special=blosc2.SpecialValue, + repeated_value=bytes | None, + lazychunk=bytes, + ) + ]: """ Iterate over :paramref:`self` chunks, providing info on index and special values. @@ -1201,7 +1241,6 @@ def iterchunks_info(self) -> Iterator[ repeated_value = np.frombuffer(cinfo.repeated_value, dtype=self.dtype)[0] yield ChunkInfoNDArray(nchunk, coords, cratio, special, repeated_value, lazychunk) - def tobytes(self) -> bytes: """Returns a buffer with the data contents. 
@@ -1291,10 +1330,16 @@ def copy(self, dtype: np.dtype = None, **kwargs: dict) -> NDArray: """ if dtype is None: dtype = self.dtype - kwargs["cparams"] = kwargs.get("cparams").copy() if isinstance(kwargs.get("cparams"), dict) \ + kwargs["cparams"] = ( + kwargs.get("cparams").copy() + if isinstance(kwargs.get("cparams"), dict) else asdict(self.schunk.cparams) - kwargs["dparams"] = kwargs.get("dparams").copy() if isinstance(kwargs.get("dparams"), dict) \ + ) + kwargs["dparams"] = ( + kwargs.get("dparams").copy() + if isinstance(kwargs.get("dparams"), dict) else asdict(self.schunk.dparams) + ) if "meta" not in kwargs: # Copy metalayers as well meta_dict = {meta: self.schunk.meta[meta] for meta in self.schunk.meta} @@ -1759,8 +1804,9 @@ def arctan(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, /) -> bl return blosc2.LazyExpr(new_op=(ndarr, "arctan", None)) -def arctan2(ndarr1: NDArray | NDField | blosc2.C2Array, - ndarr2: NDArray | NDField | blosc2.C2Array, /) -> blosc2.LazyExpr: +def arctan2( + ndarr1: NDArray | NDField | blosc2.C2Array, ndarr2: NDArray | NDField | blosc2.C2Array, / +) -> blosc2.LazyExpr: """ Element-wise arc tangent of ``ndarr1 / ndarr2`` choosing the quadrant correctly. @@ -2445,8 +2491,9 @@ def zeros(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict) return blosc2_ext.zeros(shape, chunks, blocks, dtype, **kwargs) -def full(shape: int | tuple | list, fill_value: bytes | int | float | bool, dtype: np.dtype = None, - **kwargs: dict) -> NDArray: +def full( + shape: int | tuple | list, fill_value: bytes | int | float | bool, dtype: np.dtype = None, **kwargs: dict +) -> NDArray: """Create an array, with :paramref:`fill_value` being used as the default value for uninitialized portions of the array. 
@@ -2655,7 +2702,9 @@ def _check_ndarray_kwargs(**kwargs): if "storage" in kwargs: for key in kwargs: if key in list(blosc2.Storage.__annotations__): - raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") + raise AttributeError( + "Cannot pass both `storage` and other kwargs already included in Storage" + ) storage = kwargs.get("storage") if isinstance(storage, blosc2.Storage): kwargs = {**kwargs, **asdict(storage)} @@ -2695,9 +2744,9 @@ def _check_ndarray_kwargs(**kwargs): return kwargs -def get_slice_nchunks(schunk: blosc2.SChunk, - key: tuple[(int, int)] | int | slice | Sequence[slice] - ) -> np.ndarray: +def get_slice_nchunks( + schunk: blosc2.SChunk, key: tuple[(int, int)] | int | slice | Sequence[slice] +) -> np.ndarray: """ Get the unidimensional chunk indexes needed to get a slice of a :ref:`SChunk ` or a :ref:`NDArray`. diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index 0104475e..b77d68df 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -227,14 +227,16 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): "mmap_mode", "initial_mapping_size", "_is_view", - "storage" + "storage", ] for kwarg in kwargs: if kwarg not in allowed_kwargs: raise ValueError(f"{kwarg} is not supported as keyword argument") if kwargs.get("storage") is not None: if any(key in list(blosc2.Storage.__annotations__) for key in kwargs): - raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") + raise AttributeError( + "Cannot pass both `storage` and other kwargs already included in Storage" + ) storage = kwargs.get("storage") if isinstance(storage, blosc2.Storage): kwargs = {**kwargs, **asdict(storage)} @@ -330,8 +332,7 @@ def chunksize(self) -> int: @property def blocksize(self) -> int: - """The block size (in bytes). 
- """ + """The block size (in bytes).""" return super().blocksize @property @@ -345,7 +346,7 @@ def cratio(self) -> float: Compression ratio. """ if self.cbytes == 0: - return 0. + return 0.0 return self.nbytes / self.cbytes @property @@ -416,8 +417,9 @@ def append_data(self, data: object) -> int: blosc2_ext.check_access_mode(self.urlpath, self.mode) return super().append_data(data) - def fill_special(self, nitems: int, special_value: blosc2.SpecialValue, - value: bytes | int | float | bool = None) -> int: + def fill_special( + self, nitems: int, special_value: blosc2.SpecialValue, value: bytes | int | float | bool = None + ) -> int: """Fill the SChunk with a special value. SChunk must be empty. Parameters @@ -992,15 +994,18 @@ def iterchunks(self, dtype: np.dtype) -> Iterator[np.ndarray]: self.get_slice(i, i + self.chunkshape, out) yield out - def iterchunks_info(self) -> Iterator[ - NamedTuple("info", - nchunk = int, - cratio = float, - special = blosc2.SpecialValue, - repeated_value = bytes | None, - lazychunk = bytes - ) - ]: + def iterchunks_info( + self, + ) -> Iterator[ + NamedTuple( + "info", + nchunk=int, + cratio=float, + special=blosc2.SpecialValue, + repeated_value=bytes | None, + lazychunk=bytes, + ) + ]: """ Iterate over :paramref:`self` chunks, providing info on index and special values. 
@@ -1339,8 +1344,9 @@ def __dealloc__(self): @_inherit_doc_parameter(blosc2.Storage, "mmap_mode:", {r"\* - 'w\+'[^*]+": ""}) @_inherit_doc_parameter(blosc2.Storage, "initial_mapping_size:", {r"r\+ w\+, or c": "r+ or c"}) -def open(urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, - **kwargs: dict) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array: +def open( + urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict +) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array: """Open a persistent :ref:`SChunk` or :ref:`NDArray` or a remote :ref:`C2Array` or a :ref:`Proxy` (see the `Notes` section for more info on the latter case). diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py index a5c2f6bc..8bd4d90b 100644 --- a/src/blosc2/storage.py +++ b/src/blosc2/storage.py @@ -15,18 +15,22 @@ def default_nthreads(): return blosc2.nthreads + def default_filters(): - return [blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.SHUFFLE] + return [ + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.SHUFFLE, + ] def default_filters_meta(): return [0] * 6 + @dataclass class CParams: """Dataclass for hosting the different compression parameters. @@ -64,6 +68,7 @@ class CParams: tuner: :class:`Tuner` The tuner to use. Default: :py:obj:`Tuner.STUNE `. 
""" + codec: blosc2.Codec | int = blosc2.Codec.ZSTD codec_meta: int = 0 clevel: int = 1 @@ -80,7 +85,7 @@ def __post_init__(self): if len(self.filters) > 6: raise ValueError("Number of filters exceeds 6") if len(self.filters) < len(self.filters_meta): - self.filters_meta = self.filters_meta[:len(self.filters)] + self.filters_meta = self.filters_meta[: len(self.filters)] warnings.warn("Changed `filters_meta` length to match `filters` length") if len(self.filters) > len(self.filters_meta): raise ValueError("Number of filters cannot exceed number of filters meta") @@ -101,6 +106,7 @@ class DParams: value of :py:obj:`blosc2.nthreads` is used. If not set with :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it. """ + nthreads: int = field(default_factory=default_nthreads) @@ -191,9 +197,10 @@ class Storage: value: object The metalayer object that will be serialized using msgpack. """ + contiguous: bool = None urlpath: str = None - mode: str = 'a' + mode: str = "a" mmap_mode: str = None initial_mapping_size: int = None meta: dict = None @@ -203,8 +210,12 @@ def __post_init__(self): self.contiguous = self.urlpath is not None # Check for None values for f in fields(self): - if (getattr(self, f.name) is None and - f.name not in ['urlpath', 'mmap_mode', 'initial_mapping_size', 'meta']): + if getattr(self, f.name) is None and f.name not in [ + "urlpath", + "mmap_mode", + "initial_mapping_size", + "meta", + ]: setattr(self, f.name, getattr(Storage(), f.name)) warnings.warn(f"`{f.name}` field value changed from `None` to `{getattr(self, f.name)}`") diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index 69fd0022..a6fe3bc1 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -493,8 +493,12 @@ def test_save(): chunks = tuple(i // 2 for i in nres.shape) blocks = tuple(i // 4 for i in nres.shape) urlpath_eval = "eval_expr.b2nd" - res = expr.eval(storage=blosc2.Storage(urlpath=urlpath_eval, mode="w"), - 
chunks=chunks, blocks=blocks, cparams=cparams, dparams=dparams, + res = expr.eval( + storage=blosc2.Storage(urlpath=urlpath_eval, mode="w"), + chunks=chunks, + blocks=blocks, + cparams=cparams, + dparams=dparams, ) np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) diff --git a/tests/ndarray/test_metalayers.py b/tests/ndarray/test_metalayers.py index 68c6ab48..b130edc2 100644 --- a/tests/ndarray/test_metalayers.py +++ b/tests/ndarray/test_metalayers.py @@ -41,8 +41,9 @@ def test_metalayers(shape, chunks, blocks, urlpath, contiguous, dtype): chunks=chunks, blocks=blocks, dtype=dtype, - storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous, - meta={"numpy": numpy_meta, "test": test_meta}), + storage=blosc2.Storage( + urlpath=urlpath, contiguous=contiguous, meta={"numpy": numpy_meta, "test": test_meta} + ), ) assert os.path.exists(urlpath) diff --git a/tests/ndarray/test_proxy.py b/tests/ndarray/test_proxy.py index e78ba342..950daeba 100644 --- a/tests/ndarray/test_proxy.py +++ b/tests/ndarray/test_proxy.py @@ -111,15 +111,17 @@ def test_open(urlpath, shape, chunks, blocks, slices, dtype): # Test the ProxyNDSources interface -@pytest.mark.parametrize("shape, chunks, blocks", [ - # One should be careful to choose aligned partitions for our source - # E.g., the following is not aligned - # ((10, 8), (4, 4), (2, 2)) - ((12,), (4,), (2,)), - ((10, 8), (2, 8), (1, 4)), - ((10, 8, 6), (2, 4, 3), (1, 2, 3)), - ((4, 8, 6, 4), (2, 4, 3, 2), (1, 2, 3, 2)), - ] +@pytest.mark.parametrize( + "shape, chunks, blocks", + [ + # One should be careful to choose aligned partitions for our source + # E.g., the following is not aligned + # ((10, 8), (4, 4), (2, 2)) + ((12,), (4,), (2,)), + ((10, 8), (2, 8), (1, 4)), + ((10, 8, 6), (2, 4, 3), (1, 2, 3)), + ((4, 8, 6, 4), (2, 4, 3, 2), (1, 2, 3, 2)), + ], ) def test_proxy_source(shape, chunks, blocks): # Define an object that will be used as a source @@ -128,6 +130,7 @@ class Source(blosc2.ProxyNDSource): A simple source 
that will be used to test the ProxyNDSource interface. """ + def __init__(self, data, chunks, blocks): self._data = data self._shape = data.shape diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index 496713c6..aa231338 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -65,7 +65,10 @@ def test_reduce_bool(array_fixture, reduce_op): @pytest.mark.parametrize("axis", [0, 1, (0, 1), None]) @pytest.mark.parametrize("keepdims", [True, False]) @pytest.mark.parametrize("dtype_out", [np.int16, np.float64]) -@pytest.mark.parametrize("kwargs", [{}, {"cparams": blosc2.CParams(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}]) +@pytest.mark.parametrize( + "kwargs", + [{}, {"cparams": blosc2.CParams(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}], +) def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwargs): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis: diff --git a/tests/test_compress2.py b/tests/test_compress2.py index af5e6f92..498921e2 100644 --- a/tests/test_compress2.py +++ b/tests/test_compress2.py @@ -19,33 +19,41 @@ @pytest.mark.parametrize( "obj, cparams, dparams", [ - (random.integers(0, 10, 10), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}), + (random.integers(0, 10, 10), {"cparams": blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}), ( np.arange(10, dtype="float32"), # Select an absolute precision of 10 bits in mantissa - {'cparams': blosc2.CParams( - filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - filters_meta=[10, 0], - typesize=4 - )}, - {'dparams': blosc2.DParams(nthreads=4)}, + { + "cparams": blosc2.CParams( + filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + filters_meta=[10, 0], + typesize=4, + ) + }, + {"dparams": blosc2.DParams(nthreads=4)}, ), ( np.arange(10, dtype="float32"), # Do a reduction of 
precision of 10 bits in mantissa - {"cparams": {"filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - "filters_meta": [-10, 0], - "typesize": 4, - }, - }, + { + "cparams": { + "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + "filters_meta": [-10, 0], + "typesize": 4, + }, + }, {"nthreads": 4}, ), ( random.integers(0, 1000, 1000, endpoint=True), - {'cparams': blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)}, - {'dparams': blosc2.DParams()}, + {"cparams": blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)}, + {"dparams": blosc2.DParams()}, + ), + ( + np.arange(45, dtype=np.float64), + {"cparams": blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, + {}, ), - (np.arange(45, dtype=np.float64), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, {}), (np.arange(50, dtype=np.int64), {"typesize": 4}, {"dparams": blosc2.dparams_dflts}), ], ) diff --git a/tests/test_schunk.py b/tests/test_schunk.py index b5198a10..176db54e 100644 --- a/tests/test_schunk.py +++ b/tests/test_schunk.py @@ -59,15 +59,12 @@ def test_schunk_numpy(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nc blosc2.SChunk(chunksize=chunk_len * 4, storage=storage, cparams=cparams, dparams=dparams) # Create a schunk which we can read later - storage2 = replace(storage, - mode="w" if mmap_mode is None else None, - mmap_mode="w+" if mmap_mode is not None else None) - schunk = blosc2.SChunk( - chunksize=chunk_len * 4, - storage=storage2, - cparams=cparams, - dparams=dparams + storage2 = replace( + storage, + mode="w" if mmap_mode is None else None, + mmap_mode="w+" if mmap_mode is not None else None, ) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, storage=storage2, cparams=cparams, dparams=dparams) assert schunk.urlpath == urlpath assert schunk.contiguous == contiguous @@ -203,12 +200,13 @@ def test_schunk(contiguous, urlpath, mode, mmap_mode, nbytes, cparams, dparams, ) 
@pytest.mark.parametrize("copy", [True, False]) def test_schunk_cframe(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nchunks, copy): - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, - mode=mode, mmap_mode=mmap_mode) + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) blosc2.remove_urlpath(urlpath) data = np.arange(200 * 1000 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, **asdict(storage), cparams=cparams, dparams=dparams) + schunk = blosc2.SChunk( + chunksize=200 * 1000 * 4, data=data, **asdict(storage), cparams=cparams, dparams=dparams + ) cframe = schunk.to_cframe() schunk2 = blosc2.schunk_from_cframe(cframe, copy) @@ -286,7 +284,9 @@ def test_schunk_cdparams(cparams, dparams, new_cparams, new_dparams): schunk.dparams = new_dparams for field in fields(schunk.cparams): if field.name in ["filters", "filters_meta"]: - assert getattr(schunk.cparams, field.name)[: len(getattr(new_cparams, field.name))] == getattr(new_cparams, field.name) + assert getattr(schunk.cparams, field.name)[: len(getattr(new_cparams, field.name))] == getattr( + new_cparams, field.name + ) else: assert getattr(schunk.cparams, field.name) == getattr(new_cparams, field.name) diff --git a/tests/test_schunk_constructor.py b/tests/test_schunk_constructor.py index dda1e956..01dfa259 100644 --- a/tests/test_schunk_constructor.py +++ b/tests/test_schunk_constructor.py @@ -129,16 +129,17 @@ def test_schunk(contiguous, urlpath, cparams, dparams, chunksize): ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, 200 * 1000 * 2 + 17), ], ) -@pytest.mark.parametrize("special_value, expected_value", - [ - (blosc2.SpecialValue.ZERO, 0), - (blosc2.SpecialValue.NAN, np.nan), - (blosc2.SpecialValue.UNINIT, 0), - (blosc2.SpecialValue.VALUE, 34), - (blosc2.SpecialValue.VALUE, np.pi), - (blosc2.SpecialValue.VALUE, b"0123"), - (blosc2.SpecialValue.VALUE, True), - ], 
+@pytest.mark.parametrize( + "special_value, expected_value", + [ + (blosc2.SpecialValue.ZERO, 0), + (blosc2.SpecialValue.NAN, np.nan), + (blosc2.SpecialValue.UNINIT, 0), + (blosc2.SpecialValue.VALUE, 34), + (blosc2.SpecialValue.VALUE, np.pi), + (blosc2.SpecialValue.VALUE, b"0123"), + (blosc2.SpecialValue.VALUE, True), + ], ) def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value, expected_value): kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} @@ -157,7 +158,7 @@ def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value if isinstance(expected_value, float): dtype = np.float32 elif isinstance(expected_value, bytes): - dtype = np.dtype('|S' + str(len(expected_value))) + dtype = np.dtype("|S" + str(len(expected_value))) array = np.full(nitems, expected_value, dtype=dtype) dest = np.empty(nitems, dtype=dtype) schunk.get_slice(out=dest) diff --git a/tests/test_schunk_insert.py b/tests/test_schunk_insert.py index 18abc21a..06fecc95 100644 --- a/tests/test_schunk_insert.py +++ b/tests/test_schunk_insert.py @@ -33,7 +33,9 @@ def test_schunk_insert_numpy(contiguous, urlpath, nchunks, ninserts, copy, creat storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, storage=storage, cparams={"nthreads": 2}, dparams={"nthreads": 2}) + schunk = blosc2.SChunk( + chunksize=200 * 1000 * 4, storage=storage, cparams={"nthreads": 2}, dparams={"nthreads": 2} + ) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") nchunks_ = schunk.append_data(buffer) diff --git a/tests/test_schunk_set_slice.py b/tests/test_schunk_set_slice.py index f5a26683..d78b5e67 100644 --- a/tests/test_schunk_set_slice.py +++ b/tests/test_schunk_set_slice.py @@ -38,7 +38,9 @@ def test_schunk_set_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, blosc2.remove_urlpath(urlpath) data = np.arange(200 * 100 * nchunks, 
dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, storage=storage, cparams=cparams, dparams=dparams) + schunk = blosc2.SChunk( + chunksize=200 * 100 * 4, data=data, storage=storage, cparams=cparams, dparams=dparams + ) _start, _stop = start, stop if _start is None: diff --git a/tests/test_storage.py b/tests/test_storage.py index 2e7fa6d5..4a4b4551 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -82,27 +82,33 @@ def test_raises_storage(contiguous, urlpath): "cparams", [ blosc2.CParams(codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]), - {"typesize": 4, 'filters': [blosc2.Filter.TRUNC_PREC, blosc2.Filter.DELTA], 'filters_meta': [0, 0]}, - blosc2.CParams(nthreads=5, filters=[blosc2.Filter.BITSHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0] * 3), - {"codec": blosc2.Codec.LZ4HC, "typesize": 4, 'filters': [blosc2.Filter.BYTEDELTA]}, + {"typesize": 4, "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.DELTA], "filters_meta": [0, 0]}, + blosc2.CParams( + nthreads=5, filters=[blosc2.Filter.BITSHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0] * 3 + ), + {"codec": blosc2.Codec.LZ4HC, "typesize": 4, "filters": [blosc2.Filter.BYTEDELTA]}, ], ) def test_cparams_values(cparams): schunk = blosc2.SChunk(cparams=cparams) cparams_dataclass = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) for field in fields(cparams_dataclass): - if field.name in ['filters', 'filters_meta']: - assert getattr(schunk.cparams, field.name)[:len(getattr(cparams_dataclass, field.name))] == getattr(cparams_dataclass, field.name) + if field.name in ["filters", "filters_meta"]: + assert getattr(schunk.cparams, field.name)[ + : len(getattr(cparams_dataclass, field.name)) + ] == getattr(cparams_dataclass, field.name) else: assert getattr(schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) array = blosc2.empty((30, 30), np.int32, cparams=cparams) for field in fields(cparams_dataclass): - if field.name in 
['filters', 'filters_meta']: - assert getattr(array.schunk.cparams, field.name)[:len(getattr(cparams_dataclass, field.name))] == getattr(cparams_dataclass, field.name) - elif field.name == 'typesize': + if field.name in ["filters", "filters_meta"]: + assert getattr(array.schunk.cparams, field.name)[ + : len(getattr(cparams_dataclass, field.name)) + ] == getattr(cparams_dataclass, field.name) + elif field.name == "typesize": assert getattr(array.schunk.cparams, field.name) == array.dtype.itemsize - elif field.name != 'blocksize': + elif field.name != "blocksize": assert getattr(array.schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) blosc2.set_nthreads(10) @@ -150,7 +156,7 @@ def test_raises_cparams(): (blosc2.DParams()), (blosc2.DParams(nthreads=2)), ({}), - ({'nthreads': 2}), + ({"nthreads": 2}), ], ) def test_dparams_values(dparams): @@ -168,6 +174,7 @@ def test_dparams_values(dparams): assert schunk.dparams.nthreads == dparams_dataclass.nthreads assert array.schunk.dparams.nthreads == dparams_dataclass.nthreads + def test_dparams_defaults(): dparams = blosc2.DParams() assert dparams.nthreads == blosc2.nthreads From 51bdba89691f167be338c5a3deb5475690099150 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:05:39 +0200 Subject: [PATCH 15/16] Disable ruff/pyupgrade rule UP038 Using `X | Y` in `isinstance` is slower and more verbose --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index d3672327..10d2464f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ ignore = [ "RET505", "RET508", "SIM108", + "UP038", # https://github.com/astral-sh/ruff/issues/7871 ] [tool.ruff.lint.extend-per-file-ignores] From 1ec0c45701c461bd2827bc4252e73794312ae9f6 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:34:10 +0200 Subject: 
[PATCH 16/16] Disable ruff/flake8-pytest-style rule PT004 It has been deprecated in ruff. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 10d2464f..48c8dfa8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,7 @@ extend-select = [ ignore = [ "B028", "PT006", + "PT004", # deprecated "PT011", "RET505", "RET508",