Skip to content

Commit

Permalink
Unify cparams default values && add dataclass cparams
Browse files Browse the repository at this point in the history
  • Loading branch information
martaiborra committed Sep 18, 2024
1 parent f4d1de9 commit 10e64d9
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 44 deletions.
1 change: 1 addition & 0 deletions doc/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ API Reference
.. toctree::
:maxdepth: 2

storage
top_level
classes
array_operations
17 changes: 17 additions & 0 deletions doc/reference/storage.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
.. _Storage:

Storage
=======

This section documents the dataclasses that hold the parameters used when compressing data.

.. currentmodule:: blosc2

CParams
-------

.. autosummary::
:toctree: autofiles/storage
:nosignatures:

CParams
5 changes: 1 addition & 4 deletions src/blosc2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class Tuner(Enum):
compress2,
compressor_list,
compute_chunks_blocks,
CParams,
decompress,
decompress2,
detect_number_of_cores,
Expand Down Expand Up @@ -266,7 +267,6 @@ class Tuner(Enum):
"nthreads": nthreads,
"blocksize": 0,
"splitmode": SplitMode.ALWAYS_SPLIT,
"schunk": None,
"filters": [
Filter.NOFILTER,
Filter.NOFILTER,
Expand All @@ -276,10 +276,7 @@ class Tuner(Enum):
Filter.SHUFFLE,
],
"filters_meta": [0, 0, 0, 0, 0, 0],
"prefilter": None,
"preparams": None,
"tuner": Tuner.STUNE,
"instr_codec": False,
}
"""
Compression params defaults.
Expand Down
4 changes: 3 additions & 1 deletion src/blosc2/blosc2_ext.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,9 @@ cdef _check_dparams(blosc2_dparams* dparams, blosc2_cparams* cparams=NULL):

cdef create_cparams_from_kwargs(blosc2_cparams *cparams, kwargs):
if "compcode" in kwargs:
raise NameError("`compcode` has been renamed to `codec`. Please go update your code.")
raise NameError("`compcode` has been renamed to `codec`. Please go update your code.")
if "shuffle" in kwargs:
raise NameError("`shuffle` has been substituted by `filters`. Please go update your code.")
codec = kwargs.get('codec', blosc2.cparams_dflts['codec'])
cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value
cparams.compcode_meta = kwargs.get('codec_meta', blosc2.cparams_dflts['codec_meta'])
Expand Down
112 changes: 82 additions & 30 deletions src/blosc2/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import platform
import sys
from collections.abc import Callable
from dataclasses import dataclass, field, asdict

import cpuinfo
import numpy as np
Expand Down Expand Up @@ -53,12 +54,76 @@ def _check_codec(codec):
raise ValueError(f"codec can only be one of: {codecs}, not '{codec}'")


def default_filters():
    """Return a new list holding the default filter pipeline.

    The list has six entries: five ``blosc2.Filter.NOFILTER`` placeholders
    followed by ``blosc2.Filter.SHUFFLE``.  A fresh list is created on every
    call, which makes this function usable as a dataclass
    ``default_factory``.
    """
    pipeline = [blosc2.Filter.NOFILTER] * 5
    pipeline.append(blosc2.Filter.SHUFFLE)
    return pipeline


def default_filters_meta():
    """Return a new list with the default filter metadata: six zeros.

    A fresh list is created on every call, so callers may mutate the
    result without affecting later calls.
    """
    return [0, 0, 0, 0, 0, 0]

@dataclass
class CParams:
    """Container for the compression parameters.

    Every field has a default, so ``CParams()`` yields the default
    configuration and individual settings can be overridden by keyword.

    Parameters
    ----------
    codec: :class:`Codec`
        The compressor code. Default is :py:obj:`Codec.ZSTD <Codec>`.
    codec_meta: int
        The metadata for the compressor code. Default: 0.
    clevel: int
        The compression level, from 0 (no compression) to 9
        (maximum compression). Default: 1.
    use_dict: bool
        Whether to use dicts when compressing
        (only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). Default: `False`.
    typesize: int from 1 to 255
        The data type size. Default: 8.
    nthreads: int
        The number of threads to use internally. Default: the value of
        ``blosc2.nthreads`` at the time this class is defined.
    blocksize: int
        The requested size of the compressed blocks. If 0 (the default),
        blosc2 chooses it automatically.
    splitmode: :class:`SplitMode`
        The split mode for the blocks.
        Default: :py:obj:`SplitMode.ALWAYS_SPLIT <SplitMode>`.
    filters: :class:`Filter` list
        The sequence of filters. Default: five
        :py:obj:`Filter.NOFILTER <Filter>` entries followed by
        :py:obj:`Filter.SHUFFLE <Filter>`.
    filters_meta: list
        The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`.
    tuner: :class:`Tuner`
        The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
    """

    codec: blosc2.Codec = blosc2.Codec.ZSTD
    codec_meta: int = 0
    clevel: int = 1
    use_dict: bool = False
    typesize: int = 8
    # Evaluated once, when the class body runs.
    nthreads: int = blosc2.nthreads
    blocksize: int = 0
    splitmode: blosc2.SplitMode = blosc2.SplitMode.ALWAYS_SPLIT
    # Mutable defaults go through factories so each instance owns its lists.
    filters: list[blosc2.Filter] = field(default_factory=default_filters)
    filters_meta: list[int] = field(default_factory=default_filters_meta)
    tuner: blosc2.Tuner = blosc2.Tuner.STUNE

    # TODO: add a __post_init__ that validates `filters`; an unfinished stub
    # here started a check for more than 6 filters.


def compress(
src: object,
typesize: int = None,
clevel: int = 9,
typesize: int = 8,
clevel: int = 1,
filter: blosc2.Filter = blosc2.Filter.SHUFFLE,
codec: blosc2.Codec = blosc2.Codec.BLOSCLZ,
codec: blosc2.Codec = blosc2.Codec.ZSTD,
_ignore_multiple_size: bool = False,
) -> str | bytes:
"""Compress src, with a given type size.
Expand Down Expand Up @@ -1382,7 +1447,7 @@ def compute_chunks_blocks(
return tuple(chunks), tuple(blocks)


def compress2(src: object, **kwargs: dict) -> str | bytes:
def compress2(src: object, **kwargs: CParams | dict) -> str | bytes:
"""Compress :paramref:`src` with the given compression params (if given)
Parameters
Expand All @@ -1393,34 +1458,15 @@ def compress2(src: object, **kwargs: dict) -> str | bytes:
Other Parameters
----------------
kwargs: dict, optional
Compression parameters. The default values are in :ref:`blosc2.CParams`.
Keyword arguments supported:
codec: :class:`Codec`
The compressor code. Default is :py:obj:`Codec.BLOSCLZ <Codec>`.
codec_meta: int
The metadata for the compressor code, 0 by default.
clevel: int
The compression level from 0 (no compression) to 9
(maximum compression). Default: 5.
use_dict: bool
Use dicts or not when compressing
(only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). By default `False`.
typesize: int from 1 to 255
The data type size. Default: 8.
nthreads: int
The number of threads to use internally (1 by default).
blocksize: int
The requested size of the compressed blocks. If 0 (the default)
blosc2 chooses it automatically.
splitmode: :class:`SplitMode`
The split mode for the blocks.
The default value is :py:obj:`SplitMode.FORWARD_COMPAT_SPLIT <SplitMode>`.
filters: :class:`Filter` list
The sequence of filters. Default: {0, 0, 0, 0, 0, :py:obj:`Filter.SHUFFLE <Filter>`}.
filters_meta: list
The metadata for filters. Default: `{0, 0, 0, 0, 0, 0}`.
tuner: :class:`Tuner`
The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
cparams: :class:`CParams`
All the compression parameters that you want to use as
a :class:`CParams` instance.
others: Any
If `cparams` is not passed, all the parameters of a :class:`CParams`
can be passed as keyword arguments.
Returns
-------
Expand All @@ -1434,6 +1480,12 @@ def compress2(src: object, **kwargs: dict) -> str | bytes:
If an internal error occurred, probably because some
parameter is not a valid parameter.
"""
if kwargs is not None:
if 'cparams' in kwargs:
if len(kwargs) > 1:
raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams")
kwargs = asdict(kwargs.get('cparams'))

return blosc2_ext.compress2(src, **kwargs)


Expand Down
2 changes: 1 addition & 1 deletion tests/ndarray/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_reduce_bool(array_fixture, reduce_op):
@pytest.mark.parametrize("axis", [0, 1, (0, 1), None])
@pytest.mark.parametrize("keepdims", [True, False])
@pytest.mark.parametrize("dtype_out", [np.int16, np.float64])
@pytest.mark.parametrize("kwargs", [{}, {"cparams": dict(clevel=1, shuffle=blosc2.Filter.BITSHUFFLE)}])
@pytest.mark.parametrize("kwargs", [{}, {"cparams": dict(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}])
def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwargs):
a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis:
Expand Down
16 changes: 8 additions & 8 deletions tests/test_compress2.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
@pytest.mark.parametrize(
"obj, cparams, dparams",
[
(random.integers(0, 10, 10), {"codec": blosc2.Codec.LZ4, "clevel": 6}, {}),
(random.integers(0, 10, 10), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}),
(
np.arange(10, dtype="float32"),
# Select an absolute precision of 10 bits in mantissa
{
"filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE],
"filters_meta": [10, 0],
"typesize": 4,
},
{'cparams': blosc2.CParams(
filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE],
filters_meta=[10, 0],
typesize=4
)},
{"nthreads": 4},
),
(
Expand All @@ -42,10 +42,10 @@
),
(
random.integers(0, 1000, 1000, endpoint=True),
{"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4},
{'cparams': blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)},
{},
),
(np.arange(45, dtype=np.float64), {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}),
(np.arange(45, dtype=np.float64), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, {}),
(np.arange(50, dtype=np.int64), {"typesize": 4}, blosc2.dparams_dflts),
],
)
Expand Down

0 comments on commit 10e64d9

Please sign in to comment.