Unify cparams default values && add dataclass cparams

Blosc · Sep 18, 2024 · 10e64d9 · 10e64d9
1 parent f4d1de9
commit 10e64d9
Show file tree

Hide file tree

Showing 7 changed files with 113 additions and 44 deletions.
diff --git a/doc/reference/index.rst b/doc/reference/index.rst
@@ -4,6 +4,7 @@ API Reference
 .. toctree::
     :maxdepth: 2
 
+    storage
     top_level
     classes
     array_operations
diff --git a/doc/reference/storage.rst b/doc/reference/storage.rst
@@ -0,0 +1,17 @@
+.. _Storage:
+
+Storage
+=======
+
+This page documents the classes used to configure how data is compressed and stored.
+
+.. currentmodule:: blosc2
+
+CParams
+-------
+
+.. autosummary::
+   :toctree: autofiles/storage
+   :nosignatures:
+
+    CParams
diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py
@@ -155,6 +155,7 @@ class Tuner(Enum):
     compress2,
     compressor_list,
     compute_chunks_blocks,
+    CParams,
     decompress,
     decompress2,
     detect_number_of_cores,
@@ -266,7 +267,6 @@ class Tuner(Enum):
     "nthreads": nthreads,
     "blocksize": 0,
     "splitmode": SplitMode.ALWAYS_SPLIT,
-    "schunk": None,
     "filters": [
         Filter.NOFILTER,
         Filter.NOFILTER,
@@ -276,10 +276,7 @@ class Tuner(Enum):
         Filter.SHUFFLE,
     ],
     "filters_meta": [0, 0, 0, 0, 0, 0],
-    "prefilter": None,
-    "preparams": None,
     "tuner": Tuner.STUNE,
-    "instr_codec": False,
 }
 """
 Compression params defaults.

diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx
@@ -716,7 +716,9 @@ cdef _check_dparams(blosc2_dparams* dparams, blosc2_cparams* cparams=NULL):
 
 cdef create_cparams_from_kwargs(blosc2_cparams *cparams, kwargs):
     if "compcode" in kwargs:
-        raise NameError("`compcode` has been renamed to `codec`.  Please go update your code.")
+        raise NameError("`compcode` has been renamed to `codec`. Please go update your code.")
+    if "shuffle" in kwargs:
+        raise NameError("`shuffle` has been substituted by `filters`. Please go update your code.")
     codec = kwargs.get('codec', blosc2.cparams_dflts['codec'])
     cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value
     cparams.compcode_meta = kwargs.get('codec_meta', blosc2.cparams_dflts['codec_meta'])

diff --git a/src/blosc2/core.py b/src/blosc2/core.py
@@ -18,6 +18,7 @@
 import platform
 import sys
 from collections.abc import Callable
+from dataclasses import dataclass, field, asdict
 
 import cpuinfo
 import numpy as np
@@ -53,12 +54,76 @@ def _check_codec(codec):
         raise ValueError(f"codec can only be one of: {codecs}, not '{codec}'")
 
 
+def default_filters():
+    """Return a new list with the default filter pipeline.
+
+    The list has six entries: five ``NOFILTER`` slots followed by ``SHUFFLE``.
+    A fresh list is built on every call, so callers may mutate the result.
+    """
+    pipeline = [blosc2.Filter.NOFILTER] * 5
+    pipeline.append(blosc2.Filter.SHUFFLE)
+    return pipeline
+
+
+def default_filters_meta():
+    """Return a new list with the default metadata (0) for each of the six filter slots."""
+    return [0 for _ in range(6)]
+
+@dataclass
+class CParams:
+    """Dataclass for hosting the different compression parameters.
+
+    Parameters
+    ----------
+    codec: :class:`Codec`
+        The compressor code. Default is :py:obj:`Codec.ZSTD <Codec>`.
+    codec_meta: int
+        The metadata for the compressor code, 0 by default.
+    clevel: int
+        The compression level from 0 (no compression) to 9
+        (maximum compression). Default: 1.
+    use_dict: bool
+        Use dicts or not when compressing
+        (only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). Default: `False`.
+    typesize: int from 1 to 255
+        The data type size. Default: 8.
+    nthreads: int
+        The number of threads to use internally. By default, blosc2 computes
+        a good guess; the default is the value of `blosc2.nthreads` at the
+        time the instance is created.
+    blocksize: int
+        The requested size of the compressed blocks. If 0 (the default)
+        blosc2 chooses it automatically.
+    splitmode: :class:`SplitMode`
+        The split mode for the blocks.
+        The default value is :py:obj:`SplitMode.ALWAYS_SPLIT <SplitMode>`.
+    filters: :class:`Filter` list
+        The sequence of filters (at most 6). Default: [:py:obj:`Filter.NOFILTER <Filter>`,
+        :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`,
+        :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.SHUFFLE <Filter>`].
+    filters_meta: list
+        The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`.
+    tuner: :class:`Tuner`
+        The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
+
+    Raises
+    ------
+    ValueError
+        If more than 6 filters are given.
+    """
+    codec: blosc2.Codec = blosc2.Codec.ZSTD
+    codec_meta: int = 0
+    clevel: int = 1
+    use_dict: bool = False
+    typesize: int = 8
+    # Resolved per instance rather than once at class-definition time, so that
+    # later changes to `blosc2.nthreads` are honored by newly created instances.
+    nthreads: int = field(default_factory=lambda: blosc2.nthreads)
+    blocksize: int = 0
+    splitmode: blosc2.SplitMode = blosc2.SplitMode.ALWAYS_SPLIT
+    filters: list[blosc2.Filter] = field(default_factory=default_filters)
+    filters_meta: list[int] = field(default_factory=default_filters_meta)
+    tuner: blosc2.Tuner = blosc2.Tuner.STUNE
+
+    def __post_init__(self):
+        # The filter pipeline has 6 slots (see the defaults above); fail early
+        # with a clear message instead of deferring the error to the extension.
+        if len(self.filters) > 6:
+            raise ValueError(f"Number of filters cannot exceed 6, got {len(self.filters)}")
+
+
 def compress(
     src: object,
-    typesize: int = None,
-    clevel: int = 9,
+    typesize: int = 8,
+    clevel: int = 1,
     filter: blosc2.Filter = blosc2.Filter.SHUFFLE,
-    codec: blosc2.Codec = blosc2.Codec.BLOSCLZ,
+    codec: blosc2.Codec = blosc2.Codec.ZSTD,
     _ignore_multiple_size: bool = False,
 ) -> str | bytes:
     """Compress src, with a given type size.
@@ -1382,7 +1447,7 @@ def compute_chunks_blocks(
     return tuple(chunks), tuple(blocks)
 
 
-def compress2(src: object, **kwargs: dict) -> str | bytes:
+def compress2(src: object, **kwargs: CParams | dict) -> str | bytes:
     """Compress :paramref:`src` with the given compression params (if given)
 
     Parameters
@@ -1393,34 +1458,15 @@ def compress2(src: object, **kwargs: dict) -> str | bytes:
     Other Parameters
     ----------------
     kwargs: dict, optional
+        Compression parameters. The default values are in :class:`blosc2.CParams`.
         Keyword arguments supported:
 
-            codec: :class:`Codec`
-                The compressor code. Default is :py:obj:`Codec.BLOSCLZ <Codec>`.
-            codec_meta: int
-                The metadata for the compressor code, 0 by default.
-            clevel: int
-                The compression level from 0 (no compression) to 9
-                (maximum compression). Default: 5.
-            use_dict: bool
-                Use dicts or not when compressing
-                (only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). By default `False`.
-            typesize: int from 1 to 255
-                The data type size. Default: 8.
-            nthreads: int
-                The number of threads to use internally (1 by default).
-            blocksize: int
-                The requested size of the compressed blocks. If 0 (the default)
-                blosc2 chooses it automatically.
-            splitmode: :class:`SplitMode`
-                The split mode for the blocks.
-                The default value is :py:obj:`SplitMode.FORWARD_COMPAT_SPLIT <SplitMode>`.
-            filters: :class:`Filter` list
-                The sequence of filters. Default: {0, 0, 0, 0, 0, :py:obj:`Filter.SHUFFLE <Filter>`}.
-            filters_meta: list
-                The metadata for filters. Default: `{0, 0, 0, 0, 0, 0}`.
-            tuner: :class:`Tuner`
-                The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
+            cparams: :class:`CParams`
+                All the compression parameters that you want to use as
+                a :class:`CParams` instance.
+            others: Any
+                If `cparams` is not passed, all the parameters of a :class:`CParams`
+                can be passed as keyword arguments.
 
     Returns
     -------
@@ -1434,6 +1480,12 @@ def compress2(src: object, **kwargs: dict) -> str | bytes:
         If an internal error occurred, probably because some
         parameter is not a valid parameter.
     """
+    if kwargs is not None:
+        if 'cparams' in kwargs:
+            if len(kwargs) > 1:
+                raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams")
+            kwargs = asdict(kwargs.get('cparams'))
+
     return blosc2_ext.compress2(src, **kwargs)
 
 

diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py
@@ -65,7 +65,7 @@ def test_reduce_bool(array_fixture, reduce_op):
 @pytest.mark.parametrize("axis", [0, 1, (0, 1), None])
 @pytest.mark.parametrize("keepdims", [True, False])
 @pytest.mark.parametrize("dtype_out", [np.int16, np.float64])
-@pytest.mark.parametrize("kwargs", [{}, {"cparams": dict(clevel=1, shuffle=blosc2.Filter.BITSHUFFLE)}])
+@pytest.mark.parametrize("kwargs", [{}, {"cparams": dict(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}])
 def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwargs):
     a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
     if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis:

diff --git a/tests/test_compress2.py b/tests/test_compress2.py
@@ -19,15 +19,15 @@
 @pytest.mark.parametrize(
     "obj, cparams, dparams",
     [
-        (random.integers(0, 10, 10), {"codec": blosc2.Codec.LZ4, "clevel": 6}, {}),
+        (random.integers(0, 10, 10), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}),
         (
             np.arange(10, dtype="float32"),
             # Select an absolute precision of 10 bits in mantissa
-            {
-                "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE],
-                "filters_meta": [10, 0],
-                "typesize": 4,
-            },
+            {'cparams': blosc2.CParams(
+                filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE],
+                filters_meta=[10, 0],
+                typesize=4
+            )},
             {"nthreads": 4},
         ),
         (
@@ -42,10 +42,10 @@
         ),
         (
             random.integers(0, 1000, 1000, endpoint=True),
-            {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4},
+            {'cparams': blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)},
             {},
         ),
-        (np.arange(45, dtype=np.float64), {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}),
+        (np.arange(45, dtype=np.float64), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, {}),
         (np.arange(50, dtype=np.int64), {"typesize": 4}, blosc2.dparams_dflts),
     ],
 )