# Patch summary
#   * bench/fill_special.py (new): times three ways of filling an SChunk --
#     `data=` in the constructor, `fill_special(UNINIT)` followed by a slice
#     assignment, and `fill_special(VALUE, value)`.
#   * src/blosc2/blosc2_ext.pyx: adds explanatory comments and renames
#     `last_nitems` to `remainder`.
#   * src/blosc2/schunk.py: docstring wording for `value`.
#   * tests: moves `test_schunk_fill_special` from tests/test_schunk_fill.py
#     into tests/test_schunk_constructor.py, dropping the debug `print`.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy.
# Indentation and wrapping of context lines were reconstructed from syntax --
# verify against the tree before applying. The `index` hash of the new
# benchmark file predates the edits made to it here.
diff --git a/bench/fill_special.py b/bench/fill_special.py
new file mode 100644
index 00000000..f46fb856
--- /dev/null
+++ b/bench/fill_special.py
@@ -0,0 +1,61 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc Development Team
+# All rights reserved.
+#
+# This source code is licensed under a BSD-style license (found in the
+# LICENSE file in the root directory of this source tree)
+#######################################################################
+
+import sys
+from time import time
+
+import numpy as np
+
+import blosc2
+
+# Dimensions, type and persistence properties for the arrays
+shape = 1_000 * 1_000
+chunksize = 10_000
+blocksize = 1_000
+
+dtype = np.float64
+
+# Set the compression and decompression parameters
+cparams = {"codec": blosc2.Codec.BLOSCLZ, "typesize": 8, "blocksize": blocksize * 8}
+dparams = {}
+contiguous = True
+# Any first argument enables persistence, except explicit negatives ("0", "false", ...)
+persistent = len(sys.argv) > 1 and sys.argv[1].lower() not in ("", "0", "false", "no", "off")
+
+if persistent:
+    urlpath = "bench_fill_special.b2frame"
+else:
+    urlpath = None
+
+
+def create_schunk(data=None):
+    """Return a new SChunk using the benchmark settings, initialized with `data` if given."""
+    storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams}
+    # Remove whatever is stored at `urlpath` so every measurement starts from scratch
+    blosc2.remove_urlpath(urlpath)
+    return blosc2.SChunk(chunksize=chunksize * cparams["typesize"], data=data, **storage)
+
+t0 = time()
+schunk = create_schunk(data=np.full(shape, np.pi, dtype))
+t1 = time()
+print(f"Time for filling the schunk with `data` argument in the constructor: {t1 - t0:.3f}s")
+
+schunk = create_schunk()
+t0 = time()
+schunk.fill_special(shape, blosc2.SpecialValue.UNINIT)
+schunk[:] = np.full(shape, np.pi, dtype)
+t1 = time()
+print(f"Time for filling the schunk without passing directly the value: {t1 - t0:.3f}s")
+
+schunk = create_schunk()
+t0 = time()
+schunk.fill_special(shape, blosc2.SpecialValue.VALUE, np.pi)
+t1 = time()
+print(f"Time for filling the schunk passing directly the value to `fill_special`: {t1 - t0:.3f}s")
+
+blosc2.remove_urlpath(urlpath)
diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx
index 670d752a..03b12bef 100644
--- a/src/blosc2/blosc2_ext.pyx
+++ b/src/blosc2/blosc2_ext.pyx
@@ -1229,7 +1229,7 @@ cdef class SChunk:
             raise RuntimeError("nitems is too large. Try increasing the chunksize")
         if self.nbytes > 0 or self.cbytes > 0:
             raise RuntimeError("Filling with special values only works on empty SChunks")
-
+        # Get a void pointer to the value
         array = np.array([value])
         if array.dtype.itemsize != self.typesize:
             if isinstance(value, int):
@@ -1241,7 +1241,7 @@ cdef class SChunk:
             array = np.array([value], dtype=dtype)
         cdef Py_buffer *buf = malloc(sizeof(Py_buffer))
         PyObject_GetBuffer(array, buf, PyBUF_SIMPLE)
-
+        # Create chunk with repeated values
         nchunks = nitems // self.chunkshape
         cdef blosc2_schunk *c_schunk = self.c_schunk
         cdef blosc2_cparams *cparams = self.schunk.storage.cparams
@@ -1255,11 +1255,11 @@ cdef class SChunk:
                 PyBuffer_Release(buf)
                 free(buf)
                 raise RuntimeError("Error while appending the chunk")
-
-        last_nitems = nitems % self.chunkshape
+        # Create and append last chunk if it is smaller than chunkshape
+        remainder = nitems % self.chunkshape
         rc = 0
-        if last_nitems != 0:
-            get_chunk_repeatval(dereference(cparams), last_nitems * self.typesize, chunk, chunksize, buf)
+        if remainder != 0:
+            get_chunk_repeatval(dereference(cparams), remainder * self.typesize, chunk, chunksize, buf)
             rc = blosc2_schunk_append_chunk(self.schunk, chunk, True)
         free(chunk)
         PyBuffer_Release(buf)
diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py
index 9f72ac65..b3c83275 100644
--- a/src/blosc2/schunk.py
+++ b/src/blosc2/schunk.py
@@ -415,7 +415,7 @@ def fill_special(self, nitems, special_value, value=None):
             The number of items to fill with the special value.
         special_value: SpecialValue
             The special value to be used for filling the SChunk.
-        value: bytes, int, float or bool (optional)
+        value: bytes, int, float, bool (optional)
             The value to fill the SChunk. This parameter is only supported if
             :paramref:`special_value` is ``blosc2.SpecialValue.VALUE``.
 
diff --git a/tests/test_schunk_constructor.py b/tests/test_schunk_constructor.py
index a19af02a..f04b51d2 100644
--- a/tests/test_schunk_constructor.py
+++ b/tests/test_schunk_constructor.py
@@ -117,3 +117,53 @@ def test_schunk(contiguous, urlpath, cparams, dparams, chunksize):
     assert schunk.typesize == 1
 
     blosc2.remove_urlpath(urlpath)
+
+
+@pytest.mark.parametrize("contiguous", [True, False])
+@pytest.mark.parametrize("urlpath", [None, "b2frame"])
+@pytest.mark.parametrize(
+    "cparams, nitems",
+    [
+        ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, 0),
+        ({"typesize": 4}, 200 * 1000),
+        ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, 200 * 1000 * 2 + 17),
+    ],
+)
+@pytest.mark.parametrize("special_value, expected_value",
+    [
+        (blosc2.SpecialValue.ZERO, 0),
+        (blosc2.SpecialValue.NAN, np.nan),
+        (blosc2.SpecialValue.UNINIT, 0),
+        (blosc2.SpecialValue.VALUE, 34),
+        (blosc2.SpecialValue.VALUE, np.pi),
+        (blosc2.SpecialValue.VALUE, b"0123"),
+        (blosc2.SpecialValue.VALUE, True),
+    ],
+)
+def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value, expected_value):
+    storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams}
+    blosc2.remove_urlpath(urlpath)
+
+    chunk_len = 200 * 1000
+    schunk = blosc2.SChunk(chunksize=chunk_len * 4, **storage)
+    if special_value in [blosc2.SpecialValue.ZERO, blosc2.SpecialValue.NAN, blosc2.SpecialValue.UNINIT]:
+        schunk.fill_special(nitems, special_value)
+    else:
+        schunk.fill_special(nitems, special_value, expected_value)
+    assert len(schunk) == nitems
+
+    if special_value != blosc2.SpecialValue.UNINIT:
+        dtype = np.int32
+        if isinstance(expected_value, float):
+            dtype = np.float32
+        elif isinstance(expected_value, bytes):
+            dtype = np.dtype('|S' + str(len(expected_value)))
+        array = np.full(nitems, expected_value, dtype=dtype)
+        dest = np.empty(nitems, dtype=dtype)
+        schunk.get_slice(out=dest)
+        if dtype in [np.float32, np.float64]:
+            np.testing.assert_allclose(dest, array)
+        else:
+            np.testing.assert_equal(dest, array)
+
+    blosc2.remove_urlpath(urlpath)
diff --git a/tests/test_schunk_fill.py b/tests/test_schunk_fill.py
deleted file mode 100644
index 2d069d09..00000000
--- a/tests/test_schunk_fill.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#######################################################################
-# Copyright (c) 2019-present, Blosc Development Team
-# All rights reserved.
-#
-# This source code is licensed under a BSD-style license (found in the
-# LICENSE file in the root directory of this source tree)
-#######################################################################
-
-import os
-
-import numpy as np
-import pytest
-
-import blosc2
-
-
-@pytest.mark.parametrize("contiguous", [True, False])
-@pytest.mark.parametrize("urlpath", [None, "b2frame"])
-@pytest.mark.parametrize(
-    "cparams, nitems",
-    [
-        ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, 0),
-        ({"typesize": 4}, 200 * 1000),
-        ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, 200 * 1000 * 2 + 17),
-    ],
-)
-@pytest.mark.parametrize("special_value, expected_value",
-    [
-        (blosc2.SpecialValue.ZERO, 0),
-        (blosc2.SpecialValue.NAN, np.nan),
-        (blosc2.SpecialValue.UNINIT, 0),
-        (blosc2.SpecialValue.VALUE, 34),
-        (blosc2.SpecialValue.VALUE, np.pi),
-        (blosc2.SpecialValue.VALUE, b"0123"),
-        (blosc2.SpecialValue.VALUE, True),
-    ],
-)
-def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value, expected_value):
-    storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams}
-    blosc2.remove_urlpath(urlpath)
-
-    chunk_len = 200 * 1000
-    schunk = blosc2.SChunk(chunksize=chunk_len * 4, **storage)
-    if special_value in [blosc2.SpecialValue.ZERO, blosc2.SpecialValue.NAN, blosc2.SpecialValue.UNINIT]:
-        schunk.fill_special(nitems, special_value)
-    else:
-        schunk.fill_special(nitems, special_value, expected_value)
-    assert len(schunk) == nitems
-
-    if special_value != blosc2.SpecialValue.UNINIT:
-        dtype = np.int32
-        if isinstance(expected_value, float):
-            dtype = np.float32
-        elif isinstance(expected_value, bytes):
-            dtype = np.dtype('|S' + str(len(expected_value)))
-        array = np.full(nitems, expected_value, dtype=dtype)
-        dest = np.empty(nitems, dtype=dtype)
-        schunk.get_slice(out=dest)
-        print(dest[:10])
-        if dtype in [np.float32, np.float64]:
-            np.testing.assert_allclose(dest, array)
-        else:
-            np.testing.assert_equal(dest, array)
-
-    blosc2.remove_urlpath(urlpath)