diff --git a/examples/ndarray/reduce_string_expr.py b/examples/ndarray/reduce_string_expr.py index b27d7624..6ca37aa2 100644 --- a/examples/ndarray/reduce_string_expr.py +++ b/examples/ndarray/reduce_string_expr.py @@ -8,9 +8,8 @@ # This shows how to evaluate expressions with NDArray instances as operands. -import numpy as np - import blosc2 +import numpy as np shape = (10, 10, 2) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 5cbb0451..eb66f756 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -40,8 +40,10 @@ def is_inside_eval(): # Get the current call stack stack = inspect.stack() - # return any('eval' in frame_info.function or 'eval' in frame_info.filename for frame_info in stack) - return any("_new_expr" in frame_info.function for frame_info in stack) + return any( + "_new_expr" in frame_info.function or "_open_lazyarray" in frame_info.function + for frame_info in stack + ) class ReduceOp(Enum): @@ -1123,10 +1125,11 @@ def reduce_slices( if out is None: if dtype is None: dtype = result.dtype - out = convert_none_out(dtype, reduce_op, reduced_shape) if is_inside_eval(): + out = np.zeros(reduced_shape, dtype=dtype) # We already have the dtype and reduced_shape, so return immediately return out + out = convert_none_out(dtype, reduce_op, reduced_shape) # Update the output array with the result if reduce_op == ReduceOp.ANY: @@ -1785,12 +1788,16 @@ def info_items(self): items += [("dtype", self.dtype)] return items - def save(self, **kwargs): - if kwargs.get("urlpath") is None: + def save(self, urlpath=None, **kwargs): + if urlpath is None: raise ValueError("To save a LazyArray you must provide an urlpath") + # Validate expression + validate_expr(self.expression) + meta = kwargs.get("meta", {}) meta["LazyArray"] = LazyArrayEnum.Expr.value + kwargs["urlpath"] = urlpath kwargs["meta"] = meta kwargs["mode"] = "w" # always overwrite the file in urlpath @@ -2010,16 +2017,23 @@ def _open_lazyarray(array): # Validate the expression (prevent security issues) validate_expr(expr) # Create the expression as such - expr = eval(expr, globals, operands_dict) - if isinstance(expr, blosc2.LazyExpr): + new_expr = eval(expr, globals, operands_dict) + if isinstance(new_expr, blosc2.LazyExpr): + new_expr._dtype = new_expr.dtype + new_expr._shape = new_expr.shape + # Restore the original expression and operands + new_expr.expression = expr + new_expr.operands = operands_dict # Make the array info available for the user (only available when opened from disk) - expr.array = array - elif isinstance(expr, np.ndarray): + new_expr.array = array + # We want to expose schunk too, so that .info() can be used on the LazyArray + new_expr.schunk = array.schunk + elif isinstance(new_expr, np.ndarray): # The expression was evaluated immediately - expr = blosc2.asarray(expr) + new_expr = blosc2.asarray(new_expr) else: raise ValueError("Unexpected error when opening the LazyArray") - return expr + return new_expr def lazyudf( diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index 63825a03..47433d8f 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -503,11 +503,10 @@ def test_save(): np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) expr = blosc2.open(urlpath_save) - # After opening, check that a lazy expression does have an array attribute, - # but does not have a schunk one. This is by design, and because some packages - # (e.g. Caterva2), might assume that a LazyExpr does not have a schunk attribute. + # After opening, check that a lazy expression does have an array + # and schunk attributes. This is to allow the .info() method to work. assert hasattr(expr, "array") is True - assert hasattr(expr, "schunk") is False + assert hasattr(expr, "schunk") is True # Check the dtype (should be upcasted to float64) assert expr.array.dtype == np.float64 res = expr.compute() diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index aa231338..e35241ef 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -6,12 +6,11 @@ # LICENSE file in the root directory of this source tree) ####################################################################### +import blosc2 import numexpr as ne import numpy as np import pytest -import blosc2 - NITEMS_SMALL = 1000 NITEMS = 10_000 @@ -213,3 +212,28 @@ def test_fast_path(chunks, blocks, disk, fill_value, reduce_op, axis): nres = getattr(na[:], reduce_op)(axis=axis) assert np.allclose(res, nres) + +@pytest.mark.parametrize("disk", [True, False]) +@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) +@pytest.mark.parametrize("reduce_op", ["sum", "prod", "min", "max", "any", "all", "mean", "std", "var"]) +@pytest.mark.parametrize("axis", [0, (0, 1), None]) +def test_save(disk, fill_value, reduce_op, axis): + shape = (20, 50, 100) + urlpath = "a1.b2nd" if disk else None + if fill_value != 0: + a = blosc2.full(shape, fill_value, urlpath=urlpath, mode="w") + else: + a = blosc2.zeros(shape, dtype=np.float64, urlpath=urlpath, mode="w") + if disk: + a = blosc2.open(urlpath) + na = a[:] + + expr = f"a + a.{reduce_op}(axis={axis})" + lexpr = blosc2.lazyexpr(expr, operands={"a": a}) + if disk: + lexpr.save("out.b2nd") + lexpr = blosc2.open("out.b2nd") + res = lexpr.compute() + nres = na + getattr(na[:], reduce_op)(axis=axis) + + assert np.allclose(res[:], nres)