Skip to content

Commit

Permalink
Add support for saving expressions with reductions
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Oct 19, 2024
1 parent 93f962f commit bf15e21
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 19 deletions.
3 changes: 1 addition & 2 deletions examples/ndarray/reduce_string_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@

# This shows how to evaluate expressions with NDArray instances as operands.

import numpy as np

import blosc2
import numpy as np

shape = (10, 10, 2)

Expand Down
36 changes: 25 additions & 11 deletions src/blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@
def is_inside_eval():
# Get the current call stack
stack = inspect.stack()
# return any('eval' in frame_info.function or 'eval' in frame_info.filename for frame_info in stack)
return any("_new_expr" in frame_info.function for frame_info in stack)
return any(
"_new_expr" in frame_info.function or "_open_lazyarray" in frame_info.function
for frame_info in stack
)


class ReduceOp(Enum):
Expand Down Expand Up @@ -1123,10 +1125,11 @@ def reduce_slices(
if out is None:
if dtype is None:
dtype = result.dtype
out = convert_none_out(dtype, reduce_op, reduced_shape)
if is_inside_eval():
out = np.zeros(reduced_shape, dtype=dtype)
# We already have the dtype and reduced_shape, so return immediately
return out
out = convert_none_out(dtype, reduce_op, reduced_shape)

# Update the output array with the result
if reduce_op == ReduceOp.ANY:
Expand Down Expand Up @@ -1785,12 +1788,16 @@ def info_items(self):
items += [("dtype", self.dtype)]
return items

def save(self, **kwargs):
if kwargs.get("urlpath") is None:
def save(self, urlpath=None, **kwargs):
if urlpath is None:
raise ValueError("To save a LazyArray you must provide an urlpath")

# Validate expression
validate_expr(self.expression)

meta = kwargs.get("meta", {})
meta["LazyArray"] = LazyArrayEnum.Expr.value
kwargs["urlpath"] = urlpath
kwargs["meta"] = meta
kwargs["mode"] = "w" # always overwrite the file in urlpath

Expand Down Expand Up @@ -2010,16 +2017,23 @@ def _open_lazyarray(array):
# Validate the expression (prevent security issues)
validate_expr(expr)
# Create the expression as such
expr = eval(expr, globals, operands_dict)
if isinstance(expr, blosc2.LazyExpr):
new_expr = eval(expr, globals, operands_dict)
if isinstance(new_expr, blosc2.LazyExpr):
new_expr._dtype = new_expr.dtype
new_expr._shape = new_expr.shape
# Restore the original expression and operands
new_expr.expression = expr
new_expr.operands = operands_dict
# Make the array info available for the user (only available when opened from disk)
expr.array = array
elif isinstance(expr, np.ndarray):
new_expr.array = array
# We want to expose schunk too, so that .info() can be used on the LazyArray
new_expr.schunk = array.schunk
elif isinstance(new_expr, np.ndarray):
# The expression was evaluated immediately
expr = blosc2.asarray(expr)
new_expr = blosc2.asarray(new_expr)
else:
raise ValueError("Unexpected error when opening the LazyArray")
return expr
return new_expr


def lazyudf(
Expand Down
7 changes: 3 additions & 4 deletions tests/ndarray/test_lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,11 +503,10 @@ def test_save():
np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol)

expr = blosc2.open(urlpath_save)
# After opening, check that a lazy expression does have an array attribute,
# but does not have a schunk one. This is by design, and because some packages
# (e.g. Caterva2), might assume that a LazyExpr does not have a schunk attribute.
# After opening, check that a lazy expression does have an array
# and schunk attributes. This is to allow the .info() method to work.
assert hasattr(expr, "array") is True
assert hasattr(expr, "schunk") is False
assert hasattr(expr, "schunk") is True
# Check the dtype (should be upcasted to float64)
assert expr.array.dtype == np.float64
res = expr.compute()
Expand Down
28 changes: 26 additions & 2 deletions tests/ndarray/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
# LICENSE file in the root directory of this source tree)
#######################################################################

import blosc2
import numexpr as ne
import numpy as np
import pytest

import blosc2

NITEMS_SMALL = 1000
NITEMS = 10_000

Expand Down Expand Up @@ -213,3 +212,28 @@ def test_fast_path(chunks, blocks, disk, fill_value, reduce_op, axis):
nres = getattr(na[:], reduce_op)(axis=axis)

assert np.allclose(res, nres)

@pytest.mark.parametrize("disk", [True, False])
@pytest.mark.parametrize("fill_value", [0, 1, 0.32])
@pytest.mark.parametrize("reduce_op", ["sum", "prod", "min", "max", "any", "all", "mean", "std", "var"])
@pytest.mark.parametrize("axis", [0, (0, 1), None])
def test_save(disk, fill_value, reduce_op, axis):
shape = (20, 50, 100)
urlpath = "a1.b2nd" if disk else None
if fill_value != 0:
a = blosc2.full(shape, fill_value, urlpath=urlpath, mode="w")
else:
a = blosc2.zeros(shape, dtype=np.float64, urlpath=urlpath, mode="w")
if disk:
a = blosc2.open(urlpath)
na = a[:]

expr = f"a + a.{reduce_op}(axis={axis})"
lexpr = blosc2.lazyexpr(expr, operands={"a": a})
if disk:
lexpr.save("out.b2nd")
lexpr = blosc2.open("out.b2nd")
res = lexpr.compute()
nres = na + getattr(na[:], reduce_op)(axis=axis)

assert np.allclose(res[:], nres)

0 comments on commit bf15e21

Please sign in to comment.