Skip to content

Commit

Permalink
Merge pull request #257 from omaech/Lazyexpr-examples
Browse files Browse the repository at this point in the history
LazyArray examples
  • Loading branch information
FrancescAlted authored Sep 17, 2024
2 parents f4d1de9 + a4cce3b commit 52c651a
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 0 deletions.
111 changes: 111 additions & 0 deletions src/blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,27 @@ def eval(self, item: slice | list[slice] = None, **kwargs: dict) -> blosc2.NDArr
* If self is a LazyArray from an udf, the kwargs used to store the resulting
array will be the ones passed to the constructor in :func:`lazyudf` (except the
`urlpath`) updated with the kwargs passed when calling this method.
Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> # Convert numpy arrays to Blosc2 arrays
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Perform the mathematical operation
>>> expr = a1 + b1
>>> output = expr.eval()
>>> # Result of a + b (lazy evaluation)
>>> print(output[:])
[[ 0.    1.25  2.5 ]
 [ 3.75  5.    6.25]
 [ 7.5   8.75 10.  ]]
"""
pass

Expand All @@ -110,6 +131,26 @@ def __getitem__(self, item: int | slice | Sequence[slice]) -> blosc2.NDArray:
-------
out: np.ndarray
An array with the data containing the slice evaluated.
Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [30, 4]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> # Convert numpy arrays to Blosc2 arrays
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Perform the mathematical operation
>>> expr = a1 + b1 # LazyExpr expression
>>> expr[3]
[2.01680672 2.18487395 2.35294118 2.5210084 ]
>>> expr[2:4]
[[1.34453782 1.51260504 1.68067227 1.8487395 ]
[2.01680672 2.18487395 2.35294118 2.5210084 ]]
"""
pass

Expand All @@ -135,6 +176,28 @@ def save(self, **kwargs: dict) -> None:
if its source is a :ref:`SChunk`, :ref:`NDArray` or a :ref:`C2Array` (see :func:`blosc2.open` notes
section for more info).
* This is currently only supported for :ref:`LazyExpr`.
Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> # Define file paths for storing the arrays
>>> a1 = blosc2.asarray(a, urlpath='a_array.b2nd', mode='w')
>>> b1 = blosc2.asarray(b, urlpath='b_array.b2nd', mode='w')
>>> # Perform the mathematical operation to create a LazyExpr expression
>>> expr = a1 + b1
>>> # Save the LazyExpr to disk
>>> expr.save(urlpath='lazy_array.b2nd', mode='w')
>>> # Open and load the LazyExpr from disk
>>> disk_expr = blosc2.open('lazy_array.b2nd')
>>> disk_expr[:2]
[[0. 1.25 2.5 ]
[3.75 5. 6.25]]
"""
pass

Expand Down Expand Up @@ -1912,6 +1975,30 @@ def lazyudf(func: Callable[[tuple, np.ndarray, tuple[int]], None], inputs: tuple
out: :ref:`LazyUDF`
A :ref:`LazyUDF` is returned.
Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(10, 20, num=size, dtype=dtype).reshape(shape)
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Define a user-defined function that will be applied to each block of data
>>> def my_function(inputs_tuple, output, offset):
...     a, b = inputs_tuple
...     output[:] = a + b
>>> # Create a LazyUDF object using the user-defined function
>>> lazy_udf = blosc2.lazyudf(my_function, [a1, b1], dtype)
>>> type(lazy_udf)
<class 'blosc2.lazyexpr.LazyUDF'>
>>> # Result of LazyUDF evaluation
>>> print(lazy_udf[:])
[[10.  12.5 15. ]
 [17.5 20.  22.5]
 [25.  27.5 30. ]]
"""
return LazyUDF(func, inputs, dtype, chunked_eval, **kwargs)

Expand Down Expand Up @@ -1942,6 +2029,30 @@ def lazyexpr(expression: str | bytes | LazyExpr, operands: dict = None,
out: :ref:`LazyExpr`
A :ref:`LazyExpr` is returned.
Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> a1 = blosc2.asarray(a)
>>> a1[:]
[[0. 0.625 1.25 ]
[1.875 2.5 3.125]
[3.75 4.375 5. ]]
>>> b1 = blosc2.asarray(b)
>>> expr = 'a1 * b1 + 2'
>>> operands = {'a1': a1, 'b1': b1}
>>> lazy_expr = blosc2.lazyexpr(expr, operands=operands)
>>> f"Lazy expression created: {lazy_expr}"
Lazy expression created: a1 * b1 + 2
>>> lazy_expr[:]
[[ 2. 2.390625 3.5625 ]
[ 5.515625 8.25 11.765625]
[16.0625 21.140625 27. ]]
"""
if isinstance(expression, LazyExpr):
if operands is not None:
Expand Down
39 changes: 39 additions & 0 deletions src/blosc2/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,25 @@ def fetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blosc2.sch
-------
out: :ref:`NDArray` or :ref:`SChunk`
The local container used to cache the already requested data.
Examples
--------
>>> import numpy as np
>>> import blosc2
>>> data = np.arange(20).reshape(10, 2)
>>> ndarray = blosc2.asarray(data)
>>> proxy = blosc2.Proxy(ndarray)
>>> full_data = proxy.fetch()
>>> # Full data cache
>>> print(full_data[:])
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]]
>>> slice_data = proxy[0:2, :]
>>> # Slice data cache
>>> print(slice_data)
[[0 1]
 [2 3]]
"""
if item is None:
# Full realization
Expand Down Expand Up @@ -199,6 +218,26 @@ def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
-------
out: numpy.ndarray
An array with the data slice.
Examples
--------
>>> import numpy as np
>>> import blosc2
>>> data = np.arange(100).reshape(10, 10)
>>> ndarray = blosc2.asarray(data)
>>> proxy = blosc2.Proxy(ndarray)
>>> slice_1 = proxy[0:3, 0:3]
>>> # Slice 1
>>> print(slice_1)
[[ 0  1  2]
 [10 11 12]
 [20 21 22]]
>>> slice_2 = proxy[5:8, 2:5]
>>> # Slice 2
>>> print(slice_2)
[[52 53 54]
 [62 63 64]
 [72 73 74]]
"""
# Populate the cache
self.fetch(item)
Expand Down

0 comments on commit 52c651a

Please sign in to comment.