-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Working with ProxySource: examples. #259
Changes from 4 commits
97c4d4c
6ef1029
0af76a5
f016ee8
499d313
5f2d9e8
8a6fc22
68f6d49
f0cf2b6
8689a66
afbc1f2
57a5ae6
cb7a849
9ecb2e0
5a8c269
4537161
5fce631
3a43d2e
eff3865
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -198,6 +198,24 @@ def __init__(self, path: str, /, urlbase: str = None, auth_token: str = None): | |
------- | ||
out: C2Array | ||
|
||
Examples | ||
-------- | ||
>>> import blosc2 | ||
>>> import pathlib | ||
>>> host = "https://demo.caterva2.net/" | ||
>>> root = "b2tests" | ||
>>> dir = "expr/" | ||
>>> name = "ds-0-10-linspace-float64-(True, True)-a1-(60, 60)d.b2nd" | ||
>>> path = pathlib.Path(f"{root}/{dir + name}").as_posix() | ||
>>> remote_array = blosc2.C2Array(path, urlbase=host) | ||
>>> f"Shape of the remote array: {remote_array.shape}" | ||
>>> f"Chunks of the remote array: {remote_array.chunks}" | ||
>>> f"Blocks of the remote array: {remote_array.blocks}" | ||
>>> f"Dtype of the remote array: {remote_array.dtype}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd replace this by just:
|
||
Shape of the remote array: (60, 60) | ||
Chunks of the remote array: (30, 60) | ||
Blocks of the remote array: (10, 60) | ||
Dtype of the remote array: float64 | ||
""" | ||
if path.startswith("/"): | ||
raise ValueError("The path should start with a root name, not a slash") | ||
|
@@ -252,6 +270,36 @@ def get_chunk(self, nchunk: int) -> bytes: | |
------- | ||
out: bytes | ||
The requested compressed chunk. | ||
|
||
Examples | ||
-------- | ||
>>> import pathlib | ||
>>> import numpy as np | ||
>>> import blosc2 | ||
>>> host = "https://demo.caterva2.net/" | ||
>>> root = "b2tests" | |
>>> dir = "expr/" | |
>>> name1 = "ds-0-10-linspace-float64-(True, True)-a1-(60, 60)d.b2nd" | ||
>>> name2 = "ds-0-10-linspace-float64-(True, True)-a2-(60, 60)d.b2nd" | ||
>>> path1 = pathlib.Path(f"{root}/{dir + name1}").as_posix() | ||
>>> path2 = pathlib.Path(f"{root}/{dir + name2}").as_posix() | ||
>>> a = blosc2.C2Array(path1, host) | ||
>>> b = blosc2.C2Array(path2, host) | ||
>>> c = a + b | ||
>>> # Get the compressed chunk from array 'a' for index 0 | ||
>>> chunk_index = 0 | ||
>>> compressed_chunk = a.get_chunk(chunk_index) | |
>>> f"Size of chunk {chunk_index} from 'a': {len(compressed_chunk)} bytes" | |
Size of chunk 0 from 'a': 8604 bytes | ||
>>> # Decompress the chunk and convert it to a NumPy array | ||
>>> decompressed_chunk = blosc2.decompress(compressed_chunk) | ||
>>> chunk_np_array = np.frombuffer(decompressed_chunk, dtype=a.dtype) | ||
>>> f"Content of chunk {chunk_index} as NumPy array:{chunk_np_array}" | ||
Content of chunk 0 as NumPy array: | ||
[0.00000000e+00 5.55709919e-03 1.11141984e-02 ... 9.98610725e+00 | ||
9.99166435e+00 9.99722145e+00] | ||
""" | ||
url = _sub_url(self.urlbase, f"api/chunk/{self.path}") | ||
params = {"nchunk": nchunk} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,9 +7,8 @@ | |
####################################################################### | ||
from abc import ABC, abstractmethod | ||
|
||
import numpy as np | ||
|
||
import blosc2 | ||
import numpy as np | ||
|
||
|
||
class ProxyNDSource(ABC): | ||
|
@@ -248,17 +247,12 @@ def fetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blosc2.sch | |
>>> data = np.arange(20).reshape(10, 2) | ||
>>> ndarray = blosc2.asarray(data) | ||
>>> proxy = blosc2.Proxy(ndarray) | ||
>>> full_data = proxy.fetch() | ||
>>> f"Full data cache: {full_data[:]}" | ||
Full data cache: | ||
[[ 0 1][ 2 3][ 4 5] | ||
[ 6 7][ 8 9][10 11] | ||
[12 13][14 15][16 17] | ||
[18 19]] | ||
>>> slice_data = proxy[0:2, :] | ||
>>> f"Slice data cache: {slice_data}" | ||
>>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2))) | ||
>>> f"Slice data cache: {slice_data[:3, :2]}" | ||
Slice data cache: | ||
[[0 1][2 3]] | ||
[[0 1] | ||
[2 3] | ||
[4 5]] | ||
""" | ||
if item is None: | ||
# Full realization | ||
|
@@ -296,6 +290,65 @@ async def afetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blo | |
----- | ||
This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource` | ||
have an async `aget_chunk` method. | ||
|
||
Examples | ||
-------- | ||
>>> import numpy as np | ||
>>> import blosc2 | ||
>>> import asyncio | ||
>>> class MyProxySource: | ||
>>> def __init__(self, data): | ||
>>> # If the next source is multidimensional, it must have the attributes: | ||
>>> self.data = data | ||
>>> f"Data shape: {self.shape}, Chunks: {self.chunks}" | ||
Data shape: (4, 5), Chunks: [2, 5] | ||
>>> f"Blocks: {self.blocks}, Dtype: {self.dtype}" | ||
Blocks: [1, 5], Dtype: int64 | ||
>>> @property | ||
>>> def shape(self): | ||
>>> return self.data.shape | ||
>>> @property | ||
>>> def chunks(self): | ||
>>> return self.data.chunks | ||
>>> @property | ||
>>> def blocks(self): | ||
>>> return self.data.blocks | ||
>>> @property | ||
>>> def dtype(self): | ||
>>> return self.data.dtype | ||
>>> # This method must be present | ||
>>> def get_chunk(self, nchunk): | ||
>>> return self.data.get_chunk(nchunk) | ||
>>> # This method is optional | ||
>>> async def aget_chunk(self, nchunk): | ||
>>> await asyncio.sleep(0.1) # Simulate an asynchronous operation | ||
>>> return self.data.get_chunk(nchunk) | ||
>>> data = np.arange(20).reshape(4, 5) | ||
>>> chunks = [2, 5] | ||
>>> blocks = [1, 5] | ||
>>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks) | ||
>>> source = MyProxySource(data) | |
>>> proxy = blosc2.Proxy(source) | ||
>>> async def fetch_data(): | ||
>>> # Fetch a slice of the data from the proxy asynchronously | ||
>>> slice_data = await proxy.afetch(slice(0, 2)) | ||
>>> # Note that only data fetched is shown, the rest is uninitialized | ||
>>> f"Slice data cache: {slice_data[:]}" | ||
Slice data cache: | ||
[[0 1 2 3 4] | ||
[5 6 7 8 9] | ||
[0 0 0 0 0] | ||
[0 0 0 0 0]] | ||
>>> # Fetch the full data from the proxy asynchronously | ||
>>> full_data = await proxy.afetch() | ||
>>> # Now, all data is shown, meaning the full data has been fetched | ||
>>> f"Full data cache: {full_data[:]}" | ||
Full data cache: | ||
[[ 0 1 2 3 4] | ||
[ 5 6 7 8 9] | ||
[10 11 12 13 14] | ||
[15 16 17 18 19]] | ||
>>> asyncio.run(fetch_data()) | ||
""" | ||
if not callable(getattr(self.src, "aget_chunk", None)): | ||
raise NotImplementedError("afetch is only available if the source has an aget_chunk method") | ||
|
@@ -333,34 +386,36 @@ def __getitem__(self, item: slice | list[slice]) -> np.ndarray: | |
-------- | ||
>>> import numpy as np | ||
>>> import blosc2 | ||
>>> data = np.arange(100).reshape(10, 10) | ||
>>> data = np.arange(25).reshape(5, 5) | ||
>>> ndarray = blosc2.asarray(data) | ||
>>> proxy = blosc2.Proxy(ndarray) | ||
>>> slice_1 = proxy[0:3, 0:3] | ||
>>> f"Slice 1: {slice_1}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same, print the entire proxy values to see that only slice |
||
Slice 1: | ||
[[ 0 1 2] | ||
[ 5 6 7] | ||
[10 11 12] | ||
[20 21 22]] | ||
>>> slice_2 = proxy[5:8, 2:5] | ||
>>> slice_2 = proxy[2:5, 2:5] | ||
>>> f"Slice 2: {slice_2}" | ||
Slice 2: | ||
[[52 53 54] | ||
[62 63 64] | ||
[72 73 74]] | ||
[[12 13 14] | ||
[17 18 19] | ||
[22 23 24]] | ||
""" | ||
# Populate the cache | ||
self.fetch(item) | ||
return self._cache[item] | ||
|
||
@property | ||
def dtype(self) -> np.dtype: | ||
"""The dtype of :paramref:`self` or None if the data is unidimensional""" | ||
def dtype(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto |
||
"""The dtype of :paramref:`self` or None if the data is unidimensional | ||
""" | ||
return self._cache.dtype if isinstance(self._cache, blosc2.NDArray) else None | ||
|
||
@property | ||
def shape(self) -> tuple[int]: | ||
"""The shape of :paramref:`self`""" | ||
def shape(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto |
||
"""The shape of :paramref:`self` | ||
""" | ||
return self._cache.shape if isinstance(self._cache, blosc2.NDArray) else len(self._cache) | ||
|
||
def __str__(self): | ||
|
@@ -378,7 +433,7 @@ def vlmeta(self) -> blosc2.schunk.vlmeta: | |
return self._schunk_cache.vlmeta | ||
|
||
@property | ||
def fields(self) -> dict: | ||
def fields(self)-> dict: | ||
""" | ||
Dictionary with the fields of :paramref:`self`. | ||
|
||
|
@@ -390,6 +445,26 @@ def fields(self) -> dict: | |
See Also | ||
-------- | ||
:ref:`NDField` | ||
|
||
Examples | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm unsure about this one. It brings some insight about how to use the |
||
-------- | ||
>>> import numpy as np | ||
>>> import blosc2 | ||
>>> data = np.zeros(16, dtype=[('field1', 'i4'), ('field2', 'f4')]).reshape(4, 4) | ||
>>> ndarray = blosc2.asarray(data) | ||
>>> proxy = blosc2.Proxy(ndarray) | ||
>>> # Get a dictionary of fields from the proxy, where each field can be accessed individually | ||
>>> fields_dict = proxy.fields | ||
>>> for field_name, field_proxy in fields_dict.items(): | ||
>>> f"Field name: {field_name}, Field data: {field_proxy}" | ||
Field name: field1, Field data: <blosc2.proxy.ProxyNDField object at 0x10c176c90> | ||
Field name: field2, Field data: <blosc2.proxy.ProxyNDField object at 0x103264bf0> | ||
>>> field1_data = fields_dict['field1'][:] | ||
>>> field1_data | ||
[[0 0 0 0] | ||
[0 0 0 0] | ||
[0 0 0 0] | ||
[0 0 0 0]] | ||
""" | ||
_fields = getattr(self._cache, "fields", None) | ||
if _fields is None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd use regular examples in
example
root.