From 433131d2c216c9df105ce4b20ec48e35ddda0944 Mon Sep 17 00:00:00 2001 From: oumaima-ech-chdig Date: Wed, 25 Sep 2024 13:12:32 +0200 Subject: [PATCH] Proxy and c2array examples: all set. --- src/blosc2/c2array.py | 44 ++++++++++++++++++ src/blosc2/proxy.py | 106 +++++++++++++++++++++++++++++++++--------- 2 files changed, 129 insertions(+), 21 deletions(-) diff --git a/src/blosc2/c2array.py b/src/blosc2/c2array.py index 23a68def..bf088881 100644 --- a/src/blosc2/c2array.py +++ b/src/blosc2/c2array.py @@ -198,6 +198,20 @@ def __init__(self, path: str, /, urlbase: str = None, auth_token: str = None): ------- out: C2Array + Examples + -------- + >>> import blosc2 + >>> urlbase = "https://demo.caterva2.net/" + >>> path = "example/dir1/ds-3d.b2nd" + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> remote_array.shape + (3, 4, 5) + >>> remote_array.chunks + (2, 3, 4) + >>> remote_array.blocks + (2, 2, 2) + >>> remote_array.dtype + float32 """ if path.startswith("/"): raise ValueError("The path should start with a root name, not a slash") @@ -235,6 +249,19 @@ def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray: ------- out: numpy.ndarray A numpy.ndarray containing the data slice. + + Examples + -------- + >>> import blosc2 + >>> urlbase = "https://demo.caterva2.net/" + >>> path = "example/dir1/ds-2d.b2nd" + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> data_slice = remote_array[3:5, 1:4] + >>> data_slice.shape + (2, 3) + >>> data_slice[:] + [[61 62 63] + [81 82 83]] """ slice_ = slice_to_string(slice_) return fetch_data(self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token) @@ -252,6 +279,23 @@ def get_chunk(self, nchunk: int) -> bytes: ------- out: bytes The requested compressed chunk. 
+ + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> urlbase = "https://demo.caterva2.net/" + >>> path = "example/dir1/ds-3d.b2nd" + >>> a = blosc2.C2Array(path, urlbase) + >>> # Get the compressed chunk from array 'a' for index 0 + >>> compressed_chunk = a.get_chunk(0) + >>> f"Size of chunk {0} from a: {len(compressed_chunk)} bytes" + Size of chunk 0 from a: 160 bytes + >>> # Decompress the chunk and convert it to a NumPy array + >>> decompressed_chunk = blosc2.decompress(compressed_chunk) + >>> np.frombuffer(decompressed_chunk, dtype=a.dtype) + [ 0. 1. 5. 6. 20. 21. 25. 26. 2. 3. 7. 8. 22. 23. 27. 28. 10. 11. + 0. 0. 30. 31. 0. 0. 12. 13. 0. 0. 32. 33. 0. 0.] """ url = _sub_url(self.urlbase, f"api/chunk/{self.path}") params = {"nchunk": nchunk} diff --git a/src/blosc2/proxy.py b/src/blosc2/proxy.py index 42cbe8e4..23ca81ef 100644 --- a/src/blosc2/proxy.py +++ b/src/blosc2/proxy.py @@ -248,17 +248,11 @@ def fetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blosc2.sch >>> data = np.arange(20).reshape(10, 2) >>> ndarray = blosc2.asarray(data) >>> proxy = blosc2.Proxy(ndarray) - >>> full_data = proxy.fetch() - >>> f"Full data cache: {full_data[:]}" - Full data cache: - [[ 0 1][ 2 3][ 4 5] - [ 6 7][ 8 9][10 11] - [12 13][14 15][16 17] - [18 19]] - >>> slice_data = proxy[0:2, :] - >>> f"Slice data cache: {slice_data}" - Slice data cache: - [[0 1][2 3]] + >>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2))) + >>> slice_data[:3, :2] + [[0 1] + [2 3] + [4 5]] """ if item is None: # Full realization @@ -296,6 +290,60 @@ async def afetch(self, item: slice | list[slice] = None) -> blosc2.NDArray | blo ----- This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource` have an async `aget_chunk` method. 
+ + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> import asyncio + >>> from blosc2 import ProxyNDSource + >>> class MyProxySource(ProxyNDSource): + >>> def __init__(self, data): + >>> # If the next source is multidimensional, it must have the attributes: + >>> self.data = data + >>> f"Data shape: {self.shape}, Chunks: {self.chunks}" + >>> f"Blocks: {self.blocks}, Dtype: {self.dtype}" + >>> @property + >>> def shape(self): + >>> return self.data.shape + >>> @property + >>> def chunks(self): + >>> return self.data.chunks + >>> @property + >>> def blocks(self): + >>> return self.data.blocks + >>> @property + >>> def dtype(self): + >>> return self.data.dtype + >>> # This method must be present + >>> def get_chunk(self, nchunk): + >>> return self.data.get_chunk(nchunk) + >>> # This method is optional + >>> async def aget_chunk(self, nchunk): + >>> await asyncio.sleep(0.1) # Simulate an asynchronous operation + >>> return self.data.get_chunk(nchunk) + >>> data = np.arange(20).reshape(4, 5) + >>> chunks = [2, 5] + >>> blocks = [1, 5] + >>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks) + >>> source = MyProxySource(data) + >>> proxy = blosc2.Proxy(source) + >>> async def fetch_data(): + >>> # Fetch a slice of the data from the proxy asynchronously + >>> slice_data = await proxy.afetch(slice(0, 2)) + >>> # Note that only data fetched is shown, the rest is uninitialized + >>> slice_data[:] + >>> asyncio.run(fetch_data()) + >>> # Using getitem to get a slice of the data + >>> result = proxy[1:2, 1:3] + >>> f"Proxy getitem: {result}" + Data shape: (4, 5), Chunks: (2, 5) + Blocks: (1, 5), Dtype: int64 + [[0 1 2 3 4] + [5 6 7 8 9] + [0 0 0 0 0] + [0 0 0 0 0]] + Proxy getitem: [[6 7]] """ if not callable(getattr(self.src, "aget_chunk", None)): raise NotImplementedError("afetch is only available if the source has an aget_chunk method") @@ -333,21 +381,18 @@ def __getitem__(self, item: slice | list[slice]) -> np.ndarray: -------- >>> import numpy 
as np >>> import blosc2 - >>> data = np.arange(100).reshape(10, 10) + >>> data = np.arange(25).reshape(5, 5) >>> ndarray = blosc2.asarray(data) >>> proxy = blosc2.Proxy(ndarray) - >>> slice_1 = proxy[0:3, 0:3] - >>> f"Slice 1: {slice_1}" - Slice 1: + >>> proxy[0:3, 0:3] [[ 0 1 2] + [ 5 6 7] [10 11 12] [20 21 22]] - >>> slice_2 = proxy[5:8, 2:5] - >>> f"Slice 2: {slice_2}" - Slice 2: - [[52 53 54] - [62 63 64] - [72 73 74]] + >>> proxy[2:5, 2:5] + [[12 13 14] + [17 18 19] + [22 23 24]] """ # Populate the cache self.fetch(item) @@ -390,6 +435,25 @@ def fields(self) -> dict: See Also -------- :ref:`NDField` + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> data = np.ones(16, dtype=[('field1', 'i4'), ('field2', 'f4')]).reshape(4, 4) + >>> ndarray = blosc2.asarray(data) + >>> proxy = blosc2.Proxy(ndarray) + >>> # Get a dictionary of fields from the proxy, where each field can be accessed individually + >>> fields_dict = proxy.fields + >>> for field_name, field_proxy in fields_dict.items(): + ...     print(f"Field name: {field_name}, Field data: {field_proxy}") + Field name: field1, Field data: + Field name: field2, Field data: + >>> fields_dict['field2'][:] + [[1. 1. 1. 1.] + [1. 1. 1. 1.] + [1. 1. 1. 1.] + [1. 1. 1. 1.]] """ _fields = getattr(self._cache, "fields", None) if _fields is None: