Skip to content

Commit

Permalink
Add better support for cparams in Proxy and C2Array instances
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Oct 1, 2024
1 parent ab19f03 commit 1858e08
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 1 deletion.
2 changes: 2 additions & 0 deletions doc/reference/c2array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Attributes
chunks
blocks
dtype
cparams
urlpath

.. _URLPath:

Expand Down
4 changes: 4 additions & 0 deletions doc/reference/ndarray.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ Attributes
info
schunk
size
cparams
dparams
urlpath
vlmeta


.. currentmodule:: blosc2
Expand Down
4 changes: 4 additions & 0 deletions doc/reference/proxy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ Attributes
:toctree: autofiles/proxy

shape
chunks
blocks
dtype
cparams
info
fields
vlmeta
54 changes: 54 additions & 0 deletions examples/ndarray/proxy-carray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Shows how you can make a proxy of a remote array (served with Caterva2) on disk
# Note that, for running this example, you will need the blosc2-grok package.

import os
from time import time

import blosc2

# Location of the remote array: the Caterva2 server URL and the dataset path on it.
urlbase = "https://demo.caterva2.net/"
path = "example/lung-jpeg2000_10x.b2nd"
# 'a' is a handle to the remote array.
a = blosc2.C2Array(path, urlbase=urlbase)
# Remove any existing 'proxy.b2nd' first (presumably so the proxy starts from a fresh file).
blosc2.remove_urlpath("proxy.b2nd")
# 'b' is a proxy of 'a' that keeps its cache on disk at 'proxy.b2nd'.
b = blosc2.Proxy(a, urlpath="proxy.b2nd")

# Check metadata (note that all should be the same): print the codec and the
# filters reported by the remote array and by the proxy, side by side.
print("*** Metadata ***")
print(f"Codec in 'a': {a.cparams.codec}")
print(f"Codec in 'b': {b.cparams.codec}")
print(f"Filters in 'a': {a.cparams.filters}")
print(f"Filters in 'b': {b.cparams.filters}")

# Check array properties: shape and dtype as seen through 'a' and through 'b'.
print("*** Array properties ***")
print(f"Shape in 'a': {a.shape}")
print(f"Shape in 'b': {b.shape}")
print(f"Type in 'a': {a.dtype}")
print(f"Type in 'b': {b.dtype}")

# Read the same slice three times: from the remote array, through the proxy,
# and through the proxy again (the run labelled "cached" below).
# Each reported time also includes printing the data, because the clock is
# read only after that print has run.
print("*** Fetching data ***")
t0 = time()
print(f"Data in 'a': {a[0, 0, 0:10]}")
print(f"Time to fetch data in 'a': {time() - t0:.3f}s")
t0 = time()
print(f"Data in 'b': {b[0, 0, 0:10]}")
print(f"Time to fetch data in 'b': {time() - t0:.3f}s")
t0 = time()
print(f"Data in 'b': {b[0, 0, 0:10]}")
print(f"Time to fetch data in 'b' (cached): {time() - t0:.3f}s")

# Check sizes. Note that the proxy will only have the 'touched' chunks (only 1 in this case)
# 'a' reports its compressed size through the metadata dict of the remote
# array; 'b' reports it through the SChunk of its cache.
print("*** Sizes ***")
print(f"Size in 'a': {a.meta['schunk']['cbytes']}")
print(f"Size in 'b': {b.schunk.cbytes}")
# Check sizes on disk (only 'b' has a local file in this example)
print("*** Disk sizes ***")
print(f"Size 'b' (disk): {os.stat(b.urlpath).st_size}")
56 changes: 56 additions & 0 deletions examples/ndarray/proxy-ndarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Shows how you can make a proxy of a local array on disk.

import os

import blosc2

# Compression parameters for the source array: LZ4 at level 5 with a single
# BITSHUFFLE filter (one filters_meta entry per filter).
cparams = blosc2.CParams(
clevel=5, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0]
)
cwd = os.getcwd()
# 'a' is the source: a 128x128 float64 array filled with 1, stored in
# 'a.b2nd' under the current working directory.
a = blosc2.full((128, 128), 1, dtype="float64", urlpath=f"{cwd}/a.b2nd", mode="w", cparams=cparams)
# Remove any existing 'proxy.b2nd' first (presumably so the proxy starts from a fresh file).
blosc2.remove_urlpath(f"{cwd}/proxy.b2nd")
# 'b' is a proxy of 'a' that keeps its cache on disk at 'proxy.b2nd'.
b = blosc2.Proxy(a, urlpath=f"{cwd}/proxy.b2nd")

# Check metadata: print codec, compression level and filters for the source
# and for the proxy, side by side.
print("*** Metadata ***")
print(f"Codec in 'a': {a.cparams.codec}")
print(f"Codec in 'b': {b.cparams.codec}")
print(f"Clevel in 'a': {a.cparams.clevel}")
print(f"Clevel in 'b': {b.cparams.clevel}")
print(f"Filters in 'a': {a.cparams.filters}")
print(f"Filters in 'b': {b.cparams.filters}")

# Check array properties: shape and dtype as seen through 'a' and through 'b'.
print("*** Array properties ***")
print(f"Shape in 'a': {a.shape}")
print(f"Shape in 'b': {b.shape}")
print(f"Type in 'a': {a.dtype}")
print(f"Type in 'b': {b.dtype}")

# Check data: read the same 10-element slice from the source and from the proxy.
print("*** Fetching data ***")
print(f"Data in 'a': {a[0, 0:10]}")
print(f"Data in 'b': {b[0, 0:10]}")

# Check sizes. Note that the proxy will only have the 'touched' chunks (only 1 in this case)
print("*** Sizes ***")
print(f"Size in 'a': {a.schunk.cbytes}")
print(f"Size in 'b': {b.schunk.cbytes}")
# Check sizes on disk: both the source and the proxy cache are local files here.
print("*** Disk sizes ***")
print(f"Size 'a' (disk): {os.stat(a.urlpath).st_size}")
print(f"Size 'b' (disk): {os.stat(b.urlpath).st_size}")

# Check vlmeta: list the variable-length metadata keys of each object.
print("*** VLmeta ***")
print(f"VLmeta in 'a': {list(a.vlmeta)}")
print(f"VLmeta in 'b': {list(b.vlmeta)}")
14 changes: 14 additions & 0 deletions src/blosc2/c2array.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def __init__(self, path: str, /, urlbase: str = None, auth_token: str = None):
self.meta = info(self.path, self.urlbase, auth_token=self.auth_token)
except httpx.HTTPStatusError as err:
raise FileNotFoundError(f"Remote path not found: {path}.\nError was: {err}") from err
cparams = self.meta["schunk"]["cparams"]
# Remove "filters, meta" from cparams; this is an artifact from the server
cparams.pop("filters, meta", None)
self._cparams = blosc2.CParams(**cparams)

def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray:
"""
Expand Down Expand Up @@ -322,6 +326,16 @@ def dtype(self) -> np.dtype:
"""The dtype of the remote array"""
return np.dtype(self.meta["dtype"])

@property
def cparams(self) -> blosc2.CParams:
    """Compression parameters of the remote array.

    Returns
    -------
    out: blosc2.CParams
        The object held in ``self._cparams``.
    """
    remote_cparams = self._cparams
    return remote_cparams

@property
def urlpath(self) -> str:
    """URL path of the remote array.

    Returns
    -------
    out: str
        The value of ``self.path``.
    """
    remote_path = self.path
    return remote_path


class URLPath:
def __init__(self, path: str, /, urlbase: str = None, auth_token: str = None):
Expand Down
20 changes: 20 additions & 0 deletions src/blosc2/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,26 @@ def __init__(self, **kwargs):
for field in self.dtype.fields:
self._fields[field] = NDField(self, field)

@property
def cparams(self) -> "blosc2.CParams":
    """The compression parameters used by the array.

    Returns
    -------
    out: blosc2.CParams
        The ``cparams`` object of the underlying ``schunk``. Its settings are
        read as attributes (for example ``arr.cparams.codec``), not as
        dictionary keys.
    """
    # The annotation is quoted so it is never evaluated at definition time.
    return self.schunk.cparams

@property
def dparams(self) -> dict:
    """Decompression parameters used by the array.

    Returns
    -------
    out
        The ``dparams`` value of the underlying ``schunk``.
    """
    # NOTE(review): annotated as dict; confirm against what schunk.dparams
    # actually returns.
    underlying = self.schunk
    return underlying.dparams

@property
def urlpath(self) -> str:
    """URL path of the array.

    Returns
    -------
    out
        The ``urlpath`` value of the underlying ``schunk``.
    """
    underlying = self.schunk
    return underlying.urlpath

@property
def vlmeta(self) -> dict:
    """Variable-length metadata of the array.

    Returns
    -------
    out
        The ``vlmeta`` value of the underlying ``schunk``.
    """
    underlying = self.schunk
    return underlying.vlmeta

@property
def fields(self) -> dict:
"""
Expand Down
20 changes: 19 additions & 1 deletion src/blosc2/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,15 @@ def __init__(self, src: ProxySource or ProxyNDSource, urlpath: str = None, **kwa
self.src.dtype,
chunks=self.src.chunks,
blocks=self.src.blocks,
cparams=self.src.cparams,
urlpath=urlpath,
meta=meta,
)
else:
self._cache = blosc2.SChunk(
chunksize=self.src.chunksize,
cparams=self.src.cparams,
urlpath=urlpath,
cparams={"typesize": self.src.typesize},
meta=meta,
)
self._cache.fill_special(self.src.nbytes // self.src.typesize, blosc2.SpecialValue.UNINIT)
Expand Down Expand Up @@ -408,6 +409,23 @@ def shape(self) -> tuple[int]:
"""The shape of :paramref:`self`"""
return self._cache.shape if isinstance(self._cache, blosc2.NDArray) else len(self._cache)

@property
def schunk(self) -> blosc2.schunk.SChunk:
    """The :ref:`SChunk` backing the cache.

    Returns
    -------
    out: blosc2.schunk.SChunk
        The object held in ``self._schunk_cache``.
    """
    cache_schunk = self._schunk_cache
    return cache_schunk

@property
def cparams(self) -> blosc2.CParams:
    """Compression parameters of the cache.

    Returns
    -------
    out: blosc2.CParams
        The ``cparams`` value of ``self._cache``.
    """
    cache = self._cache
    return cache.cparams

@property
def info(self) -> str:
    """Info of the cache.

    Returns
    -------
    out
        The ``info`` value of the cache, when the cache is a
        :ref:`NDArray`.

    Raises
    ------
    NotImplementedError
        If the cache is not a :ref:`NDArray`.
    """
    cache = self._cache
    # Guard first: only an NDArray cache is asked for its info here.
    if not isinstance(cache, blosc2.NDArray):
        raise NotImplementedError("info is only available if the source is a NDArray")
    return cache.info

def __str__(self):
    """Return a short text form showing the proxied source and the proxy's urlpath."""
    return f"Proxy({self.src}, urlpath={self.urlpath})"

Expand Down
4 changes: 4 additions & 0 deletions src/blosc2/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class CParams:
tuner: blosc2.Tuner = blosc2.Tuner.STUNE

def __post_init__(self):
if not isinstance(self.codec, blosc2.Codec):
self.codec = blosc2.Codec(self.codec)
if len(self.filters) > 6:
raise ValueError("Number of filters exceeds 6")
if len(self.filters) < len(self.filters_meta):
Expand All @@ -91,6 +93,8 @@ def __post_init__(self):
raise ValueError("Number of filters cannot exceed number of filters meta")

for i in range(len(self.filters)):
if not isinstance(self.filters[i], blosc2.Filter):
self.filters[i] = blosc2.Filter(self.filters[i])
if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA:
self.filters_meta[i] = self.typesize

Expand Down

0 comments on commit 1858e08

Please sign in to comment.