Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added tests for filter classes #42

Merged
merged 8 commits into from
Oct 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,44 +52,44 @@ Sample code:

* The HDF5 filter ID of embedded plugins:

- ``BLOSC``
- ``BSHUF``
- ``LZ4``
- ``BLOSC_ID``
- ``BSHUF_ID``
- ``LZ4_ID``

* Compression option helper functions to prepare arguments to provide to ``h5py.Group.create_dataset``:
* Compression option helper classes to prepare arguments to provide to ``h5py.Group.create_dataset``:

- `bitshuffle_options(nelems=0, lz4=True)`_
- `blosc_options(level=9, shuffle='byte', compression='blosclz')`_
- `lz4_options(nbytes=0)`_
- `Bitshuffle(nelems=0, lz4=True)`_
- `Blosc(level=9, shuffle='byte', compression='blosclz')`_
- `LZ4(nbytes=0)`_

* ``FILTERS``: A dictionary mapping provided filters to their ID
* ``PLUGINS_PATH``: The directory where the provided filter libraries are stored.


bitshuffle_options(nelems=0, lz4=True)
**************************************
Bitshuffle(nelems=0, lz4=True)
******************************

This function takes the following arguments and returns the compression options to feed into ``h5py.Group.create_dataset`` for using the bitshuffle filter:
This class takes the following arguments; an instance provides the compression options to feed into ``h5py.Group.create_dataset`` for using the bitshuffle filter:

* **nelems** the number of elements per block, needs to be divisible by eight (default is 0, about 8kB per block)
* **lz4** if True the elements get compressed using lz4 (default is True)

It returns a dict that can be passed as keyword arguments.
An instance can be unpacked with ``**`` and passed as keyword arguments.

Sample code:

.. code-block:: python

f = h5py.File('test.h5', 'w')
f.create_dataset('bitshuffle_with_lz4', data=numpy.arange(100),
**hdf5plugin.bshuf_options(nelems=0, lz4=True))
**hdf5plugin.Bitshuffle(nelems=0, lz4=True))
f.close()


blosc_options(level=9, shuffle='byte', compression='blosclz')
*************************************************************
Blosc(level=9, shuffle='byte', compression='blosclz')
*****************************************************

This function takes the following arguments and returns the compression options to feed into ``h5py.Group.create_dataset`` for using the blosc filter:
This class takes the following arguments; an instance provides the compression options to feed into ``h5py.Group.create_dataset`` for using the blosc filter:

* **level** the compression level, from 0 to 9 (default is 9)
* **shuffle** the shuffling mode, either 'none', 'bit' or 'byte' (default is 'byte')
Expand All @@ -101,35 +101,35 @@ This function takes the following arguments and returns the compression options
* 'zlib'
* 'zstd'

It returns a dict that can be passed as keyword arguments.
An instance can be unpacked with ``**`` and passed as keyword arguments.

Sample code:

.. code-block:: python

f = h5py.File('test.h5', 'w')
f.create_dataset('blosc_byte_shuffle_blosclz', data=numpy.arange(100),
**hdf5plugin.blosc_options(level=9, shuffle='byte', compression='blosclz'))
**hdf5plugin.Blosc(level=9, shuffle='byte', compression='blosclz'))
f.close()


lz4_options(nbytes=0)
*********************
LZ4(nbytes=0)
*************

This function takes the number of bytes per block as argument and returns the compression options to feed into ``h5py.Group.create_dataset`` for using the lz4 filter:
This class takes the number of bytes per block as argument; an instance provides the compression options to feed into ``h5py.Group.create_dataset`` for using the lz4 filter:

* **nbytes** the number of bytes per block; it needs to be in the range 0 <= nbytes <= 2113929216 (about 1.9GB).
The default value is 0 (for 1GB).

It returns a dict that can be passed as keyword arguments.
An instance can be unpacked with ``**`` and passed as keyword arguments.

Sample code:

.. code-block:: python

f = h5py.File('test.h5', 'w')
f.create_dataset('lz4', data=numpy.arange(100),
**hdf5plugin.lz4_options(nbytes=0))
**hdf5plugin.LZ4(nbytes=0))
f.close()

Dependencies
Expand Down
100 changes: 66 additions & 34 deletions hdf5plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@
import logging as _logging
import os as _os
import sys as _sys

if _sys.version_info[0] >= 3:
from collections.abc import Mapping as _Mapping
else :
from collections import Mapping as _Mapping
import h5py as _h5py


Expand All @@ -57,16 +60,16 @@


# IDs of provided filters
BLOSC = 32001
BLOSC_ID = 32001
"""Blosc filter ID"""

BSHUF = 32008
BSHUF_ID = 32008
"""Bitshuffle filter ID"""

LZ4 = 32004
"""LZ4 filter ID"""
LZ4_ID = 32004
"""LZ4 filter ID"""

FILTERS = {'blosc': BLOSC, 'bshuf': BSHUF, 'lz4': LZ4}
FILTERS = {'blosc': BLOSC_ID, 'bshuf': BSHUF_ID, 'lz4': LZ4_ID}
"""Mapping of filter name to HDF5 filter ID for available filters"""

# compression_opts
Expand All @@ -87,9 +90,38 @@
'zstd': 5,
}

try:
_FilterRefClass = _h5py.filters.FilterRefBase
except AttributeError:
class _FilterRefClass(_Mapping):
"""Base class for referring to an HDF5 filter and describing its options

Your subclass must define filter_id, and may define a filter_options tuple.
"""
filter_id = None
filter_options = ()

# Mapping interface supports using instances as **kwargs for compatibility
# with older versions of h5py
@property
def _kwargs(self):
return {
'compression': self.filter_id,
'compression_opts': self.filter_options
}

def __len__(self):
return len(self._kwargs)

def __iter__(self):
return iter(self._kwargs)

def blosc_options(level=9, shuffle='byte', compression='blosclz'):
"""Prepare h5py.Group.create_dataset's compression and compression_opts arguments for using blosc filter.
def __getitem__(self, item):
return self._kwargs[item]


class Blosc(_FilterRefClass):
"""h5py.Group.create_dataset's compression and compression_opts arguments for using blosc filter.

:param int level:
Compression level from 0 no compression to 9 maximum compression.
Expand All @@ -100,50 +132,50 @@ def blosc_options(level=9, shuffle='byte', compression='blosclz'):
- `bit`: bit-wise shuffle.
:param str compression:
`blosclz` (default), `lz4`, `lz4hc`, `zlib`, `zstd`
:returns: compression and compression_opts arguments for h5py.Group.create_dataset
:rtype: dict
"""
level = int(level)
assert 0 <= level <= 9
shuffle = _blosc_shuffle[shuffle]
compression = _blosc_compression[compression]
return {'compression_opts': (0, 0, 0, 0, level, shuffle, compression),
'compression': BLOSC}
filter_id = BLOSC_ID

def __init__ (self, level=9, shuffle='byte', compression='blosclz'):
level = int(level)
assert 0 <= level <= 9
shuffle = _blosc_shuffle[shuffle]
compression = _blosc_compression[compression]
self.filter_options = (0, 0, 0, 0, level, shuffle, compression)


def bshuf_options(nelems=0, lz4=True):
"""Prepare h5py.Group.create_dataset's compression and compression_opts arguments for using bitshuffle filter.
class Bitshuffle(_FilterRefClass):
"""h5py.Group.create_dataset's compression and compression_opts arguments for using bitshuffle filter.

:param int nelems:
The number of elements per block.
Default: 0 (for about 8kB per block).
:param bool lz4:
Whether to use LZ4 compression or not as part of the filter.
Whether to use LZ4 compression or not as part of the filter.
Default: True
:returns: compression and compression_opts arguments for h5py.Group.create_dataset
:rtype: dict
"""
nelems = int(nelems)
assert nelems % 8 == 0
filter_id = BSHUF_ID

lz4_enabled = 2 if lz4 else 0
def __init__(self, nelems=0, lz4=True):
nelems = int(nelems)
assert nelems % 8 == 0

return {'compression_opts': (nelems, lz4_enabled),
'compression': BSHUF}
lz4_enabled = 2 if lz4 else 0
self.filter_options = (nelems, lz4_enabled)

def lz4_options(nbytes=0):
"""Prepare h5py.Group.create_dataset's compression and compression_opts arguments for using lz4 filter.

class LZ4(_FilterRefClass):
"""h5py.Group.create_dataset's compression and compression_opts arguments for using lz4 filter.

:param int nbytes:
The number of bytes per block.
Default: 0 (for 1GB per block).
:returns: compression and compression_opts arguments for h5py.Group.create_dataset
:rtype: dict
"""
nbytes = int(nbytes)
assert 0 <= nbytes <= 0x7E000000
return {'compression_opts': (nbytes,),
'compression': LZ4}
filter_id = LZ4_ID

def __init__(self, nbytes = 0):
nbytes = int(nbytes)
assert 0 <= nbytes <= 0x7E000000
self.filter_options = (nbytes,)


def _init_filters():
Expand Down
43 changes: 23 additions & 20 deletions hdf5plugin/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,20 @@ def setUpClass(cls):
def tearDownClass(cls):
shutil.rmtree(cls.tempdir)

def _test(self, filter_name, **options):
def _test(self, filter_name, dtype=numpy.int32, **options):
"""Run test for a particular filter

:param str filter_name: The name of the filter to use
:param Union[None,tuple(int)] options:
create_dataset's compression_opts argument
:return: The tuple describing the filter
"""
data = numpy.arange(100, dtype='float32')
data = numpy.arange(100, dtype=dtype)
filename = os.path.join(self.tempdir, "test_" + filter_name + ".h5")

args = {"blosc": hdf5plugin.blosc_options,
"bshuf": hdf5plugin.bshuf_options,
"lz4": hdf5plugin.lz4_options}[filter_name](**options)
args = {"blosc": hdf5plugin.Blosc,
"bshuf": hdf5plugin.Bitshuffle,
"lz4": hdf5plugin.LZ4}[filter_name](**options)

# Write
f = h5py.File(filename, "w")
Expand Down Expand Up @@ -86,16 +86,29 @@ def testBitshuffle(self):
self._test('bshuf') # Default options

# Specify options
filter_ = self._test('bshuf', nelems=0, lz4=False)
self.assertEqual(filter_[2][3:], (0, 0))
#numpy.int8, int16, int32, int64
for dtype in (numpy.int8, numpy.int16, numpy.int32, numpy.int64):
for x in range(1,3):
filter_ = self._test('bshuf', dtype, nelems=1024*x, lz4=False)
self.assertEqual(filter_[2][3:], (1024*x, 0))
filter_ = self._test('bshuf', dtype, nelems=1024*x, lz4=True)
self.assertEqual(filter_[2][3:], (1024*x, 2))

def testBlosc(self):
"""Write/read test with blosc filter plugin"""
self._test('blosc') # Default options

shuffle = ['none', 'byte', 'bit']
compress = ['blosclz', 'lz4', 'lz4hc','snappy', 'zlib', 'zstd']
# Specify options
filter_ = self._test('blosc', level=3, shuffle='bit', compression='lz4')
self.assertEqual(filter_[2][4:], (3, 2, 1))
for i in range(0,10):
#for shuffle_id, shuffle in enumerate(['none', 'byte', 'bit']):
for j in range(len(shuffle)):
#for compress_id, compress in enumerate(['blosclz', 'lz4', 'lz4hc','snappy', 'zlib', 'zstd']):
for k in range(0,6):
if not k == 3:
filter_ = self._test('blosc', level=i, shuffle=shuffle[j], compression=compress[k])
self.assertEqual(filter_[2][4:], (i, j, k))

def testLZ4(self):
"""Write/read test with lz4 filter plugin"""
Expand All @@ -106,19 +119,9 @@ def testLZ4(self):
self.assertEqual(filter_[2], (1024,))


class TestBloscOptions(unittest.TestCase):
"""Test the blosc_options helper"""

def test(self):
"""blosc_options test"""
result = hdf5plugin.blosc_options(level=3,
shuffle='byte', compression='lz4')["compression_opts"]
self.assertEqual(result, (0, 0, 0, 0, 3, 1, 1))


def suite():
test_suite = unittest.TestSuite()
for cls in (TestHDF5PluginRW, TestBloscOptions):
for cls in (TestHDF5PluginRW,):
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(cls))
return test_suite

Expand Down