Skip to content

Commit

Permalink
add basic support for builtin H5 filters
Browse files Browse the repository at this point in the history
So far implemented are:
- zlib
- szip (if available in h5 dll)

Soon to be implemented extern filters:
- blosc

To be implemented h5 filters:
- shuffle
- nbits
- fletcher32
- scale offset
  • Loading branch information
Vindaar committed Aug 17, 2018
1 parent 1c25de5 commit 958414e
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 5 deletions.
6 changes: 3 additions & 3 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ know.

** Major missing HDF5 features
Most major HDF5 features are now supported (although they may not be
perfectly stable yet, due to limited testing). The major exceptions
are the following two:
- support for filters
perfectly stable yet, due to limited testing). The major exception
is the following:
- support for compound data types

4 changes: 4 additions & 0 deletions src/nimhdf5.nim
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ import nimhdf5/dataspaces
import nimhdf5/datasets
export datasets

# compression / filter support
import nimhdf5/filters
export filters

# finally import and export seqmath, so that calls to procs, which use
# e.g. `shape` or `flatten` internally do not fail, if the calling module
# has not imported seqmath itself
Expand Down
26 changes: 24 additions & 2 deletions src/nimhdf5/datasets.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import H5nimtypes
import datatypes
import dataspaces
import attributes
import filters
import util
import h5util

Expand Down Expand Up @@ -313,8 +314,9 @@ proc create_dataset*[T: (tuple | int | seq)](
dset_raw: string,
shape_raw: T,
dtype: (typedesc | hid_t),
chunksize: seq[int] = @[],
maxshape: seq[int] = @[]): H5DataSet =
chunksize: seq[int],
maxshape: seq[int],
filter: H5Filter): H5DataSet =
## procedure to create a dataset given a H5file object. The shape of
## that type is given as a tuple, the datatype as a typedescription
## inputs:
Expand Down Expand Up @@ -397,6 +399,9 @@ proc create_dataset*[T: (tuple | int | seq)](
try:
status = dset.parseChunkSizeAndMaxShape(chunksize, maxshape)
if status >= 0:
# potentially apply filters
dset.setFilters(filter)

# check whether there already exists a dataset with the given name
# first in H5FileObj:
var exists = hasKey(h5f.datasets, dset_name)
Expand Down Expand Up @@ -468,6 +473,23 @@ proc create_dataset*[T: (tuple | int | seq)](

result = dset

proc create_dataset*[T: (tuple | int | seq)](
    h5f: var H5FileObj,
    dset_raw: string,
    shape_raw: T,
    dtype: (typedesc | hid_t),
    chunksize: seq[int] = @[],
    maxshape: seq[int] = @[]): H5DataSet {.inline.} =
  ## Convenience overload of `create_dataset` for datasets that do not use
  ## any filter. `chunksize` and `maxshape` may be omitted here; both default
  ## to an empty sequence.
  ## All arguments are forwarded unchanged to the full `create_dataset`
  ## overload, together with a filter of kind `fkNone`.
  create_dataset(h5f, dset_raw, shape_raw, dtype, chunksize, maxshape,
                 H5Filter(kind: fkNone))

# proc create_dataset*[T: (tuple | int)](h5f: var H5Group, dset_raw: string, shape_raw: T, dtype: typedesc): H5DataSet =
# convenience wrapper around create_dataset to create a dataset within a group with a
# relative name
Expand Down
66 changes: 66 additions & 0 deletions src/nimhdf5/filters.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import sequtils
import hdf5_wrapper

import datatypes
import H5nimtypes

# Allowed values for the Szip pixels per block: even, positive integers <= 32.
# The sequence starts at 2 so that 0 is not treated as a valid value.
const SzipPixPerBlockSeq = toSeq(2'u8 .. 32'u8).filterIt(it mod 2 == 0)
# Valid zlib (deflate) compression levels. Kept as a slice rather than a
# `set[range]`, so that membership tests on an arbitrary `int` are plain
# bound comparisons and never involve a range conversion of the tested value.
const ZlibCompressionLevel = 0 .. 9
# Build the set of allowed Szip values at compile time from the sequence
# above, then freeze it into a constant.
var tempSet {.compileTime.}: set[uint8] = {}
static:
  for x in SzipPixPerBlockSeq:
    tempSet.incl x
const SzipPixPerBlockSet = tempSet

type
  H5FilterKind* = enum
    ## The kinds of filter a `H5Filter` object can describe.
    fkNone,   ## no filter
    fkSzip,   ## Szip compression
    fkZlib,   ## Zlib compression
    fkBlosc   ## Blosc compression

  SzipOptionMask* {.pure.} = enum
    ## Option mask of an Szip filter; the values are taken from the
    ## corresponding `H5_SZIP_*_OPTION_MASK` constants.
    EntropyCoding = H5_SZIP_EC_OPTION_MASK
    NearestNeighbor = H5_SZIP_NN_OPTION_MASK

  H5Filter* = object
    ## Description of a filter, discriminated by `kind`. Each filter kind
    ## carries only the settings relevant for it.
    case kind*: H5FilterKind
    of fkSzip:
      optionMask*: SzipOptionMask ## Szip option mask
      pixPerBlock*: int           ## Szip pixels per block
    of fkZlib:
      zlibLevel*: int             ## Zlib compression level
    of fkBlosc:
      bloscLevel*: int            ## Blosc compression level
    of fkNone:
      discard                     # no filter in use: no settings to store

proc setFilters*(dset: H5DataSet, filter: H5Filter) =
  ## Parses the given `filter` and sets the dataset creation property list
  ## of `dset` (`dset.dcpl_id`) accordingly. A filter of kind `fkNone`
  ## performs no library call.
  ## raises:
  ##   HDF5LibraryError: if a call to a H5 lib function fails
  ##   ValueError:
  ##     - if the `pixPerBlock` field of an Szip filter is invalid
  ##       (not positive, uneven or larger than 32)
  ##     - if `zlibLevel` notin {0 .. 9}
  ##   NotImplementedError: if a Blosc filter is requested
  var status: herr_t = 0

  case filter.kind
  of fkSzip:
    # Validate the `int` itself before narrowing it to `uint8`: converting a
    # negative or > 255 value first would not yield the documented
    # `ValueError`. The lower bound of 1 also rejects 0, which is even but
    # not positive.
    if (filter.pixPerBlock notin 1 .. high(uint8).int) or
       (filter.pixPerBlock.uint8 notin SzipPixPerBlockSet):
      raise newException(ValueError, "Invalid `pixPerBlock` value for SZip " &
        "compression. Valid values are even, positive integers <= 32")
    status = H5Pset_szip(dset.dcpl_id, filter.optionMask.cuint,
                         filter.pixPerBlock.cuint)
  of fkZlib:
    if filter.zlibLevel notin ZlibCompressionLevel:
      raise newException(ValueError, "Invalid `zlibLevel` compression value Zlib " &
        "compression. Valid values are {0 .. 9}")
    status = H5Pset_deflate(dset.dcpl_id, filter.zlibLevel.cuint)
  of fkBlosc:
    raise newException(NotImplementedError, "Blosc support not yet implemented!")
  of fkNone:
    # nothing to configure
    discard

  # `status` stays 0 unless one of the H5 calls above reported a failure
  if status < 0:
    raise newException(Hdf5LibraryError, "Call to hdf5 library failed in " &
      "`setFilters` trying to set " & $filter.kind & " filter.")

0 comments on commit 958414e

Please sign in to comment.