diff --git a/Project.toml b/Project.toml index 6a9f47c12..660f853b9 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,6 @@ uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" version = "0.16.0" [deps] -Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -12,7 +11,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" [compat] -Blosc = "0.7.1" Compat = "3.1.0" HDF5_jll = "~1.10.5, ~1.12.0" Requires = "1.0" @@ -22,6 +20,10 @@ julia = "1.3" CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +H5Zblosc = "c8ec2601-a99c-407f-b158-e79c03c2f5f7" +H5Zbzip2 = "094576f2-1e46-4c84-8e32-c46c042eaaa2" +H5Zlz4 = "eb20ec05-5464-47b5-ba41-098e3c1068a3" +H5Zzstd = "f6f2d980-1ec6-471c-a70d-0270e22f1103" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" diff --git a/THIRDPARTY.md b/THIRDPARTY.md new file mode 100644 index 000000000..719f1734d --- /dev/null +++ b/THIRDPARTY.md @@ -0,0 +1,14 @@ +# Third Party Licenses + +HDF5.jl contains several derivative works of open source software. + +In particular, the following submodules are licensed as derivative works from third-parties. +Original and derivative code in HDF5.jl is licensed according to [LICENSE.txt](LICENSE.txt) +as permitted by licenses for the original software from which they may be derived. +See the files indicated below for the copyright notices and the licenses of the original +software from which individual submodules are derived. 
+ +## Filter Plugins +* [H5Zbzip2](src/filters/H5Zbzip2/src/H5Zbzip2.jl): See [src/filters/H5Zbzip2/THIRDPARTY.txt](src/filters/H5Zbzip2/THIRDPARTY.txt) +* [H5Zlz4](src/filters/H5Zlz4/src/H5Zlz4.jl): See [src/filters/H5Zlz4/THIRDPARTY.txt](src/filters/H5Zlz4/THIRDPARTY.txt) +* [H5Zzstd](src/filters/H5Zzstd/src/H5Zzstd.jl): See [src/filters/H5Zzstd/THIRDPARTY.txt](src/filters/H5Zzstd/THIRDPARTY.txt) \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index beaf6e4bd..748478cf8 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -202,6 +202,7 @@ contiguously. A = rand(100,100) g1["A", chunk=(5,5), compress=3] = A g2["A", chunk=(5,5), shuffle=(), deflate=3] = A +using H5Zblosc # load in Blosc g3["A", chunk=(5,5), blosc=3] = A ``` diff --git a/src/HDF5.jl b/src/HDF5.jl index cecd9a708..8f0f01cac 100644 --- a/src/HDF5.jl +++ b/src/HDF5.jl @@ -1600,8 +1600,6 @@ function __init__() ENV["HDF5_USE_FILE_LOCKING"] = "FALSE" end - Filters.register_blosc() - # use our own error handling machinery (i.e. turn off automatic error printing) API.h5e_set_auto(API.H5E_DEFAULT, C_NULL, C_NULL) @@ -1614,6 +1612,10 @@ function __init__() UTF8_ATTRIBUTE_PROPERTIES.char_encoding = :utf8 @require FileIO="5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" @eval include("fileio.jl") + @require H5Zblosc="c8ec2601-a99c-407f-b158-e79c03c2f5f7" @eval begin + set_blosc!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p), H5Zblosc.BloscFilter()) + set_blosc!(p::Properties, level::Integer) = push!(Filters.FilterPipeline(p), H5Zblosc.BloscFilter(level=level)) + end return nothing end diff --git a/src/filters/H5Zblosc/LICENSE.txt b/src/filters/H5Zblosc/LICENSE.txt new file mode 100644 index 000000000..b5b30a868 --- /dev/null +++ b/src/filters/H5Zblosc/LICENSE.txt @@ -0,0 +1,17 @@ +The MIT License (MIT) +Copyright (c) 2012-2021: Timothy E. 
Holy, Simon Kornblith, and contributors: https://github.com/JuliaIO/HDF5.jl/contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/filters/H5Zblosc/Project.toml b/src/filters/H5Zblosc/Project.toml new file mode 100644 index 000000000..aaf45c3a2 --- /dev/null +++ b/src/filters/H5Zblosc/Project.toml @@ -0,0 +1,12 @@ +name = "H5Zblosc" +uuid = "c8ec2601-a99c-407f-b158-e79c03c2f5f7" +version = "0.1.0" + +[deps] +Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" + +[compat] +HDF5 = "0.16" +Blosc = "0.7.1" +julia = "1.3" diff --git a/src/filters/blosc.jl b/src/filters/H5Zblosc/src/H5Zblosc.jl similarity index 77% rename from src/filters/blosc.jl rename to src/filters/H5Zblosc/src/H5Zblosc.jl index 47f6b7e3a..6c43afc16 100644 --- a/src/filters/blosc.jl +++ b/src/filters/H5Zblosc/src/H5Zblosc.jl @@ -1,8 +1,15 @@ +module H5Zblosc # port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat) import Blosc +using HDF5.API +import HDF5.Filters: Filter, FilterPipeline +import HDF5.Filters: filterid, register_filter, filtername, filter_func, filter_cfunc, set_local_func, set_local_cfunc -const FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc +export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter + + +const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc const FILTER_BLOSC_VERSION = 2 const blosc_name = "blosc" @@ -12,7 +19,7 @@ function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) blosc_nelements = Ref{Csize_t}(length(blosc_values)) blosc_chunkdims = Vector{API.hsize_t}(undef,32) - API.h5p_get_filter_by_id(dcpl, FILTER_BLOSC, blosc_flags, blosc_nelements, blosc_values, 0, C_NULL, C_NULL) + API.h5p_get_filter_by_id(dcpl, H5Z_FILTER_BLOSC, blosc_flags, blosc_nelements, blosc_values, 0, C_NULL, C_NULL) flags = blosc_flags[] nelements = max(blosc_nelements[], 4) # First 4 slots reserved @@ -45,7 +52,7 @@ function blosc_set_local(dcpl::API.hid_t, 
htype::API.hid_t, space::API.hid_t) blosc_values[3] = basetypesize blosc_values[4] = chunksize * htypesize # size of the chunk - API.h5p_modify_filter(dcpl, FILTER_BLOSC, flags, nelements, blosc_values) + API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values) return API.herr_t(1) end @@ -85,10 +92,13 @@ function blosc_filter(flags::Cuint, cd_nelmts::Csize_t, # uncompressed chunk size but it should not be used in a general # cases since other filters in the pipeline can modify the buffer # size. - outbuf_size, cbytes, blocksize = Blosc.cbuffer_sizes(unsafe_load(buf)) + in = unsafe_load(buf) + # See https://github.com/JuliaLang/julia/issues/43402 + # Resolved in https://github.com/JuliaLang/julia/pull/43408 + outbuf_size, cbytes, blocksize = Blosc.cbuffer_sizes(in) outbuf = Libc.malloc(outbuf_size) outbuf == C_NULL && return Csize_t(0) - status = Blosc.blosc_decompress(unsafe_load(buf), outbuf, outbuf_size) + status = Blosc.blosc_decompress(in, outbuf, outbuf_size) status <= 0 && (Libc.free(outbuf); return Csize_t(0)) end @@ -102,19 +112,6 @@ function blosc_filter(flags::Cuint, cd_nelmts::Csize_t, return Csize_t(0) end - -# register the Blosc filter function with HDF5 -function register_blosc() - c_blosc_set_local = @cfunction(blosc_set_local, API.herr_t, (API.hid_t,API.hid_t,API.hid_t)) - c_blosc_filter = @cfunction(blosc_filter, Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, - Ptr{Csize_t}, Ptr{Ptr{Cvoid}})) - API.h5z_register(API.H5Z_class_t(API.H5Z_CLASS_T_VERS, FILTER_BLOSC, 1, 1, pointer(blosc_name), C_NULL, c_blosc_set_local, c_blosc_filter)) - - return nothing -end - - """ BloscFilter(;level=5, shuffle=true, compressor="blosclz") @@ -142,6 +139,15 @@ function BloscFilter(;level=5, shuffle=true, compressor="blosclz") BloscFilter(0,0,0,0,level,shuffle,compcode) end +filterid(::Type{BloscFilter}) = H5Z_FILTER_BLOSC +filtername(::Type{BloscFilter}) = blosc_name +set_local_func(::Type{BloscFilter}) = blosc_set_local 
+set_local_cfunc(::Type{BloscFilter}) = @cfunction(blosc_set_local, API.herr_t, (API.hid_t,API.hid_t,API.hid_t)) +filter_func(::Type{BloscFilter}) = blosc_filter +filter_cfunc(::Type{BloscFilter}) = @cfunction(blosc_filter, Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, + Ptr{Csize_t}, Ptr{Ptr{Cvoid}})) + function Base.show(io::IO, blosc::BloscFilter) print(io, BloscFilter, "(level=", Int(blosc.level), @@ -150,9 +156,6 @@ function Base.show(io::IO, blosc::BloscFilter) ")") end -filterid(::Type{BloscFilter}) = FILTER_BLOSC -FILTERS[FILTER_BLOSC] = BloscFilter - function Base.push!(f::FilterPipeline, blosc::BloscFilter) 0 <= blosc.level <= 9 || throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]")) ref = Ref(blosc) @@ -161,3 +164,9 @@ function Base.push!(f::FilterPipeline, blosc::BloscFilter) end return f end + +function __init__() + register_filter(BloscFilter) +end + +end # module H5Zblosc diff --git a/src/filters/H5Zbzip2/LICENSE.txt b/src/filters/H5Zbzip2/LICENSE.txt new file mode 100644 index 000000000..b5b30a868 --- /dev/null +++ b/src/filters/H5Zbzip2/LICENSE.txt @@ -0,0 +1,17 @@ +The MIT License (MIT) +Copyright (c) 2012-2021: Timothy E. Holy, Simon Kornblith, and contributors: https://github.com/JuliaIO/HDF5.jl/contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/filters/H5Zbzip2/Project.toml b/src/filters/H5Zbzip2/Project.toml new file mode 100644 index 000000000..bad63bd76 --- /dev/null +++ b/src/filters/H5Zbzip2/Project.toml @@ -0,0 +1,12 @@ +name = "H5Zbzip2" +uuid = "094576f2-1e46-4c84-8e32-c46c042eaaa2" +version = "0.1.0" + +[deps] +CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" + +[compat] +HDF5 = "0.16" +CodecBzip2 = "0.7" +julia = "1.3" diff --git a/src/filters/H5Zbzip2/THIRDPARTY.txt b/src/filters/H5Zbzip2/THIRDPARTY.txt new file mode 100644 index 000000000..812ff0293 --- /dev/null +++ b/src/filters/H5Zbzip2/THIRDPARTY.txt @@ -0,0 +1,31 @@ +H5Z_filter_bzip2 in H5Zbzip2.jl was derived from H5Zbzip2.c from PyTables: + +Copyright Notice and Statement for PyTables Software Library and Utilities: +Copyright (c) 2002-2004 by Francesc Alted +Copyright (c) 2005-2007 by Carabos Coop. V. +Copyright (c) 2008-2010 by Francesc Alted +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. +c. 
Neither the name of Francesc Alted nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/src/filters/H5Zbzip2/src/H5Zbzip2.jl b/src/filters/H5Zbzip2/src/H5Zbzip2.jl
new file mode 100644
index 000000000..9475d7076
--- /dev/null
+++ b/src/filters/H5Zbzip2/src/H5Zbzip2.jl
@@ -0,0 +1,251 @@
+#=
+The code below has been ported to Julia from the original C source:
+https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c
+The filter function H5Z_filter_bzip2 was adopted from:
+PyTables http://www.pytables.org.
+The plugin can be used with the HDF5 library version 1.8.11+ to read HDF5 datasets compressed with bzip2 created by PyTables.
+License: licenses/H5Zbzip2_LICENSE.txt
+
+The following license applies to the Julia port.
+Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt
+=#
+module H5Zbzip2
+
+using CodecBzip2
+import CodecBzip2: libbzip2
+using HDF5.API
+import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc
+
+export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter
+
+
+const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307)
+const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html"
+
+"""
+    H5Z_filter_bzip2(flags, cd_nelmts, cd_values, nbytes, buf_size, buf)
+
+HDF5 filter callback that bzip2-compresses the chunk in `buf[]`, or decompresses it
+when `flags` has `H5Z_FLAG_REVERSE` set.
+
+`nbytes` is the number of valid bytes in the input buffer. When compressing, the first
+entry of `cd_values` (if `cd_nelmts > 0`) is the bzip2 block size in units of 100k and
+must lie in 1:9; it defaults to 9.
+
+On success the input buffer is freed, `buf[]` and `buf_size[]` are updated to the newly
+allocated output buffer, and the number of valid output bytes is returned. On failure
+the error is logged, `buf` and `buf_size` are left untouched and 0 is returned.
+"""
+function H5Z_filter_bzip2(flags::Cuint, cd_nelmts::Csize_t,
+                          cd_values::Ptr{Cuint}, nbytes::Csize_t,
+                          buf_size::Ptr{Csize_t}, buf::Ptr{Ptr{Cvoid}})::Csize_t
+    outbuf = C_NULL
+    outdatalen = Cuint(0)
+
+    try
+
+        if flags & API.H5Z_FLAG_REVERSE != 0
+            # Decompress
+
+            # Initial guess for the output size; the buffer is doubled below as needed
+            outbuflen = nbytes * 3 + 1
+            outbuf = Libc.malloc(outbuflen)
+            if outbuf == C_NULL
+                error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
+            end
+
+            stream = CodecBzip2.BZStream()
+            # Just use default malloc and free
+            stream.bzalloc = C_NULL
+            stream.bzfree = C_NULL
+            # BZ2_bzDecompressInit
+            ret = CodecBzip2.decompress_init!(stream, 0, false)
+            if ret != CodecBzip2.BZ_OK
+                error("H5Zbzip2: bzip2 decompress start failed with error $ret.")
+            end
+
+            try
+                stream.next_out = outbuf
+                stream.avail_out = outbuflen
+                stream.next_in = unsafe_load(buf)
+                stream.avail_in = nbytes
+
+                cont = true
+
+                while cont
+                    # BZ2_bzDecompress
+                    ret = CodecBzip2.decompress!(stream)
+                    if ret < 0
+                        error("H5Zbzip2: bzip2 decompression failed with error $ret.")
+                    end
+                    cont = ret != CodecBzip2.BZ_STREAM_END
+                    if cont && stream.avail_out == 0
+                        # Grow the output buffer
+                        newbuflen = outbuflen * 2
+                        newbuf = Libc.realloc(outbuf, newbuflen)
+                        if newbuf == C_NULL
+                            error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
+                        end
+                        # Resume writing right after the bytes already produced
+                        stream.next_out = newbuf + outbuflen
+                        stream.avail_out = outbuflen
+                        outbuf = newbuf
+                        outbuflen = newbuflen
+                    end
+                end
+
+                outdatalen = stream.total_out_lo32
+            finally
+                # BZ2_bzDecompressEnd; in `finally` so that the decoder state is
+                # released even when decompression fails part-way
+                ret = CodecBzip2.decompress_end!(stream)
+            end
+            if ret != CodecBzip2.BZ_OK
+                error("H5Zbzip2: bzip2 decompression end failed with error $ret.")
+            end
+        else
+            # Compress data
+
+            blockSize100k = 9
+
+            # Get compression blocksize if present
+            if cd_nelmts > 0
+                blockSize100k = unsafe_load(cd_values)
+                if blockSize100k < 1 || blockSize100k > 9
+                    error("H5Zbzip2: Invalid compression blocksize: $blockSize100k")
+                end
+            end
+
+            # Prepare the output buffer
+            outbuflen = nbytes + nbytes ÷ 100 + 600 # worst case (bzip2 docs)
+            outbuf = Libc.malloc(outbuflen)
+            @debug "Allocated" outbuflen outbuf
+            if outbuf == C_NULL
+                error("H5Zbzip2: Memory allocation failed for bzip2 compression")
+            end
+
+            # Compress data
+            # In: capacity of outbuf. Out: number of bytes actually written.
+            r_odatalen = Ref{Cuint}(outbuflen)
+            ret = BZ2_bzBuffToBuffCompress(outbuf, r_odatalen, unsafe_load(buf), nbytes,
+                                           blockSize100k, 0, 0)
+            outdatalen = r_odatalen[]
+            if ret != CodecBzip2.BZ_OK
+                error("H5Zbzip2: bzip2 compression failed with error $ret.")
+            end
+        end # if flags & API.H5Z_FLAG_REVERSE != 0
+        Libc.free(unsafe_load(buf))
+        unsafe_store!(buf, outbuf)
+        unsafe_store!(buf_size, outbuflen)
+
+    catch err
+        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
+        outdatalen = Csize_t(0)
+        if outbuf != C_NULL
+            Libc.free(outbuf)
+        end
+        @error "H5Zbzip2.jl Non-Fatal ERROR: " exception=(err, catch_backtrace())
+    end # try - catch
+
+    return Csize_t(outdatalen)
+end # function H5Z_filter_bzip2
+
+# Need stdcall for 32-bit Windows?
+"""
+    BZ2_bzBuffToBuffCompress(dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor)
+
+Thin `ccall` wrapper around libbzip2's `BZ2_bzBuffToBuffCompress`; returns its `Cint`
+status. Uses `stdcall` and the decorated symbol name when `CodecBzip2.WIN32` is set.
+"""
+function BZ2_bzBuffToBuffCompress(dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor)
+    @static if CodecBzip2.WIN32
+        return ccall(
+            ("BZ2_bzBuffToBuffCompress@28", libbzip2),
+            stdcall,
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            blockSize100k,
+            verbosity,
+            workFactor
+        )
+    else
+        return ccall(
+            (:BZ2_bzBuffToBuffCompress, libbzip2),
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            blockSize100k,
+            verbosity,
+            workFactor
+        )
+    end
+end
+
+"""
+    BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity)
+
+Thin `ccall` wrapper around libbzip2's `BZ2_bzBuffToBuffDecompress`; returns its `Cint`
+status. Uses `stdcall` and the decorated symbol name when `CodecBzip2.WIN32` is set.
+"""
+function BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity)
+    @static if CodecBzip2.WIN32
+        return ccall(
+            ("BZ2_bzBuffToBuffDecompress@24", libbzip2),
+            stdcall,
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            small,
+            verbosity
+        )
+    else
+        return ccall(
+            (:BZ2_bzBuffToBuffDecompress, libbzip2),
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            small,
+            verbosity
+        )
+    end
+end
+
+# Filters Module
+
+"""
+    Bzip2Filter(blockSize100k)
+
+Filter type registered with HDF5.jl for the bzip2 filter (`H5Z_FILTER_BZIP2`).
+`blockSize100k` is the bzip2 block size in units of 100k; `Bzip2Filter()` uses 9.
+NOTE(review): presumably HDF5.Filters forwards the field as `cd_values` — confirm.
+"""
+struct Bzip2Filter <: Filter
+    blockSize100k::Cuint
+end
+Bzip2Filter() = Bzip2Filter(9)
+
+filterid(::Type{Bzip2Filter}) = H5Z_FILTER_BZIP2
+filtername(::Type{Bzip2Filter}) = bzip2_name
+filter_func(::Type{Bzip2Filter}) = H5Z_filter_bzip2
+filter_cfunc(::Type{Bzip2Filter}) = @cfunction(H5Z_filter_bzip2, Csize_t,
+                                               (Cuint, Csize_t, Ptr{Cuint}, Csize_t,
+                                                Ptr{Csize_t}, Ptr{Ptr{Cvoid}}))
+
+function __init__()
+    register_filter(Bzip2Filter)
+end
+
+end # module H5Zbzip2
diff --git a/src/filters/H5Zlz4/LICENSE.txt b/src/filters/H5Zlz4/LICENSE.txt
new file mode 100644
index 000000000..b5b30a868
--- /dev/null
+++ b/src/filters/H5Zlz4/LICENSE.txt
@@ -0,0 +1,17 @@
+The MIT License (MIT)
+Copyright (c) 2012-2021: Timothy E.
Holy, Simon Kornblith, and contributors: https://github.com/JuliaIO/HDF5.jl/contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/filters/H5Zlz4/Project.toml b/src/filters/H5Zlz4/Project.toml new file mode 100644 index 000000000..19ddae641 --- /dev/null +++ b/src/filters/H5Zlz4/Project.toml @@ -0,0 +1,12 @@ +name = "H5Zlz4" +uuid = "eb20ec05-5464-47b5-ba41-098e3c1068a3" +version = "0.1.0" + +[deps] +CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" + +[compat] +HDF5 = "0.16" +CodecLz4 = "0.4" +julia = "1.3" diff --git a/src/filters/H5Zlz4/THIRDPARTY.txt b/src/filters/H5Zlz4/THIRDPARTY.txt new file mode 100644 index 000000000..745176c31 --- /dev/null +++ b/src/filters/H5Zlz4/THIRDPARTY.txt @@ -0,0 +1,45 @@ +H5Zlz4.jl is derived from H5Zlz4.c from HDF5 Group + +================================================================================ +LZ4 filter plugin license +================================================================================ + +Copyright Notice and License Terms for +HDF5 LZ4 compression filter plugin +----------------------------------------------------------------------------- + +HDF5 LZ4 compression filter plugin +Copyright 2013-2015 by The HDF Group. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial purposes) +provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or materials provided with the distribution. + +3. In addition, redistributions of modified forms of the source or binary + code must carry prominent notices stating that the original code was + changed and the date of the change. + +4. 
All publications or advertising materials mentioning features or use of
+   this software are asked, but not required, to acknowledge that it was
+   developed by The HDF Group and credit the contributors.
+
+5. Neither the name of The HDF Group, nor the name of any Contributor may
+   be used to endorse or promote products derived from this software
+   without specific prior written permission from The HDF Group or the
+   Contributor, respectively.
+
+DISCLAIMER:
+THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS
+"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. In no
+event shall The HDF Group or the Contributors be liable for any damages
+suffered by the users arising out of the use of this software, even if
+advised of the possibility of such damage.
\ No newline at end of file
diff --git a/src/filters/H5Zlz4/src/H5Zlz4.jl b/src/filters/H5Zlz4/src/H5Zlz4.jl
new file mode 100644
index 000000000..df92402bb
--- /dev/null
+++ b/src/filters/H5Zlz4/src/H5Zlz4.jl
@@ -0,0 +1,240 @@
+#=
+This is a port of H5Zlz4.c to Julia
+https://github.com/HDFGroup/hdf5_plugins/blob/master/LZ4/src/H5Zlz4.c
+https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/LZ4/src/H5Zlz4.c
+https://github.com/silx-kit/hdf5plugin/blob/main/src/LZ4/H5Zlz4.c
+
+H5Zlz4 is originally a copyright of HDF Group. License: licenses/H5Zlz4_LICENSE.txt
+
+The following license applies to the Julia port.
+Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt
+=#
+module H5Zlz4
+
+using CodecLz4
+using HDF5.API
+import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc
+
+
+export H5Z_FILTER_LZ4, H5Z_filter_lz4, Lz4Filter
+
+const H5Z_FILTER_LZ4 = API.H5Z_filter_t(32004)
+
+const DEFAULT_BLOCK_SIZE = 1 << 30
+const lz4_name = "HDF5 lz4 filter; see http://www.hdfgroup.org/services/contributions.html"
+
+# Last argument passed to LZ4_compress_fast (1 is the same as LZ4_compress_default)
+const LZ4_AGGRESSION = Ref(1)
+
+# Size in bytes of the stream header: big-endian UInt64 original size + UInt32 block size
+const LZ4_HEADER_SIZE = 12
+
+"""
+    H5Z_filter_lz4(flags, cd_nelmts, cd_values, nbytes, buf_size, buf)
+
+HDF5 filter callback that LZ4-compresses the chunk in `buf[]`, or decompresses it when
+`flags` has `H5Z_FLAG_REVERSE` set.
+
+# Arguments
+- `flags`: `H5Z_FLAG_REVERSE` or `H5Z_FLAG_OPTIONAL`
+- `cd_nelmts`: number of elements in `cd_values` (0 or 1)
+- `cd_values`: the first optional element must be the block size
+- `nbytes`: number of valid bytes of data
+- `buf_size`: total size of buffer
+- `buf`: pointer to pointer of data
+
+The compressed stream is a big-endian `UInt64` original size and `UInt32` block size,
+followed by one `UInt32` stored size plus payload per block. A block whose stored size
+equals the block size was stored uncompressed.
+
+Returns the number of valid bytes in the new `buf[]`, or 0 on failure, in which case the
+error is logged and `buf` and `buf_size` are left untouched.
+"""
+function H5Z_filter_lz4(flags::Cuint, cd_nelmts::Csize_t,
+                        cd_values::Ptr{Cuint}, nbytes::Csize_t,
+                        buf_size::Ptr{Csize_t}, buf::Ptr{Ptr{Cvoid}})::Csize_t
+
+    outBuf = C_NULL
+    ret_value = Csize_t(0)
+
+    try
+
+        if (flags & API.H5Z_FLAG_REVERSE) != 0 # reverse filter, decompressing
+            if nbytes < LZ4_HEADER_SIZE
+                error("H5Zlz4: input is too short to contain the lz4 header")
+            end
+            rpos = Ptr{UInt8}(unsafe_load(buf))
+            # Input bytes that have not been consumed yet
+            remaining = nbytes - LZ4_HEADER_SIZE
+            # Load the first 8 bytes from buffer as a big endian UInt64
+            # This is the original size of the buffer
+            origSize = ntoh(unsafe_load(Ptr{UInt64}(rpos)))
+            rpos += 8 # advance the pointer
+
+            # Next read the next four bytes from the buffer as a big endian UInt32
+            # This is the blocksize
+            blockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos)))
+            rpos += 4
+            if blockSize > origSize
+                blockSize = origSize
+            end
+
+            # malloc a byte buffer of origSize
+            @debug "OrigSize" origSize
+            outBuf = Libc.malloc(origSize)
+            # Libc.malloc reports failure by returning C_NULL rather than throwing
+            if outBuf == C_NULL
+                error("H5Zlz4: memory allocation failed for lz4 decompression")
+            end
+            roBuf = Ptr{UInt8}(outBuf)
+            decompSize = 0
+            # Start with the first blockSize
+            while decompSize < origSize
+                if origSize - decompSize < blockSize # the last block can be smaller than block size
+                    blockSize = origSize - decompSize
+                end
+
+                if remaining < 4
+                    error("H5Zlz4: input ends before the size of the next block")
+                end
+                compressedBlockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos)))
+                rpos += 4
+                remaining -= 4
+                if compressedBlockSize > remaining
+                    error("H5Zlz4: block size $compressedBlockSize exceeds the remaining input")
+                end
+
+                if compressedBlockSize == blockSize
+                    # There was no compression
+                    # memcpy(roBuf, rpos, blockSize)
+                    unsafe_copyto!(roBuf, rpos, blockSize)
+                    decompressedBytes = blockSize
+                else
+                    # do the decompression
+                    @debug "decompress_safe" rpos roBuf compressedBlockSize (origSize - decompSize)
+                    decompressedBytes = CodecLz4.LZ4_decompress_safe(rpos, roBuf, compressedBlockSize, origSize - decompSize)
+                    @debug "decompressedBytes" decompressedBytes
+                end
+                # A negative value is an LZ4 error; zero would never terminate the loop
+                if decompressedBytes <= 0
+                    error("H5Zlz4: lz4 decompression failed with result $decompressedBytes")
+                end
+
+                rpos += compressedBlockSize
+                remaining -= compressedBlockSize
+                roBuf += blockSize
+                decompSize += decompressedBytes
+            end
+            Libc.free(unsafe_load(buf))
+            unsafe_store!(buf, outBuf)
+            # HDF5 expects buf_size to describe the newly allocated buffer
+            unsafe_store!(buf_size, origSize)
+            outBuf = C_NULL
+            ret_value = Csize_t(origSize)
+        else
+            # forward filter
+            # compressing
+
+            if nbytes > typemax(Int32)
+                error("Can only compress chunks up to 2GB")
+            end
+            # Only dereference cd_values when HDF5 actually supplied a block size
+            blockSize = cd_nelmts > 0 ? unsafe_load(cd_values) : Cuint(0)
+            if blockSize == 0
+                blockSize = DEFAULT_BLOCK_SIZE
+            end
+            if blockSize > nbytes
+                blockSize = nbytes
+            end
+            nBlocks = (nbytes-1) ÷ blockSize + 1
+            maxDestSize = nBlocks * CodecLz4.LZ4_compressBound(blockSize) + 4 + 8 + nBlocks*4
+            outBuf = Libc.malloc(maxDestSize)
+            if outBuf == C_NULL
+                error("H5Zlz4: memory allocation failed for lz4 compression")
+            end
+
+            rpos = Ptr{UInt8}(unsafe_load(buf))
+            roBuf = Ptr{UInt8}(outBuf)
+
+            # Header
+            unsafe_store!(Ptr{UInt64}(roBuf), hton(UInt64(nbytes)))
+            roBuf += 8
+
+            unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(blockSize)))
+            roBuf += 4
+
+            outSize = LZ4_HEADER_SIZE
+
+            for block = 0:nBlocks-1
+                origWritten = Csize_t(block*blockSize)
+                if nbytes - origWritten < blockSize # the last block may be < blockSize
+                    blockSize = nbytes - origWritten
+                end
+
+                # aggression = 1 is the same LZ4_compress_default
+                @debug "LZ4_compress_fast args" rpos outBuf roBuf roBuf+4 blockSize nBlocks CodecLz4.LZ4_compressBound(blockSize)
+                compBlockSize = UInt32(CodecLz4.LZ4_compress_fast(rpos, roBuf+4, blockSize, CodecLz4.LZ4_compressBound(blockSize), LZ4_AGGRESSION[]))
+                @debug "Compressed block size" compBlockSize
+
+                if compBlockSize == 0
+                    error("Could not compress block $block")
+                end
+
+                if compBlockSize >= blockSize # compression did not save any space, do a memcpy instead
+                    compBlockSize = blockSize
+                    unsafe_copyto!(roBuf+4, rpos, blockSize)
+                end
+
+                unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(compBlockSize))) # write blocksize
+                roBuf += 4
+
+                rpos += blockSize
+                roBuf += compBlockSize
+                outSize += compBlockSize + 4
+            end
+
+            Libc.free(unsafe_load(buf))
+            unsafe_store!(buf, outBuf)
+            unsafe_store!(buf_size, outSize)
+            outBuf = C_NULL
+            ret_value = Csize_t(outSize)
+        end # (flags & API.H5Z_FLAG_REVERSE) != 0
+
+    catch err
+        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
+        ret_value = Csize_t(0)
+        @error "H5Zlz4.jl Non-Fatal ERROR: " exception=(err, catch_backtrace())
+    finally
+        if outBuf != C_NULL
+            Libc.free(outBuf)
+        end
+    end
+    return Csize_t(ret_value)
+end
+
+# Filters Module
+
+"""
+    Lz4Filter(blockSize)
+
+Filter type registered with HDF5.jl for the LZ4 filter (`H5Z_FILTER_LZ4`).
+`blockSize` is the LZ4 block size; `Lz4Filter()` uses `DEFAULT_BLOCK_SIZE`.
+NOTE(review): presumably HDF5.Filters forwards the field as `cd_values` — confirm.
+"""
+struct Lz4Filter <: Filter
+    blockSize::Cuint
+end
+Lz4Filter() = Lz4Filter(DEFAULT_BLOCK_SIZE)
+
+filterid(::Type{Lz4Filter}) = H5Z_FILTER_LZ4
+filtername(::Type{Lz4Filter}) = lz4_name
+filter_func(::Type{Lz4Filter}) = H5Z_filter_lz4
+filter_cfunc(::Type{Lz4Filter}) = @cfunction(H5Z_filter_lz4, Csize_t,
+                                             (Cuint, Csize_t, Ptr{Cuint}, Csize_t,
+                                              Ptr{Csize_t}, Ptr{Ptr{Cvoid}}))
+
+function __init__()
+    register_filter(Lz4Filter)
+end
+
+end
diff --git a/src/filters/H5Zzstd/LICENSE.txt b/src/filters/H5Zzstd/LICENSE.txt
new file mode 100644
index 000000000..b5b30a868
--- /dev/null
+++ b/src/filters/H5Zzstd/LICENSE.txt
@@ -0,0 +1,17 @@
+The MIT License (MIT)
+Copyright (c) 2012-2021: Timothy E.
Holy, Simon Kornblith, and contributors: https://github.com/JuliaIO/HDF5.jl/contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software +and associated documentation files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/filters/H5Zzstd/Project.toml b/src/filters/H5Zzstd/Project.toml new file mode 100644 index 000000000..968d16137 --- /dev/null +++ b/src/filters/H5Zzstd/Project.toml @@ -0,0 +1,12 @@ +name = "H5Zzstd" +uuid = "f6f2d980-1ec6-471c-a70d-0270e22f1103" +version = "0.1.0" + +[deps] +CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" + +[compat] +HDF5 = "0.16" +CodecZstd = "0.7" +julia = "1.3" diff --git a/src/filters/H5Zzstd/THIRDPARTY.txt b/src/filters/H5Zzstd/THIRDPARTY.txt new file mode 100644 index 000000000..4c5921650 --- /dev/null +++ b/src/filters/H5Zzstd/THIRDPARTY.txt @@ -0,0 +1,205 @@ +H5Zzstd.jl is derived from +https://github.com/aparamon/HDF5Plugin-Zstandard, zstd_h5plugin.c +under Apache License Version 2.0: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/filters/H5Zzstd/src/H5Zzstd.jl b/src/filters/H5Zzstd/src/H5Zzstd.jl new file mode 100644 index 000000000..1387c2f3a --- /dev/null +++ b/src/filters/H5Zzstd/src/H5Zzstd.jl @@ -0,0 +1,108 @@ +#= +Derived from https://github.com/aparamon/HDF5Plugin-Zstandard, zstd_h5plugin.c +Licensed under Apache License Version 2.0, see licenses/H5Zzstd_LICENSE.txt + +The following license applies to the Julia port. +Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
License MIT, see LICENSE.txt +=# +module H5Zzstd + +using CodecZstd +import CodecZstd.LibZstd +using HDF5.API +import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc + + +const H5Z_FILTER_ZSTD = API.H5Z_filter_t(32015) +const zstd_name = "Zstandard compression: http://www.zstd.net" + +export H5Z_filter_zstd, H5Z_FILTER_ZSTD, ZstdFilter + +# cd_values First optional value is the compressor aggression +# Default is CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT +function H5Z_filter_zstd(flags::Cuint, cd_nelmts::Csize_t, + cd_values::Ptr{Cuint}, nbytes::Csize_t, + buf_size::Ptr{Csize_t}, buf::Ptr{Ptr{Cvoid}})::Csize_t + inbuf = unsafe_load(buf) + outbuf = C_NULL + origSize = nbytes + ret_value = Csize_t(0) + + try + + if flags & API.H5Z_FLAG_REVERSE != 0 + # decompression + + decompSize = LibZstd.ZSTD_getDecompressedSize(inbuf, origSize) + outbuf = Libc.malloc(decompSize) + if outbuf == C_NULL + error("zstd_h5plugin: Cannot allocate memory for outbuf during decompression.") + end + decompSize = LibZstd.ZSTD_decompress(outbuf, decompSize, inbuf, origSize) + Libc.free(inbuf) + unsafe_store!(buf, outbuf) + outbuf = C_NULL + ret_value = Csize_t(decompSize) + else + # compression + + if cd_nelmts > 0 + aggression = Cint(unsafe_load(cd_values)) + else + aggression = CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT + end + + if aggression < 1 + aggression = 1 # ZSTD_minCLevel() + elseif aggression > LibZstd.ZSTD_maxCLevel() + aggression = LibZstd.ZSTD_maxCLevel() + end + + compSize = LibZstd.ZSTD_compressBound(origSize) + outbuf = Libc.malloc(compSize) + if outbuf == C_NULL + error("zstd_h5plugin: Cannot allocate memory for outbuf during compression.") + end + + compSize = LibZstd.ZSTD_compress(outbuf, compSize, inbuf, origSize, aggression) + + Libc.free(unsafe_load(buf)) + unsafe_store!(buf, outbuf) + unsafe_store!(buf_size, compSize) + outbuf = C_NULL + ret_value = compSize + end + catch e + # "In the case of failure, the return value is 0 
(zero) and all pointer arguments are left unchanged." + ret_value = Csize_t(0) + @error "H5Zzstd Non-Fatal ERROR: " e + display(stacktrace(catch_backtrace())) + finally + + if outbuf != C_NULL + Libc.free(outbuf) + end + + end # try catch finally + return Csize_t(ret_value) +end + +# Filters Module + +struct ZstdFilter <: Filter + clevel::Cuint +end +ZstdFilter() = ZstdFilter(CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT) + +filterid(::Type{ZstdFilter}) = H5Z_FILTER_ZSTD +filtername(::Type{ZstdFilter}) = zstd_name +filter_func(::Type{ZstdFilter}) = H5Z_filter_zstd +filter_cfunc(::Type{ZstdFilter}) = @cfunction(H5Z_filter_zstd, Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, + Ptr{Csize_t}, Ptr{Ptr{Cvoid}})) + +function __init__() + register_filter(ZstdFilter) +end + +end # module H5Zzstd \ No newline at end of file diff --git a/src/filters/filters.jl b/src/filters/filters.jl index a911ca79b..7923a76ff 100644 --- a/src/filters/filters.jl +++ b/src/filters/filters.jl @@ -1,9 +1,44 @@ module Filters -export Deflate, Shuffle, Fletcher32, Szip, NBit, ScaleOffset, BloscFilter +# builtin filters +export Deflate, Shuffle, Fletcher32, Szip, NBit, ScaleOffset import ..HDF5: Properties, h5doc, API + +""" + Filter + +Abstract type to describe HDF5 Filters. +See the Extended Help for information on implementing a new filter. + +# Extended Help + +## Filter interface + +The Filter interface is implemented upon the Filter subtype. + +See API.h5z_register for details. 
+ +### Required Methods to Implement +* `filterid` - registered filter ID +* `filter_func` - implement the actual filter + +### Optional Methods to Implement +* `filtername` - defaults to "Unnamed Filter" +* `encoder_present` - defaults to true +* `decoder_present` - defaults to true +* `can_apply_func` - defaults to nothing +* `set_local_func` - defaults to nothing + +### Advanced Methods to Implement +* `can_apply_cfunc` - Defaults to wrapping @cfunction around the result of `can_apply_func` +* `set_local_cfunc` - Defaults to wrapping @cfunction around the result of `set_local_func` +* `filter_cfunc` - Defaults to wrapping @cfunction around the result of `filter_func` +* `register_filter` - Defaults to using the above functions to register the filter + +Implement the Advanced Methods to avoid @cfunction from generating a runtime closure which may not work on all systems. +""" abstract type Filter end """ @@ -11,15 +46,162 @@ abstract type Filter end Maps filter id to filter type. """ -const FILTERS = Dict{API.H5Z_filter_t, Any}() +const FILTERS = Dict{API.H5Z_filter_t, Type{<: Filter}}() """ - filterid(::F) - filterid(F) + filterid(F) where {F <: Filter} + The internal filter id of a filter of type `F`. """ -filterid(::F) where {F<:Filter} = filterid(F) +filterid + +""" + encoder_present(::Type{F}) where {F<:Filter} + +Can the filter encode or compress the data? +Defaults to true. +Returns a Bool. See `API.h5z_register`. +""" +encoder_present(::Type{F}) where {F<:Filter} = true + +""" + decoder_present(::Type{F}) where {F<:Filter} + +Can the filter decode or decompress the data? +Defaults to true. +Returns a Bool. +See `API.h5z_register` +""" +decoder_present(::Type{F}) where {F<:Filter} = true + +""" + filtername(::Type{F}) where {F<:Filter} + +What is the name of a filter? +Defaults to "Unnamed Filter" +Returns a String describing the filter. 
See `API.h5z_register` +""" +filtername(::Type{F}) where {F<:Filter} = "Unnamed Filter" + +""" + can_apply_func(::Type{F}) where {F<:Filter} + +Return a function indicating whether the filter can be applied or `nothing` if no function exists. +The function signature is `func(dcpl_id::API.hid_t, type_id::API.hid_t, space_id::API.hid_t)`. +See `API.h5z_register` +""" +can_apply_func(::Type{F}) where {F<:Filter} = nothing + +""" + can_apply_cfunc(::Type{F}) where {F<:Filter} + +Return a C function pointer for the can apply function. +By default, this will return the result of using `@cfunction` on the function +specified by `can_apply_func(F)` or `C_NULL` if `nothing`. + +Overriding this will allow `@cfunction` to return a `Ptr{Nothing}` rather +than a `CFunction` closure which may not work on all systems. +""" +function can_apply_cfunc(::Type{F}) where {F<:Filter} + func = can_apply_func(F) + if func === nothing + return C_NULL + else + return @cfunction($func, API.herr_t, (API.hid_t,API.hid_t,API.hid_t)) + end +end + +""" + set_local_func(::Type{F}) where {F<:Filter} + +Return a function that sets dataset specific parameters or `nothing` if no function exists. +The function signature is `func(dcpl_id::API.hid_t, type_id::API.hid_t, space_id::API.hid_t)`. +See `API.h5z_register` +""" +set_local_func(::Type{F}) where {F<:Filter} = nothing + +""" + set_local_cfunc(::Type{F}) where {F<:Filter} + +Return a C function pointer for the set local function. +By default, this will return the result of using `@cfunction` on the function +specified by `set_local_func(F)` or `C_NULL` if `nothing`. + +Overriding this will allow `@cfunction` to return a `Ptr{Nothing}` rather +than a `CFunction` closure which may not work on all systems. 
+""" +function set_local_cfunc(::Type{F}) where {F<:Filter} + func = set_local_func(F) + if func === nothing + return C_NULL + else + return @cfunction($func, API.herr_t, (API.hid_t,API.hid_t,API.hid_t)) + end +end + + +""" + filter_func(::Type{F}) where {F<:Filter} + +Returns a function that performs the actual filtering. + +See `API.h5z_register` +""" +filter_func(::Type{F}) where {F<:Filter} = nothing + +""" + filter_cfunc(::Type{F}) where {F<:Filter} + +Return a C function pointer for the filter function. +By default, this will return the result of using `@cfunction` on the function +specified by `filter_func(F)` or will throw an error if `nothing`. + +Overriding this will allow `@cfunction` to return a `Ptr{Nothing}` rather +than a `CFunction` closure which may not work on all systems. +""" +function filter_cfunc(::Type{F}) where {F<:Filter} + func = filter_func(F) + if func === nothing + error("Filter function for $F must be defined via `filter_func`.") + end + c_filter_func = @cfunction($func, Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, + Ptr{Csize_t}, Ptr{Ptr{Cvoid}})) + return c_filter_func +end + +# Generic implementation of register_filter +""" + register_filter(::Type{F}) where F <: Filter + +Register the filter with the HDF5 library via API.h5z_register. +Also add F to the FILTERS dictionary. 
+""" +function register_filter(::Type{F}) where F <: Filter + id = filterid(F) + encoder = encoder_present(F) + decoder = decoder_present(F) + name = filtername(F) + can_apply = can_apply_cfunc(F) + set_local = set_local_cfunc(F) + func = filter_cfunc(F) + GC.@preserve name begin + API.h5z_register(API.H5Z_class_t( + API.H5Z_CLASS_T_VERS, + id, + encoder, + decoder, + pointer(name), + can_apply, + set_local, + func + )) + end + FILTERS[id] = F + return nothing +end + struct UnknownFilter <: Filter filter_id::API.H5Z_filter_t @@ -28,6 +210,11 @@ struct UnknownFilter <: Filter name::String config::Cuint end +filterid(filter::UnknownFilter) = filter.filter_id +filtername(filter::UnknownFilter) = filter.name +filtername(::Type{UnknownFilter}) = "Unknown Filter" +encoder_present(::Type{UnknownFilter}) = false +decoder_present(::Type{UnknownFilter}) = false """ FilterPipeline(plist::DatasetCreateProperties) @@ -106,8 +293,20 @@ function Base.append!(filters::FilterPipeline, extra) end return filters end +function Base.push!(p::FilterPipeline, f::F) where F <: Filter + ref = Ref(f) + GC.@preserve ref begin + API.h5p_set_filter(p.plist, filterid(F), API.H5Z_FLAG_OPTIONAL, div(sizeof(F), sizeof(Cuint)), pointer_from_objref(ref)) + end + return p +end +function Base.push!(p::FilterPipeline, f::UnknownFilter) + GC.@preserve f begin + API.h5p_set_filter(p.plist, f.filter_id, f.flags, length(f.data), pointer(f.data)) + end + return p +end include("builtin.jl") -include("blosc.jl") end # module diff --git a/src/properties.jl b/src/properties.jl index aa07029bb..b18950570 100644 --- a/src/properties.jl +++ b/src/properties.jl @@ -407,8 +407,6 @@ set_deflate!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p), set_deflate!(p::Properties, level::Integer) = push!(Filters.FilterPipeline(p), Filters.Deflate(level=level)) set_shuffle!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p), Filters.Shuffle()) set_fletcher32!(p::Properties, val::Bool) = val && 
push!(Filters.FilterPipeline(p), Filters.Fletcher32()) -set_blosc!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p), Filters.BloscFilter()) -set_blosc!(p::Properties, level::Integer) = push!(Filters.FilterPipeline(p), Filters.BloscFilter(level=level)) class_propertynames(::Type{DatasetCreateProperties}) = ( diff --git a/test/external.jl b/test/external.jl index a22b82347..72bfe0183 100644 --- a/test/external.jl +++ b/test/external.jl @@ -20,22 +20,26 @@ close(target_file) # test both an HDF5.File and an HDF5.Group for first argument HDF5.create_external(source_file, "ext_link", target_file.filename, "target_group") HDF5.create_external(agroup, "ext_link", target_file.filename, "target_group") +close(agroup) # write some things via the external link new_group = create_group(source_file["ext_link"], "new_group") new_group["abc"] = "abc" new_group["1"] = 1 new_group["1.1"] = 1.1 +close(new_group) # read things from target_group via exernal link created with HDF5File argument group = source_file["ext_link"] @test read(group["abc"]) == "abc" @test read(group["1"]) == 1 @test read(group["1.1"]) == 1.1 +close(group) # read things from target_group via the external link created with HDF5.Group argument groupalt = source_file["agroup/ext_link"] @test read(groupalt["abc"]) == "abc" @test read(groupalt["1"]) == 1 @test read(groupalt["1.1"]) == 1.1 +close(groupalt) close(source_file) ##### tests that should be included but don't work diff --git a/test/filter.jl b/test/filter.jl index 48d7816cc..80afe9dac 100644 --- a/test/filter.jl +++ b/test/filter.jl @@ -1,11 +1,10 @@ using HDF5 +using HDF5.Filters using Test +using H5Zblosc, H5Zlz4, H5Zbzip2, H5Zzstd @testset "filter" begin -H5Z_FILTER_DEFLATE = 1 -H5Z_FILTER_SHUFFLE = 2 - # Create a new file fn = tempname() @@ -28,28 +27,59 @@ dsfiltdef = create_dataset(f, "filtdef", datatype(data), dataspace(data), dsfiltshufdef = create_dataset(f, "filtshufdef", datatype(data), dataspace(data), chunk=(100, 100), 
filters=[Filters.Shuffle(), Filters.Deflate(3)]) + # Write data write(dsdeflate, data) write(dsshufdef, data) write(dsfiltdef, data) write(dsfiltshufdef, data) +# Test compression filters + +compressionFilters = Dict( + "blosc" => BloscFilter, + "bzip2" => Bzip2Filter, + "lz4" => Lz4Filter, + "zstd" => ZstdFilter +) + +for (name, filter) in compressionFilters + + ds = create_dataset( + f, name, datatype(data), dataspace(data), + chunk=(100,100), filters=filter() + ) + write(ds, data) + + ds = create_dataset( + f, "shuffle+"*name, datatype(data), dataspace(data), + chunk=(100,100), filters=[Filters.Shuffle(), filter()] + ) + write(ds, data) + +end + + # Close and re-open file for reading close(f) f = h5open(fn) -# Read dataseta -datadeflate = f["deflate"][] -datashufdef = f["shufdef"][] -datafiltdef = f["filtdef"][] -datafiltshufdef = f["filtshufdef"][] +# Read datasets and test for equality +for name in keys(f) + ds = f[name] + @testset "$name" begin + @debug "Filter Dataset" HDF5.name(ds) + @test ds[] == data + filters = HDF5.get_create_properties(ds).filters + if startswith(name, "shuffle+") + @test filters[1] isa Shuffle + @test filters[2] isa compressionFilters[name[9:end]] + elseif haskey(compressionFilters, name) + @test filters[1] isa compressionFilters[name] + end + end +end close(f) -# Test for equality -@test datadeflate == data -@test datashufdef == data -@test datafiltdef == data -@test datafiltshufdef == data - end # @testset "filter" diff --git a/test/filters/FilterTestUtils.jl b/test/filters/FilterTestUtils.jl new file mode 100644 index 000000000..86c58cf97 --- /dev/null +++ b/test/filters/FilterTestUtils.jl @@ -0,0 +1,116 @@ +""" + module FilterTestUtils + +This module contains utilities for evaluating and debugging HDF5 Filters. 
+""" +module FilterTestUtils + +import HDF5.API +import H5Zlz4: H5Z_filter_lz4 +import H5Zzstd: H5Z_filter_zstd +import H5Zbzip2: H5Z_filter_bzip2 +using Test + +export test_filter + +function test_filter_init(; cd_values = Cuint[], data = ones(UInt8, 1024)) + flags = Cuint(0) + nbytes = sizeof(data) + buf_size = Ref(Csize_t(sizeof(data))) + databuf = Libc.malloc(sizeof(data)) + data = reinterpret(UInt8, data) + unsafe_copyto!(Ptr{UInt8}(databuf), pointer(data), sizeof(data)) + buf = Ref(Ptr{Cvoid}(databuf)) + return flags, cd_values, nbytes, buf_size, buf +end + +function test_filter_compress!(filter_func, flags::Cuint, cd_values::Vector{Cuint}, nbytes::Integer, buf_size::Ref{Csize_t}, buf::Ref{Ptr{Cvoid}}) + nbytes = Csize_t(nbytes) + cd_nelmts = Csize_t(length(cd_values)) + GC.@preserve flags cd_nelmts cd_values nbytes buf_size buf begin + ret_code = filter_func( + flags, + cd_nelmts, + pointer(cd_values), + Csize_t(nbytes), + Base.unsafe_convert(Ptr{Csize_t}, buf_size), + Base.unsafe_convert(Ptr{Ptr{Cvoid}}, buf) + ) + @debug "Compression:" ret_code buf_size[] + if ret_code <= 0 + error("Test compression failed: $ret_code.") + end + end + return ret_code +end + +function test_filter_decompress!(filter_func, flags::Cuint, cd_values::Vector{Cuint}, nbytes::Integer, buf_size::Ref{Csize_t}, buf::Ref{Ptr{Cvoid}}) + nbytes = Csize_t(nbytes) + cd_nelmts = Csize_t(length(cd_values)) + flags |= UInt32(API.H5Z_FLAG_REVERSE) + GC.@preserve flags cd_nelmts cd_values nbytes buf_size buf begin + ret_code = filter_func( + flags, + cd_nelmts, + pointer(cd_values), + Csize_t(nbytes), + Base.unsafe_convert(Ptr{Csize_t},buf_size), + Base.unsafe_convert(Ptr{Ptr{Cvoid}}, buf) + ) + @debug "Decompression:" ret_code buf_size[] + end + return ret_code +end + +function test_filter_cleanup!(buf::Ref{Ptr{Cvoid}}) + Libc.free(buf[]) +end + +function test_filter(filter_func; cd_values::Vector{Cuint} = Cuint[], data = ones(UInt8, 1024)) + flags, cd_values, nbytes, buf_size, buf = 
test_filter_init(; cd_values = cd_values, data = data) + nbytes_compressed, nbytes_decompressed = 0, 0 + try + nbytes_compressed = test_filter_compress!(filter_func, flags, cd_values, nbytes, buf_size, buf) + nbytes_decompressed = test_filter_decompress!(filter_func, flags, cd_values, nbytes_compressed, buf_size, buf) + if nbytes_decompressed > 0 + # ret_code is the number of bytes out + round_trip_data = unsafe_wrap(Array,Ptr{UInt8}(buf[]), nbytes_decompressed) + @debug "Is the data the same after a roundtrip?" data == round_trip_data + end + catch err + rethrow(err) + finally + test_filter_cleanup!(buf) + end + @debug "Compression Ratio" nbytes_compressed / nbytes_decompressed + return nbytes_compressed, nbytes_decompressed +end + +function test_bzip2_filter(data = ones(UInt8, 1024)) + cd_values = Cuint[8] + test_filter(H5Z_filter_bzip2; cd_values = cd_values, data = data) +end + +function test_lz4_filter(data = ones(UInt8, 1024)) + cd_values = Cuint[1024] + test_filter(H5Z_filter_lz4; cd_values = cd_values, data = data) +end + +function test_zstd_filter(data = ones(UInt8, 1024)) + cd_values = Cuint[3] # aggression + test_filter(H5Z_filter_zstd; cd_values = cd_values, data = data) +end + +function __init__() + @testset "Compression Filter Unit Tests" begin + @test argmin(test_bzip2_filter()) == 1 + @test argmin(test_lz4_filter()) == 1 + @test argmin(test_zstd_filter()) == 1 + str = codeunits(repeat("foobar", 1000)) + @test argmin(test_bzip2_filter(str)) == 1 + @test argmin(test_lz4_filter(str)) == 1 + @test argmin(test_zstd_filter(str)) == 1 + end +end + +end \ No newline at end of file diff --git a/test/plain.jl b/test/plain.jl index c5183570b..93cba9f0f 100644 --- a/test/plain.jl +++ b/test/plain.jl @@ -1,5 +1,6 @@ using HDF5 using CRC32c +using H5Zblosc using Test gatherf(dst_buf, dst_buf_bytes_used, op_data) = HDF5.API.herr_t(0) diff --git a/test/runtests.jl b/test/runtests.jl index 82ec24369..498d22181 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ 
-1,56 +1,63 @@ -using HDF5 -using Test -using Pkg - -@info "libhdf5 v$(HDF5.API.h5_get_libversion())" - -# To debug HDF5.jl tests, uncomment the next line -# ENV["JULIA_DEBUG"] = "Main" - -@testset "HDF5.jl" begin - -@debug "plain" -include("plain.jl") -@debug "compound" -include("compound.jl") -@debug "custom" -include("custom.jl") -@debug "reference" -include("reference.jl") -@debug "dataspace" -include("dataspace.jl") -@debug "hyperslab" -include("hyperslab.jl") -@debug "readremote" -include("readremote.jl") -@debug "extend_test" -include("extend_test.jl") -@debug "gc" -include("gc.jl") -@debug "external" -include("external.jl") -@debug "swmr" -include("swmr.jl") -@debug "mmap" -include("mmap.jl") -@debug "properties" -include("properties.jl") -@debug "table" -include("table.jl") -@debug "filter" -include("filter.jl") -@debug "chunkstorage" -include("chunkstorage.jl") -@debug "fileio" -include("fileio.jl") - -using MPI -if HDF5.has_parallel() - # basic MPI tests, for actual parallel tests we need to run in MPI mode - include("mpio.jl") -end - -# Clean up after all resources -HDF5.API.h5_close() - -end +using HDF5 +using Test +using Pkg +filter_path = joinpath(dirname(pathof(HDF5)), "filters") +Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zblosc"))) +Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zbzip2"))) +Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zlz4"))) +Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zzstd"))) + +@info "libhdf5 v$(HDF5.API.h5_get_libversion())" + +# To debug HDF5.jl tests, uncomment the next line +# ENV["JULIA_DEBUG"] = "Main" + +@testset "HDF5.jl" begin + +@debug "plain" +include("plain.jl") +@debug "compound" +include("compound.jl") +@debug "custom" +include("custom.jl") +@debug "reference" +include("reference.jl") +@debug "dataspace" +include("dataspace.jl") +@debug "hyperslab" +include("hyperslab.jl") +@debug "readremote" +include("readremote.jl") +@debug "extend_test" +include("extend_test.jl") +@debug 
"gc" +include("gc.jl") +@debug "external" +include("external.jl") +@debug "swmr" +include("swmr.jl") +@debug "mmap" +include("mmap.jl") +@debug "properties" +include("properties.jl") +@debug "table" +include("table.jl") +@debug "filter" +include("filter.jl") +@debug "chunkstorage" +include("chunkstorage.jl") +@debug "fileio" +include("fileio.jl") +@debug "filter test utils" +include("filters/FilterTestUtils.jl") + +using MPI +if HDF5.has_parallel() + # basic MPI tests, for actual parallel tests we need to run in MPI mode + include("mpio.jl") +end + +# Clean up after all resources +HDF5.API.h5_close() + +end