diff --git a/src/JLD2.jl b/src/JLD2.jl index 02b02ef6..01b88124 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -173,16 +173,15 @@ mutable struct JLDFile{T<:IO} datatype_locations::OrderedDict{RelOffset,CommittedDatatype} datatypes::Vector{H5Datatype} datatype_wsession::JLDWriteSession{Dict{UInt,RelOffset}} - jlh5type::IdDict - h5jltype::IdDict - jloffset::Dict{RelOffset,WeakRef} + jlh5type::IdDict # Julia Type → H5Type + h5jltype::IdDict # H5Type → Julia Type + jloffset::Dict{RelOffset,WeakRef} # Cache for (mutable) loaded objects end_of_data::Int64 global_heaps::Dict{RelOffset,GlobalHeap} global_heap::GlobalHeap loaded_groups::Dict{RelOffset,Group} root_group_offset::RelOffset - juliatypes::Vector{Any} juliatype_locations::OrderedDict{RelOffset,Any} juliatype_locations_rev::OrderedDict{Any,RelOffset} @@ -198,7 +197,7 @@ mutable struct JLDFile{T<:IO} JLDWriteSession(), IdDict(), IdDict(), Dict{RelOffset,WeakRef}(), Int64(FILE_HEADER_LENGTH + jlsizeof(Superblock)), Dict{RelOffset,GlobalHeap}(), GlobalHeap(0, 0, 0, Int64[]), Dict{RelOffset,Group}(), UNDEFINED_ADDRESS, - [], OrderedDict{RelOffset,Any}(),OrderedDict{Any,RelOffset}()) + OrderedDict{RelOffset,Any}(), OrderedDict{Any,RelOffset}()) finalizer(jld_finalizer, f) f end @@ -329,11 +328,6 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, if haskey(f.root_group.written_links, "_juliatypes") types_group_offset = f.root_group.written_links["_juliatypes"] f.juliatypes_group = f.loaded_groups[types_group_offset] = load_group(f, types_group_offset) - i = 0 - for offset in values(f.types_group.written_links) - f.juliatype_locations[offset] = CommittedDatatype(offset, i += 1) - end - resize!(f.juliatypes, length(f.juliatype_locations)) else f.juliatypes_group = Group{typeof(f)}(f) end @@ -379,6 +373,17 @@ function load_datatypes(f::JLDFile) end end +function load_all_juliatypes(f::JLDFile) + length(f.juliatypes_group) == length(f.juliatype_locations_rev) && return + for offset in 
values(f.juliatypes_group.written_links) + dt = load_dataset(f, offset) + f.juliatype_locations[offset] = dt + f.juliatype_locations_rev[dt] = offset + track_weakref!(f, offset, dt) + end + return nothing +end + """ prewrite(f::JLDFile) @@ -400,7 +405,15 @@ Base.getindex(f::JLDFile, name::AbstractString) = f.root_group[name] Base.setindex!(f::JLDFile, obj, name::AbstractString) = (f.root_group[name] = obj; f) Base.haskey(f::JLDFile, name::AbstractString) = haskey(f.root_group, name) Base.isempty(f::JLDFile) = isempty(f.root_group) -Base.keys(f::JLDFile) = filter!(x->x != "_types", keys(f.root_group)) +Base.keys(f::JLDFile) = filter!(x->x != "_types" && x != "_juliatypes", keys(f.root_group)) + +function Base.length(f::JLDFile) + l = length(f.root_group) + haskey(f.root_group, "_types") && (l -= 1) + haskey(f.root_group, "_juliatypes") && (l -= 1) + return l +end + Base.get(default::Function, f::Union{JLDFile, Group}, name::AbstractString) = haskey(f, name) ? f[name] : default() Base.get(f::Union{JLDFile, Group}, name::AbstractString, default) = diff --git a/src/backwards_compatibility.jl b/src/backwards_compatibility.jl index 62c36733..96c38898 100644 --- a/src/backwards_compatibility.jl +++ b/src/backwards_compatibility.jl @@ -1,15 +1,15 @@ # The following block allows reading Union types written prior to v0.2 -const LEGACY_H5TYPE_UNION = VariableLengthDatatype(H5TYPE_DATATYPE) +const LEGACY_H5TYPE_UNION = VariableLengthDatatype(H5TYPE_OLD_DATATYPE) -function jlconvert(::ReadRepresentation{Union, Vlen{DataTypeODR()}}, f::JLDFile, +function jlconvert(::ReadRepresentation{Union, Vlen{OldDataTypeODR()}}, f::JLDFile, ptr::Ptr, header_offset::RelOffset) - v = Union{jlconvert(ReadRepresentation{DataType,Vlen{DataTypeODR()}}(), f, ptr, NULL_REFERENCE)...} + v = Union{jlconvert(ReadRepresentation{DataType,Vlen{OldDataTypeODR()}}(), f, ptr, NULL_REFERENCE)...} track_weakref!(f, header_offset, v) v end constructrr(::JLDFile, ::Type{T}, dt::VariableLengthDatatype, 
::Vector{ReadAttribute}) where {T<:Union} = - dt == LEGACY_H5TYPE_UNION ? (ReadRepresentation{Union,Vlen{DataTypeODR()}}(), true) : + dt == LEGACY_H5TYPE_UNION ? (ReadRepresentation{Union,Vlen{OldDataTypeODR()}}(), true) : throw(UnsupportedFeatureException()) # The following definition is needed to correctly load Strings written diff --git a/src/data/reconstructing_datatypes.jl b/src/data/reconstructing_datatypes.jl index e2a3f4b3..a4c40ae9 100644 --- a/src/data/reconstructing_datatypes.jl +++ b/src/data/reconstructing_datatypes.jl @@ -43,10 +43,13 @@ function jltype(f::JLDFile, cdt::CommittedDatatype) if h5offset(f, julia_type_attr.datatype_offset) == cdt.header_offset # Verify that the datatype matches our expectations if dt != H5TYPE_DATATYPE - error("""The HDF5 datatype representing a Julia datatype does not match + @warn("""The HDF5 datatype representing a Julia datatype does not match the expectations of this version of JLD. You may need to update JLD to read this file.""") + f.jlh5type[DataType] = cdt + f.datatypes[cdt.index] = dt + return (f.h5jltype[cdt] = ReadRepresentation{DataType, OldDataTypeODR()}()) end f.jlh5type[DataType] = cdt f.datatypes[cdt.index] = dt @@ -234,12 +237,11 @@ end # Find types in modules # returns the result of searching for the type in the specified module m -function _resolve_type_singlemodule(::ReadRepresentation{T,DataTypeODR()}, - m, +function _resolve_type_singlemodule(m, parts, mypath, hasparams::Bool, - params) where T + params) for part in parts sym = Symbol(part) if !isa(m, Module) || !isdefined(m, sym) @@ -256,19 +258,18 @@ end _is_not_unknown_type(x::UnknownType) = false _is_not_unknown_type(x) = true -function _resolve_type(rr::ReadRepresentation{T,DataTypeODR()}, +function _resolve_type(rr, f::JLDFile, ptr::Ptr, header_offset::RelOffset, mypath, hasparams::Bool, - params) where T + params) parts = split(mypath, '.') modules = vcat([Main], collect(keys(Base.module_keys)), stdlibmodules(Main)) unique!(modules) for mod 
in modules - resolution_attempt = _resolve_type_singlemodule(rr, - mod, + resolution_attempt = _resolve_type_singlemodule(mod, parts, mypath, hasparams, @@ -351,9 +352,49 @@ function jlconvert(rr::ReadRepresentation{T,DataTypeODR()}, return m end -constructrr(::JLDFile, ::Type{T}, dt::CompoundDatatype, ::Vector{ReadAttribute}) where {T<:DataType} = - dt == H5TYPE_DATATYPE ? (ReadRepresentation{DataType,DataTypeODR()}(), true) : - throw(UnsupportedFeatureException()) +function jlconvert(rr::ReadRepresentation{T,OldDataTypeODR()}, + f::JLDFile, + ptr::Ptr, + header_offset::RelOffset) where T + + params, unknown_params = types_from_refs(f, ptr+odr_sizeof(Vlen{UInt8})) + # For cross-platform compatibility convert integer type parameters to system precision + params = [p isa Union{Int64,Int32} ? Int(p) : p for p in params] + hasparams = !isempty(params) + mypath = String(jlconvert(ReadRepresentation{UInt8,Vlen{UInt8}}(), f, ptr, NULL_REFERENCE)) + + m = _resolve_type(rr, f, ptr, header_offset, mypath, hasparams, hasparams ? 
params : nothing) + #m isa UnknownType && return m + + if m isa UnknownType + return UnknownType{String}(m.name, params, [], []) + end + + if hasparams + try + m = m{params...} + catch + return UnknownType{DataType}(m, params, [],[]) + #return UnknownType(m, params) + end + elseif m === Tuple + # Need to instantiate with no parameters, since Tuple is really + # Tuple{Vararg{Any}} + m = Tuple{} + end + track_weakref!(f, header_offset, m) + return m +end + +function constructrr(::JLDFile, ::Type{T}, dt::CompoundDatatype, ::Vector{ReadAttribute}) where {T<:DataType} + if dt == H5TYPE_DATATYPE + return ReadRepresentation{DataType,DataTypeODR()}(), true + elseif dt == H5TYPE_OLD_DATATYPE + return ReadRepresentation{DataType,OldDataTypeODR()}(), true + else + throw(UnsupportedFeatureException()) + end +end ## Type reconstruction @@ -384,7 +425,7 @@ end function typestring(T::UnknownType) tn = IOBuffer() print(tn, T.name) - if isdefined(T, :parameters) + if isdefined(T, :parameters) && !isempty(T.parameters) write(tn, '{') for i = 1:length(T.parameters) x = T.parameters[i] diff --git a/src/data/writing_datatypes.jl b/src/data/writing_datatypes.jl index 54ee16c3..c364aa6a 100644 --- a/src/data/writing_datatypes.jl +++ b/src/data/writing_datatypes.jl @@ -22,6 +22,8 @@ const DataTypeODR = OnDiskRepresentation{ Vlen{RelOffset}, Vlen{RelOffset}}} +const OldDataTypeODR = OnDiskRepresentation{(0, odr_sizeof(Vlen{String})),Tuple{String,Vector{Any}},Tuple{Vlen{String},Vlen{RelOffset}}} + const H5TYPE_DATATYPE = CompoundDatatype( odr_sizeof(Vlen{String})+odr_sizeof(Vlen{RelOffset})+odr_sizeof(Vlen{RelOffset})+odr_sizeof(Vlen{RelOffset}), [:name, :parameters, :fieldnames, :fieldtypes], @@ -35,6 +37,14 @@ const H5TYPE_DATATYPE = CompoundDatatype( VariableLengthDatatype(ReferenceDatatype())] ) +const H5TYPE_OLD_DATATYPE = CompoundDatatype( + odr_sizeof(Vlen{String})+odr_sizeof(Vlen{RelOffset}), + [:name, :parameters], + [0, odr_sizeof(Vlen{String})], + [H5TYPE_VLEN_UTF8, 
VariableLengthDatatype(ReferenceDatatype())] +) + + function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::DataType, wsession::JLDWriteSession) t = typename(T) store_vlen!(out, UInt8, f, unsafe_wrap(Vector{UInt8}, t), f.datatype_wsession) @@ -74,23 +84,21 @@ function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::Type{DataType}, end out += odr_sizeof(Vlen{RelOffset}) fieldnames = T.name.names - #if isempty(fieldnames) + if isempty(fieldnames) h5convert_uninitialized!(out, Vlen{String}) out += odr_sizeof(Vlen{String}) h5convert_uninitialized!(out, Vlen{RelOffset}) - #else - # store_vlen!(out, String, f, string.(fieldnames), wsession) - # out += odr_sizeof(Vlen{String}) - # refs = refs_from_types(f, T.types, wsession) - # store_vlen!(out, RelOffset, f, refs, f.datatype_wsession) - #end + else + store_vlen!(out, String, f, string.(fieldnames), wsession) + out += odr_sizeof(Vlen{String}) + refs = refs_from_types(f, T.types, wsession) + store_vlen!(out, RelOffset, f, refs, f.datatype_wsession) + end nothing end -#const DataTypeODR = RelOffset - #= function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::DataType, wsession::JLDWriteSession) #= t = typename(T) store_vlen!(out, UInt8, f, unsafe_wrap(Vector{UInt8}, t), f.datatype_wsession) diff --git a/src/datasets.jl b/src/datasets.jl index 429b47e4..3ea77f99 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -466,19 +466,19 @@ function write_dataset(f::JLDFile, datatype::H5Datatype, odr::S, data::DataType, - wsession::JLDWriteSession) where S + wsession::JLDWriteSession) where S + # Ensure that all juliatypes are loaded + load_all_juliatypes(f) ref = get(f.juliatype_locations_rev, data, RelOffset(0)) - if ref != RelOffset(0) - return ref - end - + ref != RelOffset(0) && return ref + header_offset = f.end_of_data ref = h5offset(f, header_offset) - id = length(f.juliatypes)+1 - #println("Stored datatype to ref: ", ref) - #error("test") + + id = length(f.juliatypes_group)+1 
f.juliatypes_group[@sprintf("%08d", id)] = ref - push!(f.juliatypes, data) + + track_weakref!(f, ref, data) f.juliatype_locations[ref] = data f.juliatype_locations_rev[data] = ref @@ -502,12 +502,6 @@ function write_dataset(f::JLDFile, seek(io, header_offset) f.end_of_data = header_offset + fullsz - - if ismutabletype(typeof(data)) && !isa(wsession, JLDWriteSession{Union{}}) - wsession.h5offset[objectid(data)] = h5offset(f, header_offset) - push!(wsession.objects, data) - end - #println("write_dataset: datatype=$(datatype), data=$(data), odr=$(odr)") cio = begin_checksum_write(io, fullsz - 4) write_object_header_and_dataspace_message(cio, f, psz, dataspace) diff --git a/src/groups.jl b/src/groups.jl index 7d8752f9..4cf8d620 100644 --- a/src/groups.jl +++ b/src/groups.jl @@ -200,6 +200,16 @@ function Base.keys(g::Group) ks end +function Base.length(g::Group) + l = 0 + if g.last_chunk_start_offset != -1 + l += length(g.written_links) + end + l += length(g.unwritten_links) + l += length(g.unwritten_child_groups) + return l +end + struct LinkInfo version::UInt8 flags::UInt8 @@ -573,7 +583,7 @@ function show_group(io::IO, g::Group, maxnumlines::Int=10, prefix::String=" ", s end ks = collect(keys(g)) - skiptypes && filter!(x -> x != "_types", ks) + skiptypes && filter!(x -> x != "_types" && x != "_juliatypes", ks) if isempty(ks) && prefix == " " print(io, " (no datasets)")