Skip to content

Commit

Permalink
hide new juliatypes group
Browse files Browse the repository at this point in the history
make things backwards compatible

fix warning

fix backwards compat

forgot one

verify datatypes on append

add Base.length definitions

typo + cleanup

another fix

juliatype not datatype
  • Loading branch information
JonasIsensee committed Aug 17, 2021
1 parent 456f801 commit 62ef4e9
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 52 deletions.
35 changes: 24 additions & 11 deletions src/JLD2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,15 @@ mutable struct JLDFile{T<:IO}
datatype_locations::OrderedDict{RelOffset,CommittedDatatype}
datatypes::Vector{H5Datatype}
datatype_wsession::JLDWriteSession{Dict{UInt,RelOffset}}
jlh5type::IdDict
h5jltype::IdDict
jloffset::Dict{RelOffset,WeakRef}
jlh5type::IdDict # Julia Type → H5Type
h5jltype::IdDict # H5Type → Julia Type
jloffset::Dict{RelOffset,WeakRef} # Cache for (mutable) loaded objects
end_of_data::Int64
global_heaps::Dict{RelOffset,GlobalHeap}
global_heap::GlobalHeap
loaded_groups::Dict{RelOffset,Group}
root_group_offset::RelOffset

juliatypes::Vector{Any}
juliatype_locations::OrderedDict{RelOffset,Any}
juliatype_locations_rev::OrderedDict{Any,RelOffset}

Expand All @@ -198,7 +197,7 @@ mutable struct JLDFile{T<:IO}
JLDWriteSession(), IdDict(), IdDict(), Dict{RelOffset,WeakRef}(),
Int64(FILE_HEADER_LENGTH + jlsizeof(Superblock)), Dict{RelOffset,GlobalHeap}(),
GlobalHeap(0, 0, 0, Int64[]), Dict{RelOffset,Group}(), UNDEFINED_ADDRESS,
[], OrderedDict{RelOffset,Any}(),OrderedDict{Any,RelOffset}())
OrderedDict{RelOffset,Any}(), OrderedDict{Any,RelOffset}())
finalizer(jld_finalizer, f)
f
end
Expand Down Expand Up @@ -329,11 +328,6 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
if haskey(f.root_group.written_links, "_juliatypes")
types_group_offset = f.root_group.written_links["_juliatypes"]
f.juliatypes_group = f.loaded_groups[types_group_offset] = load_group(f, types_group_offset)
i = 0
for offset in values(f.types_group.written_links)
f.juliatype_locations[offset] = CommittedDatatype(offset, i += 1)
end
resize!(f.juliatypes, length(f.juliatype_locations))
else
f.juliatypes_group = Group{typeof(f)}(f)
end
Expand Down Expand Up @@ -379,6 +373,17 @@ function load_datatypes(f::JLDFile)
end
end

"""
    load_all_juliatypes(f::JLDFile)

Load every dataset referenced by the written links of `f.juliatypes_group` and
record it in both lookup tables of `f`: `juliatype_locations` (offset → loaded
value) and `juliatype_locations_rev` (loaded value → offset). Each loaded value
is also registered through `track_weakref!`.

Does nothing when the group already has as many entries as the reverse table.
Always returns `nothing`.
"""
function load_all_juliatypes(f::JLDFile)
    # Skip the work when the cache already has one entry per group member.
    if length(f.juliatypes_group) == length(f.juliatype_locations_rev)
        return nothing
    end
    for offset in values(f.juliatypes_group.written_links)
        loaded = load_dataset(f, offset)
        f.juliatype_locations[offset] = loaded
        f.juliatype_locations_rev[loaded] = offset
        track_weakref!(f, offset, loaded)
    end
    return nothing
end

"""
prewrite(f::JLDFile)
Expand All @@ -400,7 +405,15 @@ Base.getindex(f::JLDFile, name::AbstractString) = f.root_group[name]
# Dictionary-style access on a file is forwarded to its root group.
# `setindex!` stores `obj` under `name` in the root group and returns the file itself.
Base.setindex!(f::JLDFile, obj, name::AbstractString) = (f.root_group[name] = obj; f)
# `haskey` and `isempty` simply ask the root group.
Base.haskey(f::JLDFile, name::AbstractString) = haskey(f.root_group, name)
Base.isempty(f::JLDFile) = isempty(f.root_group)
Base.keys(f::JLDFile) = filter!(x->x != "_types", keys(f.root_group))
Base.keys(f::JLDFile) = filter!(x->x != "_types" && x != "_juliatypes", keys(f.root_group))

"""
    length(f::JLDFile)

Return the number of entries in the root group of `f`, not counting the
internal `"_types"` and `"_juliatypes"` entries when they are present.
"""
function Base.length(f::JLDFile)
    # The argument must be named `f`: the body reads `f.root_group`, and with the
    # previous name (`g`) every call failed with an `UndefVarError`.
    n = length(f.root_group)
    # Internal bookkeeping groups are hidden from `keys(f)`, so hide them here too.
    haskey(f.root_group, "_types") && (n -= 1)
    haskey(f.root_group, "_juliatypes") && (n -= 1)
    return n
end

# `get` with a fallback function: return the entry stored under `name` when it
# exists, otherwise call `default()` and return its result.
function Base.get(default::Function, f::Union{JLDFile, Group}, name::AbstractString)
    return haskey(f, name) ? f[name] : default()
end
Base.get(f::Union{JLDFile, Group}, name::AbstractString, default) =
Expand Down
8 changes: 4 additions & 4 deletions src/backwards_compatibility.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# The following block allows reading Union types written prior to v0.2
const LEGACY_H5TYPE_UNION = VariableLengthDatatype(H5TYPE_DATATYPE)
const LEGACY_H5TYPE_UNION = VariableLengthDatatype(H5TYPE_OLD_DATATYPE)

function jlconvert(::ReadRepresentation{Union, Vlen{DataTypeODR()}}, f::JLDFile,
function jlconvert(::ReadRepresentation{Union, Vlen{OldDataTypeODR()}}, f::JLDFile,
ptr::Ptr, header_offset::RelOffset)
v = Union{jlconvert(ReadRepresentation{DataType,Vlen{DataTypeODR()}}(), f, ptr, NULL_REFERENCE)...}
v = Union{jlconvert(ReadRepresentation{DataType,Vlen{OldDataTypeODR()}}(), f, ptr, NULL_REFERENCE)...}
track_weakref!(f, header_offset, v)
v
end

constructrr(::JLDFile, ::Type{T}, dt::VariableLengthDatatype, ::Vector{ReadAttribute}) where {T<:Union} =
dt == LEGACY_H5TYPE_UNION ? (ReadRepresentation{Union,Vlen{DataTypeODR()}}(), true) :
dt == LEGACY_H5TYPE_UNION ? (ReadRepresentation{Union,Vlen{OldDataTypeODR()}}(), true) :
throw(UnsupportedFeatureException())

# The following definition is needed to correctly load Strings written
Expand Down
65 changes: 53 additions & 12 deletions src/data/reconstructing_datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,13 @@ function jltype(f::JLDFile, cdt::CommittedDatatype)
if h5offset(f, julia_type_attr.datatype_offset) == cdt.header_offset
# Verify that the datatype matches our expectations
if dt != H5TYPE_DATATYPE
error("""The HDF5 datatype representing a Julia datatype does not match
@warn("""The HDF5 datatype representing a Julia datatype does not match
the expectations of this version of JLD.
You may need to update JLD to read this file.""")
f.jlh5type[DataType] = cdt
f.datatypes[cdt.index] = dt
return (f.h5jltype[cdt] = ReadRepresentation{DataType, OldDataTypeODR()}())
end
f.jlh5type[DataType] = cdt
f.datatypes[cdt.index] = dt
Expand Down Expand Up @@ -234,12 +237,11 @@ end

# Find types in modules
# returns the result of searching for the type in the specified module m
function _resolve_type_singlemodule(::ReadRepresentation{T,DataTypeODR()},
m,
function _resolve_type_singlemodule(m,
parts,
mypath,
hasparams::Bool,
params) where T
params)
for part in parts
sym = Symbol(part)
if !isa(m, Module) || !isdefined(m, sym)
Expand All @@ -256,19 +258,18 @@ end
# Predicate by dispatch: `false` for an `UnknownType` value, `true` for anything else.
_is_not_unknown_type(::UnknownType) = false
_is_not_unknown_type(::Any) = true

function _resolve_type(rr::ReadRepresentation{T,DataTypeODR()},
function _resolve_type(rr,
f::JLDFile,
ptr::Ptr,
header_offset::RelOffset,
mypath,
hasparams::Bool,
params) where T
params)
parts = split(mypath, '.')
modules = vcat([Main], collect(keys(Base.module_keys)), stdlibmodules(Main))
unique!(modules)
for mod in modules
resolution_attempt = _resolve_type_singlemodule(rr,
mod,
resolution_attempt = _resolve_type_singlemodule(mod,
parts,
mypath,
hasparams,
Expand Down Expand Up @@ -351,9 +352,49 @@ function jlconvert(rr::ReadRepresentation{T,DataTypeODR()},
return m
end

constructrr(::JLDFile, ::Type{T}, dt::CompoundDatatype, ::Vector{ReadAttribute}) where {T<:DataType} =
dt == H5TYPE_DATATYPE ? (ReadRepresentation{DataType,DataTypeODR()}(), true) :
throw(UnsupportedFeatureException())
# Read a datatype description stored with the old two-field layout
# (`OldDataTypeODR`) and turn it into a Julia type.
#
# Returns the resolved (and, if applicable, parameterized) type. When the name
# cannot be resolved, or applying the parameters fails, an `UnknownType`
# placeholder is returned instead and no weak reference is tracked.
function jlconvert(rr::ReadRepresentation{T,OldDataTypeODR()},
                   f::JLDFile,
                   ptr::Ptr,
                   header_offset::RelOffset) where T
    # The parameter references are read from the slot following the name.
    rawparams, _ = types_from_refs(f, ptr + odr_sizeof(Vlen{UInt8}))
    # For cross-platform compatibility convert integer type parameters to system precision
    params = [p isa Union{Int64,Int32} ? Int(p) : p for p in rawparams]
    hasparams = !isempty(params)
    namebytes = jlconvert(ReadRepresentation{UInt8,Vlen{UInt8}}(), f, ptr, NULL_REFERENCE)
    typepath = String(namebytes)

    resolved = _resolve_type(rr, f, ptr, header_offset, typepath, hasparams,
                             hasparams ? params : nothing)

    # Name lookup failed: hand back a placeholder that keeps name and parameters.
    resolved isa UnknownType &&
        return UnknownType{String}(resolved.name, params, [], [])

    if hasparams
        try
            resolved = resolved{params...}
        catch
            # The parameters do not fit the resolved type.
            return UnknownType{DataType}(resolved, params, [], [])
        end
    elseif resolved === Tuple
        # Need to instantiate with no parameters, since Tuple is really
        # Tuple{Vararg{Any}}
        resolved = Tuple{}
    end

    track_weakref!(f, header_offset, resolved)
    return resolved
end

# Choose the read representation for a stored `DataType` from the compound
# datatype found in the file: the current layout maps to `DataTypeODR`, the old
# two-field layout to `OldDataTypeODR`. Any other layout is rejected with an
# `UnsupportedFeatureException`.
function constructrr(::JLDFile, ::Type{T}, dt::CompoundDatatype, ::Vector{ReadAttribute}) where {T<:DataType}
    dt == H5TYPE_DATATYPE &&
        return (ReadRepresentation{DataType,DataTypeODR()}(), true)
    dt == H5TYPE_OLD_DATATYPE &&
        return (ReadRepresentation{DataType,OldDataTypeODR()}(), true)
    throw(UnsupportedFeatureException())
end


## Type reconstruction
Expand Down Expand Up @@ -384,7 +425,7 @@ end
function typestring(T::UnknownType)
tn = IOBuffer()
print(tn, T.name)
if isdefined(T, :parameters)
if isdefined(T, :parameters) && !isempty(T.parameters)
write(tn, '{')
for i = 1:length(T.parameters)
x = T.parameters[i]
Expand Down
26 changes: 17 additions & 9 deletions src/data/writing_datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const DataTypeODR = OnDiskRepresentation{
Vlen{RelOffset},
Vlen{RelOffset}}}

# On-disk representation with two fields: a `String` at offset 0 and a
# `Vector{Any}` at offset `odr_sizeof(Vlen{String})`, stored as `Vlen{String}`
# and `Vlen{RelOffset}` respectively.
# NOTE(review): judging by the name and its use in the backwards-compatibility
# code, this is the layout datatypes had before the fieldnames/fieldtypes
# fields were added — confirm.
const OldDataTypeODR = OnDiskRepresentation{(0, odr_sizeof(Vlen{String})),Tuple{String,Vector{Any}},Tuple{Vlen{String},Vlen{RelOffset}}}

const H5TYPE_DATATYPE = CompoundDatatype(
odr_sizeof(Vlen{String})+odr_sizeof(Vlen{RelOffset})+odr_sizeof(Vlen{RelOffset})+odr_sizeof(Vlen{RelOffset}),
[:name, :parameters, :fieldnames, :fieldtypes],
Expand All @@ -35,6 +37,14 @@ const H5TYPE_DATATYPE = CompoundDatatype(
VariableLengthDatatype(ReferenceDatatype())]
)

# Compound HDF5 datatype with two members, `:name` and `:parameters`, placed at
# offsets 0 and `odr_sizeof(Vlen{String})`. Its total size is the sum of the two
# variable-length slot sizes. `:name` is a variable-length UTF-8 string and
# `:parameters` a variable-length sequence of references.
# `constructrr` compares a file's datatype against this constant to select
# `OldDataTypeODR`.
const H5TYPE_OLD_DATATYPE = CompoundDatatype(
odr_sizeof(Vlen{String})+odr_sizeof(Vlen{RelOffset}),
[:name, :parameters],
[0, odr_sizeof(Vlen{String})],
[H5TYPE_VLEN_UTF8, VariableLengthDatatype(ReferenceDatatype())]
)


function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::DataType, wsession::JLDWriteSession)
t = typename(T)
store_vlen!(out, UInt8, f, unsafe_wrap(Vector{UInt8}, t), f.datatype_wsession)
Expand Down Expand Up @@ -74,23 +84,21 @@ function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::Type{DataType},
end
out += odr_sizeof(Vlen{RelOffset})
fieldnames = T.name.names
#if isempty(fieldnames)
if isempty(fieldnames)
h5convert_uninitialized!(out, Vlen{String})
out += odr_sizeof(Vlen{String})
h5convert_uninitialized!(out, Vlen{RelOffset})
#else
# store_vlen!(out, String, f, string.(fieldnames), wsession)
# out += odr_sizeof(Vlen{String})
# refs = refs_from_types(f, T.types, wsession)
# store_vlen!(out, RelOffset, f, refs, f.datatype_wsession)
#end
else
store_vlen!(out, String, f, string.(fieldnames), wsession)
out += odr_sizeof(Vlen{String})
refs = refs_from_types(f, T.types, wsession)
store_vlen!(out, RelOffset, f, refs, f.datatype_wsession)
end

nothing
end


#const DataTypeODR = RelOffset

#= function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::DataType, wsession::JLDWriteSession)
#= t = typename(T)
store_vlen!(out, UInt8, f, unsafe_wrap(Vector{UInt8}, t), f.datatype_wsession)
Expand Down
24 changes: 9 additions & 15 deletions src/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -466,19 +466,19 @@ function write_dataset(f::JLDFile,
datatype::H5Datatype,
odr::S,
data::DataType,
wsession::JLDWriteSession) where S
wsession::JLDWriteSession) where S
# Ensure that all juliatypes are loaded
load_all_juliatypes(f)
ref = get(f.juliatype_locations_rev, data, RelOffset(0))
if ref != RelOffset(0)
return ref
end

ref != RelOffset(0) && return ref

header_offset = f.end_of_data
ref = h5offset(f, header_offset)
id = length(f.juliatypes)+1
#println("Stored datatype to ref: ", ref)
#error("test")

id = length(f.juliatypes_group)+1
f.juliatypes_group[@sprintf("%08d", id)] = ref
push!(f.juliatypes, data)

track_weakref!(f, ref, data)
f.juliatype_locations[ref] = data
f.juliatype_locations_rev[data] = ref

Expand All @@ -502,12 +502,6 @@ function write_dataset(f::JLDFile,

seek(io, header_offset)
f.end_of_data = header_offset + fullsz

if ismutabletype(typeof(data)) && !isa(wsession, JLDWriteSession{Union{}})
wsession.h5offset[objectid(data)] = h5offset(f, header_offset)
push!(wsession.objects, data)
end
#println("write_dataset: datatype=$(datatype), data=$(data), odr=$(odr)")

cio = begin_checksum_write(io, fullsz - 4)
write_object_header_and_dataspace_message(cio, f, psz, dataspace)
Expand Down
12 changes: 11 additions & 1 deletion src/groups.jl
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ function Base.keys(g::Group)
ks
end

# Number of entries in a group: its unwritten links and unwritten child groups,
# plus its written links — the latter are only counted when
# `last_chunk_start_offset` is not -1.
function Base.length(g::Group)
    written = g.last_chunk_start_offset != -1 ? length(g.written_links) : 0
    return written + length(g.unwritten_links) + length(g.unwritten_child_groups)
end

struct LinkInfo
version::UInt8
flags::UInt8
Expand Down Expand Up @@ -573,7 +583,7 @@ function show_group(io::IO, g::Group, maxnumlines::Int=10, prefix::String=" ", s
end

ks = collect(keys(g))
skiptypes && filter!(x -> x != "_types", ks)
skiptypes && filter!(x -> x != "_types" && x != "_juliatypes", ks)

if isempty(ks) && prefix == " "
print(io, " (no datasets)")
Expand Down

0 comments on commit 62ef4e9

Please sign in to comment.