Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Type system overhaul #338

Closed
wants to merge 12 commits into from
52 changes: 47 additions & 5 deletions src/JLD2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,17 +172,29 @@ mutable struct JLDFile{T<:IO}
n_times_opened::Int
datatype_locations::OrderedDict{RelOffset,CommittedDatatype}
datatypes::Vector{H5Datatype}
# Protocol of written datatypes to avoid duplication
# points to location of committed datatype
datatype_wsession::JLDWriteSession{Dict{UInt,RelOffset}}
jlh5type::IdDict
h5jltype::IdDict
jloffset::Dict{RelOffset,WeakRef}
jlh5type::IdDict # Julia Type → H5Type
h5jltype::IdDict # H5Type → Julia Type
jloffset::Dict{RelOffset,WeakRef} # Cache for (mutable) loaded objects
end_of_data::Int64
global_heaps::Dict{RelOffset,GlobalHeap}
global_heap::GlobalHeap
loaded_groups::Dict{RelOffset,Group}
root_group_offset::RelOffset

# Mapping between julia datatypes that (also) exist in the file
# and the location in the file. Similar to datatype_wsession
# but for julia types, not HDF5 types
juliatype_locations::IdDict{Any,RelOffset}#OrderedDict{Any, RelOffset}

root_group::Group

# Central place in the file where hdf5/julia types are referenced
# to recreate mapping in loaded files.
types_group::Group
juliatypes_group::Group

function JLDFile{T}(io::IO, path::AbstractString, writable::Bool, written::Bool,
compress,#::Union{Bool,Symbol},
Expand All @@ -191,7 +203,8 @@ mutable struct JLDFile{T<:IO}
OrderedDict{RelOffset,CommittedDatatype}(), H5Datatype[],
JLDWriteSession(), IdDict(), IdDict(), Dict{RelOffset,WeakRef}(),
Int64(FILE_HEADER_LENGTH + jlsizeof(Superblock)), Dict{RelOffset,GlobalHeap}(),
GlobalHeap(0, 0, 0, Int64[]), Dict{RelOffset,Group}(), UNDEFINED_ADDRESS)
GlobalHeap(0, 0, 0, Int64[]), Dict{RelOffset,Group}(), UNDEFINED_ADDRESS,
IdDict{Any, RelOffset}())
finalizer(jld_finalizer, f)
f
end
Expand Down Expand Up @@ -297,6 +310,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
if f.written
f.root_group = Group{typeof(f)}(f)
f.types_group = Group{typeof(f)}(f)
f.juliatypes_group = Group{typeof(f)}(f)
else
verify_file_header(f)

Expand All @@ -317,6 +331,13 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
else
f.types_group = Group{typeof(f)}(f)
end

if haskey(f.root_group.written_links, "_juliatypes")
types_group_offset = f.root_group.written_links["_juliatypes"]
f.juliatypes_group = f.loaded_groups[types_group_offset] = load_group(f, types_group_offset)
else
f.juliatypes_group = Group{typeof(f)}(f)
end
end

f
Expand Down Expand Up @@ -347,6 +368,7 @@ end

Populate f.datatypes and f.jlh5type with all of the committed datatypes from a file. We
need to do this before writing to make sure we reuse written datatypes.
Additionally match all written julia types in case more need to be written.
"""
function load_datatypes(f::JLDFile)
dts = f.datatypes
Expand All @@ -357,8 +379,17 @@ function load_datatypes(f::JLDFile)
!isassigned(dts, i) && jltype(f, cdt)
i += 1
end
if f.juliatypes_group.last_chunk_start_offset != -1
for offset in values(f.juliatypes_group.written_links)
dt = load_dataset(f, offset)
f.juliatype_locations[dt] = offset
track_weakref!(f, offset, dt)
end
end
return nothing
end


"""
prewrite(f::JLDFile)

Expand All @@ -380,7 +411,15 @@ Base.getindex(f::JLDFile, name::AbstractString) = f.root_group[name]
Base.setindex!(f::JLDFile, obj, name::AbstractString) = (f.root_group[name] = obj; f)
Base.haskey(f::JLDFile, name::AbstractString) = haskey(f.root_group, name)
Base.isempty(f::JLDFile) = isempty(f.root_group)
Base.keys(f::JLDFile) = filter!(x->x != "_types", keys(f.root_group))
Base.keys(f::JLDFile) = filter!(x->x != "_types" && x != "_juliatypes", keys(f.root_group))

"""
    length(f::JLDFile)

Return the number of entries in the root group of `f`, not counting the
`"_types"` and `"_juliatypes"` entries when they are present.
"""
function Base.length(f::JLDFile)
    root = f.root_group
    n = length(root)
    # Each of these names is subtracted from the count if the root group has it.
    for hidden in ("_types", "_juliatypes")
        if haskey(root, hidden)
            n -= 1
        end
    end
    return n
end

# Look up `name` in `f`; when the key is absent, call `default()` and return its result.
function Base.get(default::Function, f::Union{JLDFile, Group}, name::AbstractString)
    if haskey(f, name)
        return f[name]
    else
        return default()
    end
end
Base.get(f::Union{JLDFile, Group}, name::AbstractString, default) =
Expand Down Expand Up @@ -412,6 +451,8 @@ function Base.close(f::JLDFile)
end
if !isempty(f.types_group) && !haskey(f.root_group, "_types")
f.root_group["_types"] = f.types_group
f.root_group["_juliatypes"] = f.juliatypes_group

end
res = save_group(f.root_group)
if f.root_group_offset == UNDEFINED_ADDRESS
Expand Down Expand Up @@ -511,6 +552,7 @@ include("data/number_types.jl")
include("data/custom_serialization.jl")
include("data/writing_datatypes.jl")
include("data/reconstructing_datatypes.jl")
include("data/unknown_types.jl")

include("dataio.jl")
include("loadsave.jl")
Expand Down
8 changes: 4 additions & 4 deletions src/backwards_compatibility.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# The following block allows reading Union types written prior to v0.2
const LEGACY_H5TYPE_UNION = VariableLengthDatatype(H5TYPE_DATATYPE)
const LEGACY_H5TYPE_UNION = VariableLengthDatatype(H5TYPE_OLD_DATATYPE)

function jlconvert(::ReadRepresentation{Union, Vlen{DataTypeODR()}}, f::JLDFile,
function jlconvert(::ReadRepresentation{Union, Vlen{OldDataTypeODR()}}, f::JLDFile,
ptr::Ptr, header_offset::RelOffset)
v = Union{jlconvert(ReadRepresentation{DataType,Vlen{DataTypeODR()}}(), f, ptr, NULL_REFERENCE)...}
v = Union{jlconvert(ReadRepresentation{DataType,Vlen{OldDataTypeODR()}}(), f, ptr, NULL_REFERENCE)...}
track_weakref!(f, header_offset, v)
v
end

constructrr(::JLDFile, ::Type{T}, dt::VariableLengthDatatype, ::Vector{ReadAttribute}) where {T<:Union} =
dt == LEGACY_H5TYPE_UNION ? (ReadRepresentation{Union,Vlen{DataTypeODR()}}(), true) :
dt == LEGACY_H5TYPE_UNION ? (ReadRepresentation{Union,Vlen{OldDataTypeODR()}}(), true) :
throw(UnsupportedFeatureException())

# The following definition is needed to correctly load Strings written
Expand Down
Loading