From 9d47b40b10a65f52bca394812da086e7dcf1f4b7 Mon Sep 17 00:00:00 2001 From: Tamas Gal Date: Wed, 19 Apr 2023 16:11:36 +0200 Subject: [PATCH] Recovering TTree baskets (#238) * Type safety * Implement basket recovery mechanics * Turn warning into debug * Bump version number --- Project.toml | 2 +- src/bootstrap.jl | 9 ++++++--- src/iteration.jl | 22 ++++++++++++++++++---- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 6c12ecd8..58ea499e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "UnROOT" uuid = "3cd96dde-e98d-4713-81e9-a4a1b0235ce9" authors = ["Tamas Gal", "Jerry Ling", "Johannes Schumann", "Nick Amin"] -version = "0.10.5" +version = "0.10.6" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/bootstrap.jl b/src/bootstrap.jl index eb8f46fa..65bf7f7c 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -2,7 +2,8 @@ # dynamically in future. struct RecoveredTBasket - data + data::Vector{UInt8} + offsets::Vector{UInt32} end function unpack(io, tkey::TKey, refs::Dict{Int32, Any}, T::Type{RecoveredTBasket}) @initparse @@ -31,6 +32,8 @@ function unpack(io, tkey::TKey, refs::Dict{Int32, Any}, T::Type{RecoveredTBasket if fNevBufSize > 8 byteoffsets = read(io, fNevBuf * 4 + 8) skip(io, -4) + else + byteoffsets = Int32[] end # there's a second TKey here, but it doesn't contain any new information (in fact, less) @@ -47,8 +50,8 @@ function unpack(io, tkey::TKey, refs::Dict{Int32, Any}, T::Type{RecoveredTBasket end fObjlen = size fNbytes = fObjlen + fKeylen - @warn "Found $(length(contents)) bytes of basket data (not yet supported) in a TTree." - RecoveredTBasket(contents) + @debug "Found $(length(contents)) bytes of basket data (not yet supported) in a TTree." + RecoveredTBasket(contents, byteoffsets) end abstract type TNamed <: ROOTStreamedObject end diff --git a/src/iteration.jl b/src/iteration.jl index e7ceb9ef..b27388c3 100644 --- a/src/iteration.jl +++ b/src/iteration.jl @@ -53,7 +53,13 @@ function basketarray(f::ROOTFile, branch, ithbasket) "Branches with multiple leaves are not supported yet. Try reading with `array(...; raw=true)`.", ) - rawdata, rawoffsets = readbasket(f, branch, ithbasket) + if ithbasket != -1 + rawdata, rawoffsets = readbasket(f, branch, ithbasket) + else + # recovering a basket + recovered_basket = branch.fBaskets.elements[end] + rawdata, rawoffsets = recovered_basket.data, recovered_basket.offsets + end T, J = auto_T_JaggT(f, branch; customstructs=f.customstructs) return interped_data(rawdata, rawoffsets, T, J) end @@ -170,6 +176,7 @@ and update buffer and buffer range accordingly. function Base.getindex(ba::LazyBranch{T,J,B}, idx::Integer) where {T,J,B} tid = Threads.threadid() br = @inbounds ba.buffer_range[tid] + # index within the basket localidx = if idx ∉ br _localindex_newbasket!(ba, idx, tid) else @@ -179,9 +186,16 @@ function Base.getindex(ba::LazyBranch{T,J,B}, idx::Integer) where {T,J,B} end function _localindex_newbasket!(ba::LazyBranch{T,J,B}, idx::Integer, tid::Int) where {T,J,B} - seek_idx = findfirst(x -> x > (idx - 1), ba.fEntry) - 1 #support 1.0 syntax - ba.buffer[tid] = basketarray(ba.f, ba.b, seek_idx) - br = (ba.fEntry[seek_idx] + 1):(ba.fEntry[seek_idx + 1]) + seek_idx = findfirst(x -> x > (idx - 1), ba.fEntry) #support 1.0 syntax + if isnothing(seek_idx) # no basket found, checking in recovered basket + ba.buffer[tid] = basketarray(ba.f, ba.b, -1) # -1 indicating recovered basket mechanics + # FIXME: this range is probably wrong for jagged data with non-empty offsets + br = ba.b.fBasketEntry[end] + 1:ba.b.fEntries + else + seek_idx -= 1 + ba.buffer[tid] = basketarray(ba.f, ba.b, seek_idx) + br = (ba.fEntry[seek_idx] + 1):(ba.fEntry[seek_idx + 1]) + end ba.buffer_range[tid] = br return idx - br.start + 1 end