Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simplify array creation when not Offsetjagg #90

Merged
merged 4 commits into from
Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/custom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function interped_data(rawdata, rawoffsets, ::Type{Vector{LVF64}}, ::Type{Offset
_size = 64 # needs to account for 32 bytes header
dp = 0 # book keeping for copy_to!
lr = length(rawoffsets)
offset = Vector{Int64}(undef, lr)
offset = Vector{Int32}(undef, lr)
offset[1] = 0
@views @inbounds for i in 1:lr-1
start = rawoffsets[i]+10+1
Expand Down
2 changes: 1 addition & 1 deletion src/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ mutable struct LazyBranch{T,J,B} <: AbstractVector{T}

function LazyBranch(f::ROOTFile, b::Union{TBranch,TBranchElement})
T, J = auto_T_JaggT(f, b; customstructs=f.customstructs)
_buffer = J === Nojagg ? T[] : VectorOfVectors{eltype(T)}()
_buffer = J === Nojagg ? T[] : VectorOfVectors(T(), Int32[1])
return new{T,J,typeof(_buffer)}(f, b, length(b),
b.fBasketEntry,
[_buffer for _ in 1:Threads.nthreads()],
Expand Down
49 changes: 24 additions & 25 deletions src/root.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{J}) where {T, J<:J
elseif J == Offsetjaggjagg # the branch is doubly jagged
jagg_offset = 10
subT = eltype(eltype(T))
out = VectorOfVectors{Vector{subT}}()
out = VectorOfVectors(T(), Int32[1])
@views for i in 1:(length(rawoffsets)-1)
flat = rawdata[(rawoffsets[i]+1+jagg_offset:rawoffsets[i+1])]
row = VectorOfVectors{subT}()
Expand All @@ -221,32 +221,34 @@ function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{J}) where {T, J<:J
end
return out
else # the branch is singly jagged
jagg_offset = J===Offsetjagg ? 10 : 0

# for each "event", the index range is `offsets[i] + jagg_offset + 1` to `offsets[i+1]`
# this is why we need to append `rawoffsets` in the `readbranchraw()` call
# when you use this range to index `rawdata`, you will get raw bytes belong to each event
# Say your real data is Int32 and you see 8 bytes after indexing, then this event has [num1, num2] as real data
_size = sizeof(eltype(T))

dp = 0 # book keeping for copy_to!
lr = length(rawoffsets)
offset = Vector{Int64}(undef, lr)
offset[1] = 0
@views @inbounds for i in 1:lr-1
start = rawoffsets[i]+jagg_offset+1
stop = rawoffsets[i+1]
l = stop-start+1
if l > 0
unsafe_copyto!(rawdata, dp+1, rawdata, start, l)
dp += l
offset[i+1] = offset[i] + l
else
# when we have an empty [] in jagged basket
offset[i+1] = offset[i]
if J === Offsetjagg
jagg_offset = 10
dp = 0 # book keeping for copy_to!
lr = length(rawoffsets)
offset = Vector{Int32}(undef, lr)
offset[1] = 0
@views @inbounds for i in 1:lr-1
start = rawoffsets[i]+jagg_offset+1
stop = rawoffsets[i+1]
l = stop-start+1
if l > 0
unsafe_copyto!(rawdata, dp+1, rawdata, start, l)
dp += l
offset[i+1] = offset[i] + l
else
# when we have an empty [] in jagged basket
offset[i+1] = offset[i]
end
end
resize!(rawdata, dp)
else
offset = rawoffsets
end
resize!(rawdata, dp)
real_data = ntoh.(reinterpret(T, rawdata))
offset .÷= _size
offset .+= 1
Expand Down Expand Up @@ -414,10 +416,7 @@ See also: [`auto_T_JaggT`](@ref), [`basketarray`](@ref)
"""
readbasket(f::ROOTFile, branch, ith) = readbasketseek(f, branch, branch.fBasketSeek[ith])

# @memoize LRU(; maxsize=1024^3, by=x -> sum(sizeof, x)) function readbasketseek(
function readbasketseek(
f::ROOTFile, branch::Union{TBranch, TBranchElement}, seek_pos::Int
)::Tuple{Vector{UInt8},Vector{Int32}} # just being extra careful
function readbasketseek(f::ROOTFile, branch::Union{TBranch, TBranchElement}, seek_pos::Int)
lock(f)
local basketkey, compressedbytes
try
Expand All @@ -443,7 +442,7 @@ f::ROOTFile, branch::Union{TBranch, TBranchElement}, seek_pos::Int

offsetbytesize = basketkey.fObjlen - contentsize - 8

data = @view basketrawbytes[1:contentsize]
data = basketrawbytes[1:contentsize]
if offsetbytesize > 0

#indexing is inclusive on both ends
Expand Down
Binary file modified test/samples/TLorentzVector.root
Binary file not shown.