Skip to content

Commit

Permalink
iterate on baskets + an optimization (#111)
Browse files Browse the repository at this point in the history
* iterate on baskets

* 10-50% speedup for simple benchmarks

* simplify
  • Loading branch information
aminnj authored Sep 17, 2021
1 parent ac66158 commit db89380
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ function Base.hash(b::Union{TBranch, TBranchElement}, h::UInt)
end
# Length of a branch is the value of its `fEntries` field.
function Base.length(b::Union{TBranch, TBranchElement})
    return b.fEntries
end

# Valid indices of a branch run from 1 through `b.fEntries`.
function Base.eachindex(b::Union{TBranch, TBranchElement})
    return Base.OneTo(b.fEntries)
end
"""
    numbaskets(b::Union{TBranch, TBranchElement})

Return the number of baskets of branch `b`, computed as the position of the first
element of `b.fBasketEntry` that is greater than `b.fEntries - 1`, minus one.
"""
function numbaskets(b::Union{TBranch, TBranchElement})
    lastentry = b.fEntries - 1
    boundary = findfirst(x -> x > lastentry, b.fBasketEntry)
    return boundary - 1
end
function Base.eltype(b::Union{TBranch, TBranchElement})
T, jagT = interp_jaggT(b)
jagT === Nojagg ? T : Vector{T}
Expand Down
41 changes: 30 additions & 11 deletions src/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ end
"""
basketarray(f::ROOTFile, path::AbstractString, ith)
basketarray(f::ROOTFile, branch::Union{TBranch, TBranchElement}, ith)
basketarray(lb::LazyBranch, ith)
Reads actual data from ith basket of a branch. This function first calls [`readbasket`](@ref)
to obtain raw bytes and offsets of a basket, then calls [`auto_T_JaggT`](@ref) followed
Expand All @@ -57,6 +58,16 @@ function basketarray(f::ROOTFile, branch, ithbasket)
return interped_data(rawdata, rawoffsets, T, J)
end

"""
    basketarray_iter(f::ROOTFile, branch::Union{TBranch, TBranchElement})
    basketarray_iter(lb::LazyBranch)

Return a `Base.Generator` that yields `basketarray(f, branch, i)` for every basket
index `i` from `1` to `numbaskets(branch)`.
"""
function basketarray_iter(f::ROOTFile, branch)
    nbaskets = numbaskets(branch)
    return (basketarray(f, branch, ith) for ith in 1:nbaskets)
end

# function barrier to make getting individual index faster
# TODO upstream some types into parametric types for Branch/BranchElement
"""
Expand Down Expand Up @@ -121,6 +132,9 @@ Base.firstindex(ba::LazyBranch) = 1
# The last valid index is the value stored in the `L` field.
function Base.lastindex(ba::LazyBranch)
    return ba.L
end

# The element type is the first type parameter of `LazyBranch`.
function Base.eltype(ba::LazyBranch{T,J,B}) where {T,J,B}
    return T
end

# `LazyBranch` convenience methods: forward to the `(f, branch, ...)` methods using the
# `f` and `b` fields of `lb`.
function basketarray(lb::LazyBranch, ithbasket)
    return basketarray(lb.f, lb.b, ithbasket)
end

function basketarray_iter(lb::LazyBranch)
    return basketarray_iter(lb.f, lb.b)
end

function Base.show(io::IO, lb::LazyBranch)
summary(io, lb)
println(io, ":")
Expand All @@ -145,23 +159,28 @@ and update buffer and buffer range accordingly.
moment, accessing a `LazyBranch` from different threads at the same time can cause
performance issues and incorrect event results.
"""

function Base.getindex(ba::LazyBranch{T,J,B}, idx::Integer) where {T,J,B}
    # `buffer` and `buffer_range` are indexed by the calling thread's id.
    tid = Threads.threadid()
    br = @inbounds ba.buffer_range[tid]
    # Position of `idx` inside the buffered basket. When `idx` is not covered by the
    # buffered range, the slow path (kept behind a function barrier) loads the right
    # basket, updates the buffer and its range, and returns the local position.
    localidx = if idx ∉ br
        _localindex_newbasket!(ba, idx, tid)
    else
        idx - br.start + 1
    end
    return @inbounds ba.buffer[tid][localidx]
end

"""
    _localindex_newbasket!(ba::LazyBranch, idx::Integer, tid::Int)

Slow path of `getindex`: read the basket that contains entry `idx`, store it in
`ba.buffer[tid]`, store the entry range it covers in `ba.buffer_range[tid]`, and return
the position of `idx` inside that basket.

Throws a `BoundsError` when no basket contains `idx`.
"""
function _localindex_newbasket!(ba::LazyBranch{T,J,B}, idx::Integer, tid::Int) where {T,J,B}
    # First basket boundary strictly greater than the zero-based entry number `idx - 1`;
    # the basket holding `idx` is the one just before it.
    pos = findfirst(x -> x > (idx - 1), ba.fEntry) #support 1.0 syntax
    # `nothing` means no boundary lies beyond `idx`; `pos == 1` would give basket index 0,
    # which is not a valid 1-based basket. Fail before touching any buffer.
    (pos === nothing || pos < 2) && throw(BoundsError(ba, idx))
    seek_idx = pos - 1
    ba.buffer[tid] = basketarray(ba.f, ba.b, seek_idx)
    br = (ba.fEntry[seek_idx] + 1):(ba.fEntry[seek_idx + 1])
    ba.buffer_range[tid] = br
    return idx - br.start + 1
end

# `LazyBranch` declares Cartesian (rather than linear) indexing as its index style.
function Base.IndexStyle(::Type{<:LazyBranch})
    return IndexCartesian()
end

function Base.iterate(ba::LazyBranch{T,J,B}, idx=1) where {T,J,B}
idx > ba.L && return nothing
return (ba[idx], idx + 1)
Expand Down
8 changes: 8 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,11 @@ end
onesrow = LazyTree(f,"t")[2] |> values .|> first .|> Int
@test all(onesrow .== 1)
end

@testset "basketarray_iter()" begin
    # Sample file whose branch `b1` is split over three baskets.
    rootfile = UnROOT.samplefile("tree_with_vector_multiple_baskets.root")
    tree = LazyTree(rootfile, "t1")
    expected_lengths = [1228, 1228, 44]

    # Iterating via (file, branch) and via the LazyBranch must agree.
    from_file = [length(b) for b in UnROOT.basketarray_iter(rootfile, rootfile["t1"]["b1"])]
    @test from_file == expected_lengths
    from_lazy = [length(b) for b in UnROOT.basketarray_iter(tree.b1)]
    @test from_lazy == expected_lengths

    # Direct access to a single basket through the LazyBranch method.
    first_basket = UnROOT.basketarray(tree.b1, 1)
    @test length(first_basket) == 1228
end

0 comments on commit db89380

Please sign in to comment.