Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve load time by removing LazyArrays (and be ready for Julia 1.9) #189

Merged
merged 9 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "UnROOT"
uuid = "3cd96dde-e98d-4713-81e9-a4a1b0235ce9"
authors = ["Tamas Gal", "Jerry Ling", "Johannes Schumann", "Nick Amin"]
version = "0.8.16"
version = "0.8.17"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand All @@ -12,14 +12,14 @@ CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b"
LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3"
Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73"
Mixers = "2a8e4939-dab8-5edc-8f64-72a8776f13de"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
xrootdgo_jll = "9d84c17e-11f2-50ef-8cc9-e9701362097f"
Expand All @@ -33,14 +33,13 @@ CodecZstd = "^0.6.0, ^0.7.0"
HTTP = "^0.9.7, 1"
IterTools = "^1"
LRUCache = "^1.3.0"
LazyArrays = "^0.21, ^0.22, ^1"
LibDeflate = "^0.4.1"
LorentzVectors = "^0.4.0"
Memoization = "^0.1.10"
Mixers = "^0.1.0"
Parameters = "^0.12.0"
Polyester = "^0.5.3"
PrettyTables = "2"
SentinelArrays = "^1.3"
StaticArrays = "^0.12.0, ^1"
Tables = "^1.0.0"
julia = "^1.6"
Expand All @@ -50,9 +49,8 @@ xrootdgo_jll = "^0.31.1"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
MD5 = "6ac74813-4b46-53a4-afec-0b5dc9d7885c"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"

[targets]
test = ["Test", "Pkg", "ThreadsX", "MD5", "InteractiveUtils", "Polyester"]
test = ["Test", "Pkg", "ThreadsX", "MD5", "InteractiveUtils"]
10 changes: 9 additions & 1 deletion src/UnROOT.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module UnROOT

using LazyArrays
import SentinelArrays: ChainedVector
import Mmap: mmap
export ROOTFile, LazyBranch, LazyTree

Expand Down Expand Up @@ -44,4 +44,12 @@ include("iteration.jl")
include("custom.jl")
include("displays.jl")

@static if VERSION >= v"1.9"
Copy link
Member

@giordano giordano Oct 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should help for any version of Julia, no? Also, a top-level `@static` does nothing: top-level expressions in a package are evaluated at compile time anyway.

Copy link
Member Author

@Moelf Moelf Oct 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't help for Julia versions before 1.9.

let
t = LazyTree(UnROOT.samplefile("tree_with_jagged_array.root"), "t1")
show(devnull, t)
show(devnull, t[1])
end
end

end # module
44 changes: 37 additions & 7 deletions src/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,47 @@ Base.length(lt::LazyTree) = length(first(Tables.columns(lt)))
# A `LazyTree` type reports a single dimension.
Base.ndims(::Type{<:LazyTree}) = 1
# The size of the tree is the size of its first column.
Base.size(lt::LazyTree) = size(first(Tables.columns(lt))) # all columns have the same size

function LazyArrays.Vcat(ts::LazyTree...)
"""
chaintrees(ts)

Chain a collection of `LazyTree`s together to form a larger tree. Every tree should
have identical branch names and types; we're not trying to re-implement SQL here.

## Example

```julia
julia> typeof(tree)
LazyTree with 1 branches:
a

julia> tree2 = UnROOT.chaintrees([tree,tree]);

julia> eltype(tree.a) == eltype(tree2.a)
true

julia> length(tree)
100

julia> length(tree2)
200

julia> eltype(tree)
UnROOT.LazyEvent{NamedTuple{(:a,), Tuple{LazyBranch{Int32, UnROOT.Nojagg, Vector{Int32}}}}}

julia> eltype(tree2)
UnROOT.LazyEvent{NamedTuple{(:a,), Tuple{SentinelArrays.ChainedVector{Int32, LazyBranch{Int32, UnROOT.Nojagg, Vector{Int32}}}}}}
```
"""
function chaintrees(ts)
# Branch names are read from the first tree only; the same names are then
# looked up on every tree in `ts`.
branch_names = propertynames(first(ts))
# For each branch name, gather that branch from every tree and chain them.
res_branches = map(branch_names) do bname
# NOTE(review): the `LazyArrays.Vcat` line below looks like the pre-change side
# of this diff; its value is discarded because the `ChainedVector` expression
# is the last one in the `do` block. Confirm it should be removed.
LazyArrays.Vcat(getproperty.(ts, bname)...)
ChainedVector(getproperty.(ts, bname))
end
# Rebuild a LazyTree from a NamedTuple keyed by the original branch names.
LazyTree(NamedTuple{branch_names}(res_branches))
end
Base.vcat(ts::LazyTree...) = Vcat(ts...)
Base.reduce(::typeof(vcat), ts::AbstractVector{<:LazyTree}) = Vcat((ts)...)
Base.mapreduce(f, ::typeof(vcat), ts::Vector{<:LazyTree}) = Vcat(f.(ts)...)
Base.mapreduce(f, ::typeof(Vcat), ts::Vector{<:LazyTree}) = Vcat(f.(ts)...)

# `vcat(t1, t2, ...)` on `LazyTree`s delegates to `chaintrees`; the varargs tuple is
# collected into a vector before being passed on.
Base.vcat(ts::LazyTree...) = chaintrees(collect(ts))
# `reduce(vcat, trees)` hands the vector of trees to `chaintrees` directly.
Base.reduce(::typeof(vcat), ts::AbstractVector{<:LazyTree}) = chaintrees(ts)

function getbranchnamesrecursive(obj)
out = Vector{String}()
Expand Down Expand Up @@ -372,7 +402,7 @@ function Base.getindex(ba::LazyBranch{T,J,B}, range::UnitRange) where {T,J,B}
ib2 = findfirst(x -> x > (last(range) - 1), ba.fEntry) - 1
offset = ba.fEntry[ib1]
range = (first(range)-offset):(last(range)-offset)
return Vcat(asyncmap(i->basketarray(ba, i), ib1:ib2)...)[range]
return ChainedVector(asyncmap(i->basketarray(ba, i), ib1:ib2))[range]
end

_clusterranges(t::LazyTree) = _clusterranges([getproperty(t,p) for p in propertynames(t)])
Expand Down
15 changes: 10 additions & 5 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using StaticArrays
using InteractiveUtils
using MD5

using ThreadsX, Polyester
using ThreadsX

const SAMPLES_DIR = joinpath(@__DIR__, "samples")

Expand Down Expand Up @@ -719,10 +719,12 @@ t = LazyTree(ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root")), "Events",

nmus .= 0
t_dummy = LazyTree(ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root")), "Events", ["Muon_pt"])
@batch for evt in vcat(t,t_dummy) # avoid using the same underlying file handler
chained_tree = vcat(t,t_dummy)
Threads.@threads for evt in chained_tree # avoid using the same underlying file handler
nmus[Threads.threadid()] += length(evt.Muon_pt)
end
@test sum(nmus) == 2*878
@test mapreduce(length, +, [t,t_dummy]) == length(t) + length(t_dummy)

for j in 1:3
inds = [Vector{Int}() for _ in 1:nthreads]
Expand Down Expand Up @@ -796,11 +798,14 @@ end
@test sum(UnROOT._clusterbytes([t.b2]; compressed=true)) == 23710.0 # same as uproot4
end

@testset "Vcat/chaining" begin
@testset "vcat/chaining" begin
rootfile = ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root"))
t = LazyTree(rootfile, "Events", ["nMuon", "Muon_pt"])
tt = vcat(t,t)
@test (@allocated vcat(t,t)) < 1000
tt = UnROOT.chaintrees([t,t])
@test all(vcat(t, t).Muon_pt .== tt.Muon_pt)
@static if VERSION >= v"1.7"
@test (@allocated UnROOT.chaintrees([t,t])) < 1000
end
@test length(tt) == 2*length(t)
s1 = sum(t.nMuon)
s2 = sum(tt.nMuon)
Expand Down