diff --git a/Project.toml b/Project.toml
index cd813eba..94a94ccd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,7 @@ CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561"
 CodecXz = "ba30903b-d9e8-5048-a5ec-d1f5b0d4b47b"
 CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
 CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
+IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
 LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3"
 Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73"
diff --git a/src/UnROOT.jl b/src/UnROOT.jl
index 2e58975c..9c08e5cb 100644
--- a/src/UnROOT.jl
+++ b/src/UnROOT.jl
@@ -10,6 +10,7 @@ import AbstractTrees: children, printnode, print_tree
 
 using CodecZlib, CodecLz4, CodecXz, CodecZstd, StaticArrays, LorentzVectors, ArraysOfArrays
 using Mixers, Parameters, Memoization, LRUCache
+import IterTools: groupby
 
 import Tables, TypedTables, PrettyTables
 
diff --git a/src/iteration.jl b/src/iteration.jl
index 0ff09b13..67766c78 100644
--- a/src/iteration.jl
+++ b/src/iteration.jl
@@ -307,3 +307,55 @@ end
 function Base.getindex(ba::LazyBranch{T,J,B}, rang::UnitRange) where {T,J,B}
     return [ba[i] for i in rang]
 end
+
+# Sorted values of `fBasketEntry[1:numbaskets+1]` shared by every branch in `lbs`.
+# Callers treat each pair of consecutive values as the bounds of one cluster.
+function _clusteredges(lbs::AbstractVector{<:LazyBranch})
+    basketentries = [lb.b.fBasketEntry[1:numbaskets(lb.b)+1] for lb in lbs]
+    return sort!(collect(mapreduce(Set, ∩, basketentries)))
+end
+
+"""
+    _clusterranges(t::LazyTree)
+    _clusterranges(lbs::AbstractVector{<:LazyBranch})
+
+Return the entry ranges `edges[i]+1:edges[i+1]` between consecutive basket boundaries
+that all given branches (or all properties of `t`) have in common.
+"""
+_clusterranges(t::LazyTree) = _clusterranges([getproperty(t, p) for p in propertynames(t)])
+function _clusterranges(lbs::AbstractVector{<:LazyBranch})
+    edges = _clusteredges(lbs)
+    return [edges[i]+1:edges[i+1] for i in 1:length(edges)-1]
+end
+
+"""
+    _clusterbytes(t::LazyTree; compressed=false)
+    _clusterbytes(lbs::AbstractVector{<:LazyBranch}; compressed=false)
+
+Return a `Vector{Float64}` with, for each range of `_clusterranges`, the sum of
+`fBasketBytes` over all given branches. With `compressed=false` each branch's bytes are
+scaled by `fTotBytes / fZipBytes`; with `compressed=true` they are summed unscaled.
+"""
+_clusterbytes(t::LazyTree; kw...) =
+    _clusterbytes([getproperty(t, p) for p in propertynames(t)]; kw...)
+function _clusterbytes(lbs::AbstractVector{<:LazyBranch}; compressed=false)
+    edges = _clusteredges(lbs)
+    bytes = zeros(Float64, length(edges) - 1)
+    for lb in lbs
+        b = lb.b
+        nb = numbaskets(b)
+        # A zero `fZipBytes` would turn the ratio into Inf/NaN, so leave such bytes unscaled.
+        unscaled = compressed || iszero(b.fZipBytes)
+        finflate = unscaled ? 1.0 : b.fTotBytes / b.fZipBytes
+        # Only the first `nb` items are needed: `zip` pairs them one-to-one below.
+        basketbytes = b.fBasketBytes[1:nb] * finflate
+        iclusters = searchsortedlast.(Ref(edges), b.fBasketEntry[1:nb])
+        # `groupby` only merges adjacent items; this relies on `fBasketEntry` ascending.
+        sumbytes = [sum(last.(g)) for g in groupby(first, zip(iclusters, basketbytes))]
+        bytes .+= sumbytes
+    end
+    return bytes
+end
+
+# Tables.jl partitions: one lazily produced `t[r]` per cluster range.
+Tables.partitions(t::LazyTree) = (t[r] for r in _clusterranges(t))
diff --git a/test/runtests.jl b/test/runtests.jl
index 6038efa1..d7d54ac9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -690,3 +690,15 @@ end
     @test (UnROOT.basketarray_iter(t.b1) .|> length) == [1228, 1228, 44]
     @test length(UnROOT.basketarray(t.b1, 1)) == 1228
 end
+
+@testset "Cluster ranges" begin
+    t = LazyTree(UnROOT.samplefile("tree_with_clusters.root"),"t1");
+    @test all(UnROOT._clusterbytes(t; compressed=true) .< 10000)
+    @test all(UnROOT._clusterbytes(t; compressed=false) .< 10000)
+    @test UnROOT._clusterbytes([t.b1,t.b2]) == UnROOT._clusterbytes(t)
+    @test length(UnROOT._clusterranges([t.b1])) == 157
+    @test length(UnROOT._clusterranges([t.b2])) == 70
+    @test length(UnROOT._clusterranges(t)) == 18 # same as uproot4
+    @test sum(UnROOT._clusterbytes([t.b1]; compressed=true)) == 33493.0 # same as uproot4
+    @test sum(UnROOT._clusterbytes([t.b2]; compressed=true)) == 23710.0 # same as uproot4
+end
diff --git a/test/samples/tree_with_clusters.py b/test/samples/tree_with_clusters.py
new file mode 100644
index 00000000..8b83fd2c
--- /dev/null
+++ b/test/samples/tree_with_clusters.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+import ROOT as r
+
+f = r.TFile("tree_with_clusters.root", "recreate")
+t = r.TTree("t1","t1")
+v1 = r.vector("int")()
+v2 = r.vector("int")()
+t.Branch("b1", v1, 500) # default bufsize is 32000, but making sure
+t.Branch("b2", v2, 1000) # default bufsize is 32000, but making sure
+t.SetAutoFlush(10000)
+for irow in range(2500):
+    v1.clear()
+    v2.clear()
+    for e in [irow+q for q in range(2)]:
+        v1.push_back(e)
+        v2.push_back(e+1)
+    t.Fill()
+t.Write()
+f.Close()
diff --git a/test/samples/tree_with_clusters.root b/test/samples/tree_with_clusters.root
new file mode 100644
index 00000000..77483150
Binary files /dev/null and b/test/samples/tree_with_clusters.root differ