From b6ceb8361871af235e92b273f8c5bcc2a9ca6813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 17 Oct 2021 12:16:44 +0200 Subject: [PATCH 1/6] Improve quantile performance v2 --- src/Statistics.jl | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 91cf56fb..3f7bdc85 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -937,8 +937,7 @@ function quantile!(q::AbstractArray, v::AbstractVector, p::AbstractArray; end isempty(q) && return q - minp, maxp = extrema(p) - _quantilesort!(v, sorted, minp, maxp) + _quantilesort!(v, sorted, p) for (i, j) in zip(eachindex(p), eachindex(q)) @inbounds q[j] = _quantile(v,p[i], alpha=alpha, beta=beta) @@ -949,27 +948,27 @@ end function quantile!(v::AbstractVector, p::Union{AbstractArray, Tuple{Vararg{Real}}}; sorted::Bool=false, alpha::Real=1., beta::Real=alpha) if !isempty(p) - minp, maxp = extrema(p) - _quantilesort!(v, sorted, minp, maxp) + _quantilesort!(v, sorted, p isa Tuple : collect(p) ? p) end return map(x->_quantile(v, x, alpha=alpha, beta=beta), p) end quantile!(v::AbstractVector, p::Real; sorted::Bool=false, alpha::Real=1., beta::Real=alpha) = - _quantile(_quantilesort!(v, sorted, p, p), p, alpha=alpha, beta=beta) + _quantile(_quantilesort!(v, sorted, [p]), p, alpha=alpha, beta=beta) -# Function to perform partial sort of v for quantiles in given range -function _quantilesort!(v::AbstractArray, sorted::Bool, minp::Real, maxp::Real) +function _quantilesort!(v::AbstractArray, sorted::Bool, p::AbstractVector{<:Real}) isempty(v) && throw(ArgumentError("empty data vector")) require_one_based_indexing(v) if !sorted - lv = length(v) - lo = floor(Int,minp*(lv)) - hi = ceil(Int,1+maxp*(lv)) - - # only need to perform partial sort - sort!(v, 1, lv, Base.Sort.PartialQuickSort(lo:hi), Base.Sort.Forward) + start = 1 + for pv in sort(p) + lv = length(v) + lo = floor(Int,pv*(lv)) + hi = ceil(Int,1+pv*(lv)) + sort!(v, start, lv, Base.Sort.PartialQuickSort(lo:hi), Base.Sort.Forward) + start = hi + 1 + end end if (sorted && (ismissing(v[end]) || (v[end] isa Number && isnan(v[end])))) || any(x -> ismissing(x) || (x isa Number && isnan(x)), v) @@ -977,7 +976,7 @@ function _quantilesort!(v::AbstractArray, sorted::Bool, minp::Real, maxp::Real) end return v end - + # Core quantile lookup function: assumes `v` sorted @inline function _quantile(v::AbstractVector, p::Real; alpha::Real=1.0, beta::Real=alpha) 0 <= p <= 1 || throw(ArgumentError("input probability out of [0,1] range")) From 7943e977c113b6b606039d4efe1b2aadd639188e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 17 Oct 2021 12:20:01 +0200 Subject: [PATCH 2/6] Update src/Statistics.jl --- src/Statistics.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 3f7bdc85..22a46a27 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -948,7 +948,7 @@ end function quantile!(v::AbstractVector, p::Union{AbstractArray, Tuple{Vararg{Real}}}; sorted::Bool=false, alpha::Real=1., beta::Real=alpha) if !isempty(p) - _quantilesort!(v, sorted, p isa Tuple : collect(p) ? p) + _quantilesort!(v, sorted, p isa Tuple ? collect(p) : p) end return map(x->_quantile(v, x, alpha=alpha, beta=beta), p) end From af561f2dd6741e2dde598c62919c8b3d1f280c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 17 Oct 2021 12:20:21 +0200 Subject: [PATCH 3/6] Update src/Statistics.jl --- src/Statistics.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 22a46a27..10c6f9ec 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -976,7 +976,6 @@ function _quantilesort!(v::AbstractArray, sorted::Bool, p::AbstractVector{<:Real end return v end - # Core quantile lookup function: assumes `v` sorted @inline function _quantile(v::AbstractVector, p::Real; alpha::Real=1.0, beta::Real=alpha) 0 <= p <= 1 || throw(ArgumentError("input probability out of [0,1] range")) From 6e233dedad177afee37b94f67b6079f4489112e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 17 Oct 2021 12:35:40 +0200 Subject: [PATCH 4/6] Update src/Statistics.jl --- src/Statistics.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 10c6f9ec..e278549e 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -956,7 +956,7 @@ end quantile!(v::AbstractVector, p::Real; sorted::Bool=false, alpha::Real=1., beta::Real=alpha) = _quantile(_quantilesort!(v, sorted, [p]), p, alpha=alpha, beta=beta) -function _quantilesort!(v::AbstractArray, sorted::Bool, p::AbstractVector{<:Real}) +function _quantilesort!(v::AbstractArray, sorted::Bool, p::AbstractArray) isempty(v) && throw(ArgumentError("empty data vector")) require_one_based_indexing(v) From d84deda3d8598f1c6bb5421cdfce6da784d01711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 17 Oct 2021 12:51:04 +0200 Subject: [PATCH 5/6] Update runtests.jl --- test/runtests.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 2e061ee4..f4bc60ea 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -601,6 +601,14 @@ end @test quantile(skipmissing([1, missing, 2]), 0.5) === 1.5 @test quantile([1], 0.5) === 1.0 + # randomized partialsort correctness test + Random.seed!(1234) + for i in 1:100, j in 1:20 + x = rand(2000) + p = rand(j) + @test quantile(x, p) == [quantile(x, v) for v in p] + end + # make sure that type inference works correctly in normal cases for T in [Int, BigInt, Float64, Float16, BigFloat, Rational{Int}, Rational{BigInt}] for S in [Float64, Float16, BigFloat, Rational{Int}, Rational{BigInt}] From fb4c8d843464427c0be9fd5146d2ed228a0ee017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 17 Oct 2021 12:52:10 +0200 Subject: [PATCH 6/6] Update runtests.jl --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index f4bc60ea..fed79f14 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -603,7 +603,7 @@ end # randomized partialsort correctness test Random.seed!(1234) - for i in 1:100, j in 1:20 + for i in 1:200, j in 1:20 x = rand(2000) p = rand(j) @test quantile(x, p) == [quantile(x, v) for v in p]