Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Native binompdf and binomlogpdf #33

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/StatsFuns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ include("constants.jl")
include("basicfuns.jl")
include("misc.jl")
include("rmath.jl")
include("binomial.jl")

using .RFunctions
using SpecialFunctions
Expand Down
75 changes: 75 additions & 0 deletions src/binomial.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Computation of binomial probability distribution function using Catherine Loader's Saddle point algorithm (http://octave.1599824.n4.nabble.com/attachment/3829107/0/loader2000Fast.pdf)
# binompdf(n, p, x)
# log(binompdf(n, p, x)) = log(binompdf(n, x/n, x)) - Dev(n, p, x)
# Deviance term 'Dev': Dev(n, p, x) = xlog(x/(np)) + (n-x)log((n - x)/(n*(1 - p)))
# Stirling's approximation: n! = √(2*π*n) * (n/e)^n + δ(n), where δ(n) is the error in the stirling approximation
# δ(n) ≈ lstirling_asym(n), the Asymptotic stirling series expansion error, https://dlmf.nist.gov/5.11, lstirling_asym defined in misc.jl
# p(n, x/n, x) = √(n/(2πx(n - x)))*e^(δ(n) - δ(x) - δ(n - x)), log(p(n, x/n, x)) = 0.5*log(n/(2*pi*x*(n - x))) + δ(n) - δ(x) - δ(n - x)
"""
binompdf(n::Integer, p::Float, x::Integer)

Computes the binomial probability distibution function.
Given probability *`p`* of for success of each trial, returns the probability that *`x`* out of *`n`* trials are successful.

# Examples
```julia-repl
julia> binompdf(13, 0.58, 7)
0.20797396939077062
```
# Arguments
- `n::Integer`: total number of trials.
- `p::Float`: probability of success of each trial.
- `x::Integer`: number of successful trials.
...
"""
function binompdf{ T <: Union{Float16, Float32, Float64} }(n::Integer, p::T, x::Integer)
n > typemax(Int64) && error("n is too large.")
n < 0 && throw(ArgumentError("n = $n must be a non-zero positive integer"))
( x > n || x < 0 ) && throw(ArgumentError("x = $x must ∈ [0, n]"))
(p < 0.0 || p > 1.0) && throw(ArgumentError("p = $p must ∈ [0, 1]"))
n == 0 && return one(T)
p == 0.0 && return ( (x == 0) ? one(T): zero(T) )
p == 1.0 && return ( (x == n) ? zero(T) : zero(T) )
x == 0 && return exp(n*log1p(-p))
x == n && return p^n
p = Float64(p)
lc = lstirling_asym(n) - lstirling_asym(x) - lstirling_asym(n - x) -D(x, n*p) - D(n - x, n*(1 - p))
return T(exp(lc)*sqrt(n/(2π*x*(n - x))))
end

# Deviance term: D(x, np) = x*log(x/np) + np - x
D(x, np) = -x*logmxp1(np/x)

# binomlogpdf(n, p, x) = log(binompdf(n, p, x))
# We use the same strategy as above but do not exponentiate the final result
"""
binomlogpdf(n::Integer, p::Float, x::Integer)

Computes the log of the binomial probability distibution function.

# Examples
```julia-repl
julia> binomlogpdf(13, 0.58, 7)
-1.5703423542721349
```
# Arguments
- `n::Integer`: total number of trials.
- `p::Float`: probability of success of each trial.
- `x::Integer`: number of successful trials.
...
"""
function binomlogpdf{ T <: Union{Float16, Float32, Float64} }(n::Integer, p::T, x::Integer)
n > typemax(Int64) && error("n is too large")
n < 0 && throw(ArgumentError("n = $n must be a non-zero positive integer"))
( x > n || x < 0 ) && throw(ArgumentError("x = $x must ∈ [0, n]"))
(p < 0.0 || p > 1.0) && throw(ArgumentError("p = $p must ∈ [0, 1]"))
n == 0 && return zero(T)
p == 0.0 && return ( (x == 0) ? zero(T) : T(-Inf) )
p == 1.0 && return ( (x == n) ? zero(T) : T(-Inf) )
x == 0 && return n*log1p(-p)
x == n && return n*log(p)
p = Float64(p)
(n, x) = (Int64(n), Int64(x))
lc = lstirling_asym(n) - lstirling_asym(x) - lstirling_asym(n - x) - D(x, n*p) - D(n - x, n*(1.0 - p))
return T(lc + 0.5*log(n/(2π*x*(n - x))))
end
8 changes: 0 additions & 8 deletions src/distrs/binom.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# functions related to binomial distribution

import .RFunctions:
binompdf,
binomlogpdf,
binomcdf,
binomccdf,
binomlogcdf,
Expand All @@ -11,9 +9,3 @@ import .RFunctions:
binominvccdf,
binominvlogcdf,
binominvlogccdf

# pdf for numbers with generic types
binompdf(n::Real, p::Real, k::Real) = exp(binomlogpdf(n, p, k))

# logpdf for numbers with generic types
binomlogpdf(n::Real, p::Real, k::Real) = -log1p(n) - lbeta(n - k + 1, k + 1) + k * log(p) + (n - k) * log1p(-p)
28 changes: 28 additions & 0 deletions test/binomial.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using StatsFuns
using Base.Test

#Test Compares the difference in values w.r.t Rmath implementation

N = Int64.( (0, 1, 1, 8, 20, 20, 20, 150, 700, 900, 1000) );
P = (0.0, 0.5, 0.7, 0.6, 0.34, 0.89, 0.53, 0.77, 0.98, 0.5, 0.29);

@testset "binompdf" begin
for i = 1:length(N)
for x = Int64(0):N[i]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn’t this inferred to be Int64 by default?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, 2 year old PR, back when I had no idea how Julia worked, will fix this tomorrow.

if isfinite(binompdf(N[i], P[i], x))
@test abs(binompdf(N[i], P[i], x) - Rmath.dbinom(x, N[i], P[i])) < 1e-15
end
end
end
end

@testset "binomlogpdf" begin
for i = 1:length(N)
for x = Int64(0):N[i]
val = abs(binomlogpdf(N[i], P[i], x) - Rmath.dbinom(x, N[i], P[i], true))/binomlogpdf(N[i], P[i], x)
if isfinite(val) && binomlogpdf(N[i], P[i], x) != Rmath.dbinom(x, N[i], P[i], true)
@test val < 2.24e-14
end
end
end
end
9 changes: 0 additions & 9 deletions test/generic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,6 @@ genericcomp_tests("beta", [
((10.0, 2.0), 0.01:0.01:0.99),
])

genericcomp_tests("binom", [
((1, 0.5), 0.0:1.0),
((1, 0.7), 0.0:1.0),
((8, 0.6), 0.0:8.0),
((20, 0.1), 0.0:20.0),
((20, 0.9), 0.0:20.0),
((20, 0.9), 0:20),
])

genericcomp_tests("chisq", [
((1,), 0.0:0.1:8.0),
((4,), 0.0:0.1:8.0),
Expand Down
2 changes: 2 additions & 0 deletions test/rmath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ function rmathcomp(basename, params, X::AbstractArray, rtol=100eps(float(one(elt

for i = 1:length(X)
x = X[i]
if(basename != "binom")
check_rmath(pdf, stats_pdf, rmath_pdf,
params, "x", x, true, rtol)
check_rmath(logpdf, stats_logpdf, rmath_logpdf,
params, "x", x, false, rtol)
end
check_rmath(cdf, stats_cdf, rmath_cdf,
params, "x", x, true, rtol)
check_rmath(ccdf, stats_ccdf, rmath_ccdf,
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
tests = ["basicfuns", "rmath", "generic"]
tests = ["basicfuns", "rmath", "generic", "binomial"]

for t in tests
fp = "$t.jl"
Expand Down