diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..c743950 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "blue" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 99e2cea..a22e235 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ *.jl.mem .DS_Store /Manifest.toml +test/Manifest.toml /dev/ /docs/build/ /docs/site/ +.vscode/ diff --git a/Project.toml b/Project.toml index af7bd8f..1befd1d 100644 --- a/Project.toml +++ b/Project.toml @@ -8,7 +8,7 @@ AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [compat] AbstractGPs = "0.2, 0.3" diff --git a/src/GPLikelihoods.jl b/src/GPLikelihoods.jl index fa5cd83..3117ea1 100644 --- a/src/GPLikelihoods.jl +++ b/src/GPLikelihoods.jl @@ -1,20 +1,33 @@ module GPLikelihoods -using Distributions using AbstractGPs -using Random +using Distributions using Functors -using StatsFuns: logistic, softmax - -import Distributions +using Random +using StatsFuns export BernoulliLikelihood, CategoricalLikelihood, - GaussianLikelihood, - HeteroscedasticGaussianLikelihood, + GaussianLikelihood, + HeteroscedasticGaussianLikelihood, PoissonLikelihood, ExponentialLikelihood, GammaLikelihood +export Link, + ChainLink, + ExpLink, + LogLink, + InvLink, + SqrtLink, + SquareLink, + LogitLink, + LogisticLink, + ProbitLink, + NormalCDFLink, + SoftMaxLink + +# Links +include("links.jl") # Likelihoods include(joinpath("likelihoods", "bernoulli.jl")) diff --git a/src/likelihoods/bernoulli.jl b/src/likelihoods/bernoulli.jl index 3b9b571..15e2cad 100644 --- a/src/likelihoods/bernoulli.jl +++ b/src/likelihoods/bernoulli.jl @@ -1,16 +1,21 @@ """ - BernoulliLikelihood + BernoulliLikelihood(l::AbstractLink=LogisticLink()) Bernoulli likelihood is to be used if we assume that the uncertainity associated with the data follows a Bernoulli distribution. +The link `l` needs to transform the input `f` to the domain [0, 1] ```math - p(y|f) = Bernoulli(y | f) + p(y|f) = Bernoulli(y | l(f)) ``` -On calling, this would return a Bernoulli distribution with `f` probability of `true`. +On calling, this would return a Bernoulli distribution with `l(f)` probability of `true`. """ -struct BernoulliLikelihood end +struct BernoulliLikelihood{Tl<:AbstractLink} + invlink::Tl +end -(l::BernoulliLikelihood)(f::Real) = Bernoulli(logistic(f)) +BernoulliLikelihood() = BernoulliLikelihood(LogisticLink()) -(l::BernoulliLikelihood)(fs::AbstractVector{<:Real}) = Product(Bernoulli.(logistic.(fs))) +(l::BernoulliLikelihood)(f::Real) = Bernoulli(l.invlink(f)) + +(l::BernoulliLikelihood)(fs::AbstractVector{<:Real}) = Product(Bernoulli.(l.invlink.(fs))) diff --git a/src/likelihoods/categorical.jl b/src/likelihoods/categorical.jl index 3463e3c..7c5f1ba 100644 --- a/src/likelihoods/categorical.jl +++ b/src/likelihoods/categorical.jl @@ -1,16 +1,22 @@ """ - CategoricalLikelihood + CategoricalLikelihood(l::AbstractLink=SoftMaxLink()) Categorical likelihood is to be used if we assume that the uncertainity associated with the data follows a Categorical distribution. ```math - p(y|f_1, f_2, \\dots, f_{n-1}) = Categorical(y | softmax(f_1, f_2, \\dots, f_{n-1}, 0)) + p(y|f_1, f_2, \\dots, f_{n-1}) = Categorical(y | l(f_1, f_2, \\dots, f_{n-1}, 0)) ``` -On calling, this would return a Categorical distribution with `f_i` -probability of `i` category. +Given an `AbstractVector` [f_1, f_2, ..., f_{n-1}], returns a `Categorical` distribution, +with probabilities given by `l(f_1, f_2, ..., f_{n-1}, 0)`. """ -struct CategoricalLikelihood end +struct CategoricalLikelihood{Tl<:AbstractLink} + invlink::Tl +end -(l::CategoricalLikelihood)(f::AbstractVector{<:Real}) = Categorical(softmax(vcat(f, 0))) +CategoricalLikelihood() = CategoricalLikelihood(SoftMaxLink()) -(l::CategoricalLikelihood)(fs::AbstractVector) = Product(Categorical.(softmax.(vcat.(fs, 0)))) +(l::CategoricalLikelihood)(f::AbstractVector{<:Real}) = Categorical(l.invlink(vcat(f, 0))) + +function (l::CategoricalLikelihood)(fs::AbstractVector) + return Product(Categorical.(l.invlink.(vcat.(fs, 0)))) +end diff --git a/src/likelihoods/exponential.jl b/src/likelihoods/exponential.jl index ded96d7..53e31d5 100644 --- a/src/likelihoods/exponential.jl +++ b/src/likelihoods/exponential.jl @@ -1,14 +1,20 @@ """ - ExponentialLikelihood() + ExponentialLikelihood(l::AbstractLink=ExpLink()) -Exponential likelihood with scale given by `exp(f)`. +Exponential likelihood with scale given by `l(f)`. ```math - p(y|f) = Exponential(y | exp(f)) + p(y|f) = Exponential(y | l(f)) ``` """ -struct ExponentialLikelihood end +struct ExponentialLikelihood{Tl<:AbstractLink} + invlink::Tl +end -(l::ExponentialLikelihood)(f::Real) = Exponential(exp(f)) +ExponentialLikelihood() = ExponentialLikelihood(ExpLink()) -(l::ExponentialLikelihood)(fs::AbstractVector{<:Real}) = Product(Exponential.(exp.(fs))) +(l::ExponentialLikelihood)(f::Real) = Exponential(l.invlink(f)) + +function (l::ExponentialLikelihood)(fs::AbstractVector{<:Real}) + return Product(Exponential.(l.invlink.(fs))) +end diff --git a/src/likelihoods/gamma.jl b/src/likelihoods/gamma.jl index cde7740..2092ccc 100644 --- a/src/likelihoods/gamma.jl +++ b/src/likelihoods/gamma.jl @@ -1,21 +1,24 @@ """ - GammaLikelihood(α) + GammaLikelihood(α::Real=1.0, l::AbstractLink=ExpLink()) Gamma likelihood with fixed shape `α`. ```math - p(y|f) = Gamma(y | α, θ=exp(f)) + p(y|f) = Gamma(y | α, l(f)) ``` -On calling, this would return a gamma distribution with shape `α` and scale `exp(f)`. +On calling, this would return a gamma distribution with shape `α` and scale `l(f)`. """ -struct GammaLikelihood{T<:Real} +struct GammaLikelihood{T<:Real,Tl<:AbstractLink} α::T # shape parameter + invlink::Tl end -GammaLikelihood() = GammaLikelihood(1.) +GammaLikelihood() = GammaLikelihood(1.0) + +GammaLikelihood(α::Real) = GammaLikelihood(α, ExpLink()) @functor GammaLikelihood -(l::GammaLikelihood)(f::Real) = Gamma(l.α, exp(f)) +(l::GammaLikelihood)(f::Real) = Gamma(l.α, l.invlink(f)) -(l::GammaLikelihood)(fs::AbstractVector{<:Real}) = Product(Gamma.(l.α, exp.(fs))) +(l::GammaLikelihood)(fs::AbstractVector{<:Real}) = Product(Gamma.(l.α, l.invlink.(fs))) diff --git a/src/likelihoods/gaussian.jl b/src/likelihoods/gaussian.jl index cfc8006..2d69dc0 100644 --- a/src/likelihoods/gaussian.jl +++ b/src/likelihoods/gaussian.jl @@ -1,7 +1,7 @@ """ GaussianLikelihood(σ²) -Gaussian likelihood with `σ²` variance. This is to be used if we assume that the +Gaussian likelihood with `σ²` variance. This is to be used if we assume that the uncertainity associated with the data follows a Gaussian distribution. ```math @@ -10,31 +10,42 @@ uncertainity associated with the data follows a Gaussian distribution. On calling, this would return a normal distribution with mean `f` and variance σ². """ struct GaussianLikelihood{T<:Real} - σ²::T + σ²::Vector{T} end GaussianLikelihood() = GaussianLikelihood(1e-6) +GaussianLikelihood(σ²::Real) = GaussianLikelihood([σ²]) + @functor GaussianLikelihood -(l::GaussianLikelihood)(f::Real) = Normal(f, sqrt(l.σ²)) +(l::GaussianLikelihood)(f::Real) = Normal(f, sqrt(first(l.σ²))) -(l::GaussianLikelihood)(fs::AbstractVector{<:Real}) = MvNormal(fs, sqrt(l.σ²)) +(l::GaussianLikelihood)(fs::AbstractVector{<:Real}) = MvNormal(fs, sqrt(first(l.σ²))) """ - HeteroscedasticGaussianLikelihood(σ²) + HeteroscedasticGaussianLikelihood(l::AbstractLink=ExpLink()) Heteroscedastic Gaussian likelihood. This is a Gaussian likelihood whose mean and the log of whose variance are functions of the latent process. ```math - p(y|[f, g]) = Normal(y | f, exp(g)) + p(y|[f, g]) = Normal(y | f, l(g)) ``` -On calling, this would return a normal distribution with mean `f` and variance `exp(g)`. +On calling, this would return a normal distribution with mean `f` and std. dev. `l(g)`. +Where `l` is link going from R to R^+ """ -struct HeteroscedasticGaussianLikelihood end +struct HeteroscedasticGaussianLikelihood{Tl<:AbstractLink} + invlink::Tl +end -(::HeteroscedasticGaussianLikelihood)(f::AbstractVector{<:Real}) = Normal(f[1], exp(f[2])) +HeteroscedasticGaussianLikelihood() = HeteroscedasticGaussianLikelihood(ExpLink()) -(::HeteroscedasticGaussianLikelihood)(fs::AbstractVector) = MvNormal(first.(fs), exp.(last.(fs))) +function (l::HeteroscedasticGaussianLikelihood)(f::AbstractVector{<:Real}) + return Normal(f[1], l.invlink(f[2])) +end + +function (l::HeteroscedasticGaussianLikelihood)(fs::AbstractVector) + return MvNormal(first.(fs), l.invlink.(last.(fs))) +end diff --git a/src/likelihoods/poisson.jl b/src/likelihoods/poisson.jl index d7ca9fd..4ddacdb 100644 --- a/src/likelihoods/poisson.jl +++ b/src/likelihoods/poisson.jl @@ -1,11 +1,21 @@ """ - PoissonLikelihood() + PoissonLikelihood(l::AbstractLink=ExpLink()) -Poisson likelihood with rate as exponential of samples from GP `f`. This is to be used if -we assume that the uncertainity associated with the data follows a Poisson distribution. +Poisson likelihood with rate defined as `l(f)`. + +```math + p(y|f) = Poisson(y | θ=l(f)) +``` + +This is to be used if we assume that the uncertainity associated +with the data follows a Poisson distribution. """ -struct PoissonLikelihood end +struct PoissonLikelihood{L<:AbstractLink} + invlink::L +end + +PoissonLikelihood() = PoissonLikelihood(ExpLink()) -(l::PoissonLikelihood)(f::Real) = Poisson(exp(f)) +(l::PoissonLikelihood)(f::Real) = Poisson(l.invlink(f)) -(l::PoissonLikelihood)(fs::AbstractVector{<:Real}) = Product(Poisson.(exp.(fs))) +(l::PoissonLikelihood)(fs::AbstractVector{<:Real}) = Product(Poisson.(l.invlink.(fs))) diff --git a/src/links.jl b/src/links.jl new file mode 100644 index 0000000..8764230 --- /dev/null +++ b/src/links.jl @@ -0,0 +1,140 @@ +""" + AbstractLink + +Abstract type defining maps from R^n -> X. +They can be applied by calling `link(x)`. + +A series of definitions are given in http://web.pdx.edu/~newsomj/mvclass/ho_link.pdf +""" +abstract type AbstractLink end + +struct ChainLink{Tls} <: AbstractLink + links::Tls +end + +(l::ChainLink)(x) = foldl((x, l) -> l(x), l.ls; init=x) + +""" + Link(f) + +General construction for a link with a function `f`. +""" +struct Link{F} <: AbstractLink + f::F +end + +(l::Link)(x) = l.f(x) + +""" + LogLink() + +`log` link, f:ℝ⁺->ℝ . Its inverse is the [`ExpLink`](@ref). +""" +struct LogLink <: AbstractLink end + +(::LogLink)(x) = log(x) + +Base.inv(::LogLink) = ExpLink() + +""" + ExpLink() + +`exp` link, f:ℝ->ℝ⁺. Its inverse is the [`LogLink`](@ref). +""" +struct ExpLink <: AbstractLink end + +(::ExpLink)(x) = exp(x) + +Base.inv(::ExpLink) = LogLink() + +""" + InvLink() + +`inv` link, f:ℝ/{0}->ℝ/{0}. It is its own inverse. +""" +struct InvLink <: AbstractLink end + +(::InvLink)(x) = inv(x) + +Base.inv(::InvLink) = InvLink() + +""" + SqrtLink() + +`sqrt` link, f:ℝ⁺∪{0}->ℝ⁺∪{0}. Its inverse is the [`SquareLink`](@ref). +""" +struct SqrtLink <: AbstractLink end + +(::SqrtLink)(x) = sqrt(x) + +Base.inv(::SqrtLink) = SquareLink() + +""" + SquareLink() + +`^2` link, f:ℝ->ℝ⁺∪{0}. Its inverse is the [`SqrtLink`](@ref). +""" +struct SquareLink <: AbstractLink end + +(::SquareLink)(x) = x^2 + +Base.inv(::SquareLink) = SqrtLink() + +""" + LogitLink() + +`log(x/(1-x))` link, f:[0,1]->ℝ. Its inverse is the [`LogisticLink`](@ref). +""" +struct LogitLink <: AbstractLink end + +(::LogitLink)(x) = logit(x) + +Base.inv(::LogitLink) = LogisticLink() + +""" + LogisticLink() + +`exp(x)/(1+exp(-x))` link. f:ℝ->[0,1]. Its inverse is the [`Logit`](@ref). +""" +struct LogisticLink <: AbstractLink end + +(::LogisticLink)(x) = logistic(x) + +Base.inv(::LogisticLink) = LogitLink() + +""" + ProbitLink() + +`ϕ⁻¹(y)` link, where `ϕ⁻¹` is the `invcdf` of a `Normal` distribution, f:[0,1]->ℝ. +Its inverse is the [`NormalCDFLink`](@ref). +""" +struct ProbitLink <: AbstractLink end + +(::ProbitLink)(x) = norminvcdf(x) + +Base.inv(::ProbitLink) = NormalCDFLink() + +""" + NormalCDFLink() + +`ϕ(y)` link, where `ϕ` is the `cdf` of a `Normal` distribution, f:ℝ->[0,1]. +Its inverse is the [`ProbitLink`](@ref). +""" +struct NormalCDFLink <: AbstractLink end + +(::NormalCDFLink)(x) = normcdf(x) + +Base.inv(::NormalCDFLink) = ProbitLink() + +(::ChainLink{<:Tuple{LogLink,NormalCDFLink}})(x) = normlogcdf(x) # Specialisation for log + normal cdf + +""" + SoftMaxLink() + +`softmax` link, i.e `f(x)ᵢ = exp(xᵢ)/∑ⱼexp(xⱼ)`. +f:ℝⁿ->Sⁿ⁻¹, where Sⁿ⁻¹ is an [(n-1)-simplex](https://en.wikipedia.org/wiki/Simplex) +It has no defined inverse +""" +struct SoftMaxLink <: AbstractLink end + +(::SoftMaxLink)(x::AbstractVector{<:Real}) = softmax(x) diff --git a/test/likelihoods/gamma.jl b/test/likelihoods/gamma.jl index 72f474b..2f78083 100644 --- a/test/likelihoods/gamma.jl +++ b/test/likelihoods/gamma.jl @@ -1,4 +1,4 @@ @testset "GammaLikelihood" begin lik = GammaLikelihood(1.) - test_interface(lik, SqExponentialKernel(), rand(10); functor_args=(:α,)) + test_interface(lik, SqExponentialKernel(), rand(10); functor_args=(:α, :invlink)) end diff --git a/test/links.jl b/test/links.jl new file mode 100644 index 0000000..efb1e3a --- /dev/null +++ b/test/links.jl @@ -0,0 +1,62 @@ +@testset "link" begin + x = rand() + xs = rand(3) + + # Generic link + f = sin + l = Link(f) + @test l(x) == f(x) + + # Log + l = LogLink() + @test l(x) == log(x) + @test inv(l) == ExpLink() + @test inv(inv(l)) == l + + # Exp + l = ExpLink() + @test l(x) == exp(x) + @test inv(l) == LogLink() + @test inv(inv(l)) == l + + # Sqrt + l = SqrtLink() + @test l(x) == sqrt(x) + @test inv(l) == SquareLink() + @test inv(inv(l)) == l + + # Square + l = SquareLink() + @test l(x) == x^2 + @test inv(l) == SqrtLink() + @test inv(inv(l)) == l + + # Logit + l = LogitLink() + @test l(x) == logit(x) + @test inv(l) isa LogisticLink + @test inv(inv(l)) == l + + # Logistic + l = LogisticLink() + @test l(x) == logistic(x) + @test inv(l) isa LogitLink + @test inv(inv(l)) == l + + # Probit + l = ProbitLink() + @test l(x) == norminvcdf(x) + @test inv(l) == NormalCDFLink() + @test inv(inv(l)) == l + + # NormalCDF + l = NormalCDFLink() + @test l(x) == normcdf(x) + @test inv(l) == ProbitLink() + @test inv(inv(l)) == l + + # SoftMax + l = SoftMaxLink() + @test l(xs) == softmax(xs) + +end diff --git a/test/runtests.jl b/test/runtests.jl index 33171d7..e8de2e0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,11 +4,12 @@ using Test using Random using Functors using Distributions +using StatsFuns @testset "GPLikelihoods.jl" begin include("test_utils.jl") - + include("links.jl") @testset "likelihoods" begin include(joinpath("likelihoods", "bernoulli.jl")) include(joinpath("likelihoods", "categorical.jl"))