From 34181c4021eaa16519d086234dc368e37fa60fd6 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 20:55:53 -0300 Subject: [PATCH 01/21] Implementation of MvDiscreteNonParametric distribution. --- src/multivariate/mvdiscretenonparametric.jl | 199 ++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 src/multivariate/mvdiscretenonparametric.jl diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl new file mode 100644 index 000000000..345d7c7b8 --- /dev/null +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -0,0 +1,199 @@ +struct MvDiscreteNonParametric{T <: Real,P <: Real,Ts <: ArrayOfSimilarArrays{T},Ps <: AbstractVector{P}} <: DiscreteMultivariateDistribution + + support::Ts + p::Ps + + function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, + p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} + + length(support) == length(p) || error("length of `support` and `p` must be equal") + isprobvec(p) || error("`p` must be a probability vector") + allunique(support) || error("`support` must contain only unique value") + new{T,P,Ts,Ps}(support, p) + end +end + +""" + MvDiscreteNonParametric( + support::AbstractVector, + p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + ) + +Construct a multivariate discrete nonparametric probability distribution with `support` and corresponding +probabilities `p`. If the probability vector argument is not passed, then +equal probability is assigned to each entry in the support. + +# Examples +```julia +using ArraysOfArrays +# rows correspond to samples +μ = MvDiscreteNonParametric(nestedview(rand(7,3)')) + +# columns correspond to samples +ν = MvDiscreteNonParametric(nestedview(rand(7,3))) +``` +""" +function MvDiscreteNonParametric( + support::AbstractVector{<:AbstractVector{<:Real}}, + p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), +) + return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(ArrayOfSimilarArrays(support)),typeof(p)}( + ArrayOfSimilarArrays(support), p) +end + +""" + MvDiscreteNonParametric( + support::Matrix{<:Real}, + p::AbstractVector{<:Real}=fill(inv(length(support)), length(support) + ) + +Construct a multivariate discrete nonparametric probability distribution +from a matrix as `support` where each row is a sample, and corresponding +probabilities `p`. If the probability vector argument is not passed, then +equal probability is assigned to each entry in the support. + +# Examples +```julia +# the rows correspond to the samples +using LinearAlgebra +μ = MvDiscreteNonParametric(rand(10,3), normalize!(rand(10),1)) +``` +""" +function MvDiscreteNonParametric( + support::Matrix{<:Real}, + p::AbstractVector{<:Real}=fill(inv(size(support)[1]), size(support)[1]) +) + return MvDiscreteNonParametric(nestedview(support'), p) +end + +Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = T + +""" + support(d::MvDiscreteNonParametric) +Get a sorted AbstractVector defining the support of `d`. +""" +Distributions.support(d::MvDiscreteNonParametric) = d.support + +""" + probs(d::MvDiscreteNonParametric) +Get the vector of probabilities associated with the support of `d`. +""" +Distributions.probs(d::MvDiscreteNonParametric) = d.p + + +# It would be more intuitive if length was the +# +""" + length(d::MvDiscreteNonParametric) +Retunrs the dimension of the mass points (samples). +It corresponds to `innersize(d.support)[1]`, +where `innersize` is a function from from ArraysOfArrays.jl. +""" +Base.length(d::MvDiscreteNonParametric) = innersize(d.support)[1] + +""" + size(d::MvDiscreteNonParametric) +Returns the size of the support as a tuple where +the first value is the number of points (samples) +and the second is the dimension of the samples (e.g ℝⁿ). +It corresponds to `size(flatview(d.support)')` +where `flatview` is a function from from ArraysOfArrays.jl +that turns an Array of Arrays into a matrix. +""" +Base.size(d::MvDiscreteNonParametric) = size(flatview(d.support)') + +""" + empiricalmeasure( + support::AbstractVector, + probs::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + ) + +Construct a finite discrete probability measure with `support` and corresponding +`probabilities`. If the probability vector argument is not passed, then +equal probability is assigned to each entry in the support. + +# Examples +```julia +using ArraysOfArrays +# rows correspond to samples +μ = empiricalmeasure(nestedview(rand(7,3)'), normalize!(rand(10),1)) + +# columns correspond to samples, each with equal probability +ν = empiricalmeasure(nestedview(rand(3,12))) +``` + +!!! note + If `support` is a 1D vector, the constructed measure will be sorted, + e.g. for `mu = empiricalmeasure([3, 1, 2],[0.5, 0.2, 0.3])`, then + `mu.support` will be `[1, 2, 3]` and `mu.p` will be `[0.2, 0.3, 0.5]`. + Also, avoid passing 1D distributions as `RowVecs(rand(3))` or `[[1],[3],[4]]`, + since this will be dispatched to the multivariate case instead + of the univariate case for which the algorithm is more efficient. + +""" +function empiricalmeasure( + support::AbstractVector{<:Real}, + p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + ) + return DiscreteNonParametric(support, p) +end +function empiricalmeasure( + support::AbstractVector{<:AbstractVector{<:Real}}, + p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + ) + return MvDiscreteNonParametric(support, p) + # return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(support),typeof(p)}(support, p) +end + +""" + empiricalmeasure( + support::Matrix{<:Real}, + probs::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + ) +Construct a multivariate empirical measure from a matrix +as `support` and sample by rows. +# Examples +```julia +using ArraysOfArrays +# the rows correspond to the samples +μ = empiricalmeasure(rand(7,3), normalize!(rand(10),1)) +``` +""" +function empiricalmeasure( + support::Matrix{<:Real}, + p::AbstractVector{<:Real}=fill(inv(size(support)[1]), size(support)[1]) +) + return MvDiscreteNonParametric(nestedview(support'), p) +end + + +function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real + s = support(d) + p = probs(d) + for i in 1:length(p) + if s[i] == x + return log(p[i]) + end + end + return log(zero(eltype(p))) +end + + +function mean(d::MvDiscreteNonParametric) + return StatsBase.mean(d.support, weights(d.p)) +end + +function var(d::MvDiscreteNonParametric) + x = support(d) + p = probs(d) + return StatsBase.var(x, Weights(p, one(eltype(p))), corrected=false) +end + +function cov(d::MvDiscreteNonParametric) + x = support(d) + p = probs(d) + return cov(x, Weights(p, one(eltype(p))), corrected=false) +end + +entropy(d::MvDiscreteNonParametric) = entropy(probs(d)) +entropy(d::MvDiscreteNonParametric, b::Real) = entropy(probs(d), b) From 0cc75180300ac253a063ef65b0be96bdc2ce418a Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 20:56:09 -0300 Subject: [PATCH 02/21] Implementation of MvDiscreteNonParametric samplers. --- src/samplers/mvdiscretenonparametric.jl | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/samplers/mvdiscretenonparametric.jl diff --git a/src/samplers/mvdiscretenonparametric.jl b/src/samplers/mvdiscretenonparametric.jl new file mode 100644 index 000000000..addde932e --- /dev/null +++ b/src/samplers/mvdiscretenonparametric.jl @@ -0,0 +1,44 @@ +""" + MvDiscreteNonParametricSampler(support, p) +Data structure for efficiently sampling from an arbitrary probability mass +function defined by `support` and probabilities `p`. +""" +struct MvDiscreteNonParametricSampler{T <: Real,S <: AbstractVector{<:AbstractVector{T}},A <: AliasTable} <: Sampleable{Multivariate,Discrete} + support::S + aliastable::A + + function MvDiscreteNonParametricSampler{T,S}(support::S, probs::AbstractVector{<:Real} + ) where {T <: Real,S <: AbstractVector{<:AbstractVector{T}}} + aliastable = AliasTable(probs) + new{T,S,typeof(aliastable)}(support, aliastable) + end +end + +MvDiscreteNonParametricSampler(support::S, p::AbstractVector{<:Real} + ) where {T <: Real,S <: AbstractVector{<:AbstractVector{T}}} = + MvDiscreteNonParametricSampler{T,S}(support, p) + +# Sampling + +sampler(d::MvDiscreteNonParametric) = + MvDiscreteNonParametricSampler(support(d), probs(d)) + + +function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real + + length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) + s = d.support + p = d.p + + n = length(p) + draw = Base.rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i += 1] + end + for (j, v) in enumerate(s[i]) + x[j] = v + end + return x +end From 67b1890436049cf143221f34733129337f38d5db Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 20:57:19 -0300 Subject: [PATCH 03/21] Added ArraysOfArrays as dependency. --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index 8ce7aa07a..6c3da9f43 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["JuliaStats"] version = "0.25.5" [deps] +ArraysOfArrays = "65a8f2f4-9b39-5baf-92e2-a9cc46fdf018" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" @@ -17,6 +18,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [compat] +ArraysOfArrays = "0.5" FillArrays = "0.9, 0.10, 0.11" PDMats = "0.10, 0.11" QuadGK = "2" From 7fba9dacd787e80ec8e89bdc0436ea63faaebdf7 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 20:59:10 -0300 Subject: [PATCH 04/21] Added MvDiscreteNonParametric to be exported. --- src/Distributions.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Distributions.jl b/src/Distributions.jl index 72005e7f8..23b956fff 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -121,6 +121,7 @@ export MixtureModel, Multinomial, MultivariateNormal, + MvDiscreteNonParametric, MvLogNormal, MvNormal, MvNormalCanon, From 6f43b1830df976989bc6fc13446c9e9b4f20d4d1 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 21:03:32 -0300 Subject: [PATCH 05/21] Added MvDiscreteNonParametric to be exported. --- src/multivariates.jl | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/multivariates.jl b/src/multivariates.jl index 0d2f214c9..c0c75142d 100644 --- a/src/multivariates.jl +++ b/src/multivariates.jl @@ -29,8 +29,8 @@ function _rand!(rng::AbstractRNG, s::Sampleable{Multivariate}, m::AbstractMatrix @boundscheck size(m, 1) == length(s) || throw(DimensionMismatch("Output size inconsistent with sample length.")) smp = sampler(s) - for i in Base.OneTo(size(m,2)) - _rand!(rng, smp, view(m,:,i)) + for i in Base.OneTo(size(m, 2)) + _rand!(rng, smp, view(m, :, i)) end return m end @@ -47,7 +47,7 @@ end rand!(rng::AbstractRNG, s::Sampleable{Multivariate}, X::AbstractArray{<:AbstractVector}) = @inbounds rand!(rng, s, X, - !all([isassigned(X,i) for i in eachindex(X)]) || + !all([isassigned(X, i) for i in eachindex(X)]) || !all(length.(X) .== length(s))) function rand!(rng::AbstractRNG, s::Sampleable{Multivariate}, @@ -90,11 +90,11 @@ rand(rng::AbstractRNG, s::Sampleable{Multivariate,Continuous}) = If ``x`` is a vector, it returns whether x is within the support of ``d``. If ``x`` is a matrix, it returns whether every column in ``x`` is within the support of ``d``. """ -insupport{D<:MultivariateDistribution}(d::Union{D, Type{D}}, x::AbstractArray) +insupport{D <: MultivariateDistribution}(d::Union{D,Type{D}}, x::AbstractArray) -function insupport!(r::AbstractArray, d::Union{D,Type{D}}, X::AbstractMatrix) where D<:MultivariateDistribution +function insupport!(r::AbstractArray, d::Union{D,Type{D}}, X::AbstractMatrix) where D <: MultivariateDistribution n = length(r) - size(X) == (length(d),n) || + size(X) == (length(d), n) || throw(DimensionMismatch("Inconsistent array dimensions.")) for i in 1:n @inbounds r[i] = insupport(d, view(X, :, i)) @@ -102,8 +102,8 @@ function insupport!(r::AbstractArray, d::Union{D,Type{D}}, X::AbstractMatrix) wh return r end -insupport(d::Union{D,Type{D}}, X::AbstractMatrix) where {D<:MultivariateDistribution} = - insupport!(BitArray(undef, size(X,2)), d, X) +insupport(d::Union{D,Type{D}}, X::AbstractMatrix) where {D <: MultivariateDistribution} = + insupport!(BitArray(undef, size(X, 2)), d, X) ## statistics @@ -154,11 +154,11 @@ function cor(d::MultivariateDistribution) R = Matrix{eltype(C)}(undef, n, n) for j = 1:n - for i = 1:j-1 + for i = 1:j - 1 @inbounds R[i, j] = R[j, i] end R[j, j] = 1.0 - for i = j+1:n + for i = j + 1:n @inbounds R[i, j] = C[i, j] / sqrt(C[i, i] * C[j, j]) end end @@ -208,15 +208,15 @@ function pdf(d::MultivariateDistribution, X::AbstractVector) end function _logpdf!(r::AbstractArray, d::MultivariateDistribution, X::AbstractMatrix) - for i in 1 : size(X,2) - @inbounds r[i] = logpdf(d, view(X,:,i)) + for i in 1:size(X, 2) + @inbounds r[i] = logpdf(d, view(X, :, i)) end return r end function _pdf!(r::AbstractArray, d::MultivariateDistribution, X::AbstractMatrix) - for i in 1 : size(X,2) - @inbounds r[i] = pdf(d, view(X,:,i)) + for i in 1:size(X, 2) + @inbounds r[i] = pdf(d, view(X, :, i)) end return r end @@ -275,6 +275,7 @@ end for fname in ["dirichlet.jl", "multinomial.jl", "dirichletmultinomial.jl", + "mvdiscretenonparametric.jl", "mvnormal.jl", "mvnormalcanon.jl", "mvlognormal.jl", From c64339a53eca94bfa165902fef33cd980781208b Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 21:04:55 -0300 Subject: [PATCH 06/21] Added MvDiscreteNonParametric test set. --- test/mvdiscretenonparametric.jl | 140 ++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 test/mvdiscretenonparametric.jl diff --git a/test/mvdiscretenonparametric.jl b/test/mvdiscretenonparametric.jl new file mode 100644 index 000000000..69b10ec46 --- /dev/null +++ b/test/mvdiscretenonparametric.jl @@ -0,0 +1,140 @@ +using Distributions +using ArraysOfArrays +using StatsBase +using LinearAlgebra +using Random +using Test + + +@testset "Declaring MvDiscreteNonParametric" begin + + @testset "MvDiscreteNonParametric from ArraysOfArrays" begin + + Random.seed!(7) + n = 4 + m = 2 + A = nestedview(rand(n, m)') + p = normalize!(rand(n), 1) + + # Passing probabilities + μ = @inferred(MvDiscreteNonParametric{Float64,Float64,typeof(A),typeof(p)}(A, p)) + @test support(μ) == A + @test length(μ) == m + @test size(μ) == size(flatview(A)') + @test probs(μ) == p + + μ = @inferred(MvDiscreteNonParametric(A, p)) + @test support(μ) == A + @test length(μ) == m + @test size(μ) == size(flatview(A)') + + # Without passing probabilities + μ = @inferred(MvDiscreteNonParametric(A)) + @test support(μ) == A + @test length(μ) == m + @test size(μ) == size(flatview(A)') + @test probs(μ) == fill(1 / n, n) + + # Array of arrays without ArraysOfArrays.jl + n, m = 3, 2 + p = ([3 / 5, 1 / 5, 1 / 5]) + A = [[1,0],[1,1],[0,1]] + μ = @inferred(MvDiscreteNonParametric(A, p)) + + @test support(μ) == A + @test length(μ) == m + @test size(μ) == (length(A), length(A[1])) + @test probs(μ) == p + + end + + @testset "MvDiscreteNonParametric from Matrix" begin + + Random.seed!(7) + n, m = 10, 5 + A = rand(n, m) + p = normalize!(rand(n), 1) + + # Passing probabilities + μ = @inferred(MvDiscreteNonParametric(A, p)) + + @test flatview(support(μ))' == A + @test length(μ) == m + @test size(μ) == size(A) + @test probs(μ) == p + + # Without passing probabilities + μ = @inferred(MvDiscreteNonParametric(A)) + + @test flatview(support(μ))' == A + @test length(μ) == m + @test size(μ) == size(A) + @test probs(μ) == fill(1 / n, n) + + end +end + + +@testset "Functionalities" begin + + + function variance(d) + v = zeros(length(d)) + for i in 1:length(d) + s = flatview(d.support)'[:,i] + mₛ = mean(d)[i] + v[i] = sum(abs2.(s .- mₛ), Weights(d.p)) + end + return v + end + + function covariance(d) + n = length(d) + v = zeros(n, n) + for i in 1:n, j in 1:n + s = flatview(d.support)'[:,i] + mₛ = mean(d)[i] + + u = flatview(d.support)'[:,j] + mᵤ = mean(d)[j] + + v[i,j] = sum((s .- mₛ) .* (u .- mᵤ), Weights(d.p)) + end +return v + end + +Random.seed!(7) + n, m = 7, 9 + A = rand(n, m) + p = normalize!(rand(n), 1) + μ = @inferred(MvDiscreteNonParametric(A, p)) + + @test mean(μ) == mean(flatview(μ.support)[:,:], Weights(p), dims=2)[:] + @test var(μ) ≈ variance(μ) + @test cov(μ) ≈ covariance(μ) + @test pdf(μ, flatview(μ.support)) ≈ μ.p + @test pdf(μ, zeros(m)) == 0.0 + @test entropy(μ) == entropy(μ.p) + @test entropy(μ, 2) == entropy(μ.p, 2) + +end + +@testset "Sampling" begin + Random.seed!(7) + μ = MvDiscreteNonParametric(rand(10, 10)) + @test rand(μ) in μ.support + + for sample in eachcol(rand(μ, 10)) + @test sample in μ.support + end + + A = rand(3, 2) + μ = MvDiscreteNonParametric(A, [0.2,0.5,0.3]) + + for i in 1:3 + samples = nestedview(rand(μ, 10000)) + @test abs(mean([A[i,:] == s for s in samples]) - μ.p[i]) < 0.05 + + end +end + From e156052d3f5a15dead899aaa4c03ac55468053f9 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 21:05:51 -0300 Subject: [PATCH 07/21] Added MvDiscreteNonParametric samplers to samplers.jl. --- src/samplers.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/samplers.jl b/src/samplers.jl index 794f2bff4..ce13be716 100644 --- a/src/samplers.jl +++ b/src/samplers.jl @@ -24,6 +24,7 @@ for fname in ["aliastable.jl", "vonmises.jl", "vonmisesfisher.jl", "discretenonparametric.jl", + "mvdiscretenonparametric.jl", "categorical.jl"] include(joinpath("samplers", fname)) From d11d300d942f7e883d58aa36ed97faa6ee4e137c Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 21:09:39 -0300 Subject: [PATCH 08/21] Added _rand!(::GlobalRNG,...). --- src/samplers/mvdiscretenonparametric.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/samplers/mvdiscretenonparametric.jl b/src/samplers/mvdiscretenonparametric.jl index addde932e..e566e30b8 100644 --- a/src/samplers/mvdiscretenonparametric.jl +++ b/src/samplers/mvdiscretenonparametric.jl @@ -23,9 +23,10 @@ MvDiscreteNonParametricSampler(support::S, p::AbstractVector{<:Real} sampler(d::MvDiscreteNonParametric) = MvDiscreteNonParametricSampler(support(d), probs(d)) +_rand!(s::MvDiscreteNonParametricSampler, x::AbstractVector{T}) where T<:Real = + _rand!(GLOBAL_RNG, s, x) function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real - length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) s = d.support p = d.p From 24612da7f3a6fbac0a9a597aa1a30998bb38c5d1 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 15 Nov 2021 21:55:04 -0300 Subject: [PATCH 09/21] Moved MvDiscreteNonParametric sampling to just one file. All tests passing. --- src/Distributions.jl | 2 + src/multivariate/mvdiscretenonparametric.jl | 80 +++++---------------- src/samplers.jl | 1 - src/samplers/mvdiscretenonparametric.jl | 45 ------------ test/runtests.jl | 1 + 5 files changed, 20 insertions(+), 109 deletions(-) delete mode 100644 src/samplers/mvdiscretenonparametric.jl diff --git a/src/Distributions.jl b/src/Distributions.jl index 23b956fff..056001ddd 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -25,6 +25,8 @@ import PDMats: dim, PDMat, invquad using SpecialFunctions +using ArraysOfArrays + export # re-export Statistics mean, median, quantile, std, var, cov, cor, diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 345d7c7b8..254694318 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -72,13 +72,13 @@ Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = T support(d::MvDiscreteNonParametric) Get a sorted AbstractVector defining the support of `d`. """ -Distributions.support(d::MvDiscreteNonParametric) = d.support +support(d::MvDiscreteNonParametric) = d.support """ probs(d::MvDiscreteNonParametric) Get the vector of probabilities associated with the support of `d`. """ -Distributions.probs(d::MvDiscreteNonParametric) = d.p +probs(d::MvDiscreteNonParametric) = d.p # It would be more intuitive if length was the @@ -102,71 +102,25 @@ that turns an Array of Arrays into a matrix. """ Base.size(d::MvDiscreteNonParametric) = size(flatview(d.support)') -""" - empiricalmeasure( - support::AbstractVector, - probs::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), - ) - -Construct a finite discrete probability measure with `support` and corresponding -`probabilities`. If the probability vector argument is not passed, then -equal probability is assigned to each entry in the support. +function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real -# Examples -```julia -using ArraysOfArrays -# rows correspond to samples -μ = empiricalmeasure(nestedview(rand(7,3)'), normalize!(rand(10),1)) + length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) + s = d.support + p = d.p -# columns correspond to samples, each with equal probability -ν = empiricalmeasure(nestedview(rand(3,12))) -``` - -!!! note - If `support` is a 1D vector, the constructed measure will be sorted, - e.g. for `mu = empiricalmeasure([3, 1, 2],[0.5, 0.2, 0.3])`, then - `mu.support` will be `[1, 2, 3]` and `mu.p` will be `[0.2, 0.3, 0.5]`. - Also, avoid passing 1D distributions as `RowVecs(rand(3))` or `[[1],[3],[4]]`, - since this will be dispatched to the multivariate case instead - of the univariate case for which the algorithm is more efficient. - -""" -function empiricalmeasure( - support::AbstractVector{<:Real}, - p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), - ) - return DiscreteNonParametric(support, p) -end -function empiricalmeasure( - support::AbstractVector{<:AbstractVector{<:Real}}, - p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), - ) - return MvDiscreteNonParametric(support, p) - # return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(support),typeof(p)}(support, p) -end - -""" - empiricalmeasure( - support::Matrix{<:Real}, - probs::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), - ) -Construct a multivariate empirical measure from a matrix -as `support` and sample by rows. -# Examples -```julia -using ArraysOfArrays -# the rows correspond to the samples -μ = empiricalmeasure(rand(7,3), normalize!(rand(10),1)) -``` -""" -function empiricalmeasure( - support::Matrix{<:Real}, - p::AbstractVector{<:Real}=fill(inv(size(support)[1]), size(support)[1]) -) - return MvDiscreteNonParametric(nestedview(support'), p) + n = length(p) + draw = Base.rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i += 1] + end + for (j, v) in enumerate(s[i]) + x[j] = v + end + return x end - function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real s = support(d) p = probs(d) diff --git a/src/samplers.jl b/src/samplers.jl index ce13be716..794f2bff4 100644 --- a/src/samplers.jl +++ b/src/samplers.jl @@ -24,7 +24,6 @@ for fname in ["aliastable.jl", "vonmises.jl", "vonmisesfisher.jl", "discretenonparametric.jl", - "mvdiscretenonparametric.jl", "categorical.jl"] include(joinpath("samplers", fname)) diff --git a/src/samplers/mvdiscretenonparametric.jl b/src/samplers/mvdiscretenonparametric.jl deleted file mode 100644 index e566e30b8..000000000 --- a/src/samplers/mvdiscretenonparametric.jl +++ /dev/null @@ -1,45 +0,0 @@ -""" - MvDiscreteNonParametricSampler(support, p) -Data structure for efficiently sampling from an arbitrary probability mass -function defined by `support` and probabilities `p`. -""" -struct MvDiscreteNonParametricSampler{T <: Real,S <: AbstractVector{<:AbstractVector{T}},A <: AliasTable} <: Sampleable{Multivariate,Discrete} - support::S - aliastable::A - - function MvDiscreteNonParametricSampler{T,S}(support::S, probs::AbstractVector{<:Real} - ) where {T <: Real,S <: AbstractVector{<:AbstractVector{T}}} - aliastable = AliasTable(probs) - new{T,S,typeof(aliastable)}(support, aliastable) - end -end - -MvDiscreteNonParametricSampler(support::S, p::AbstractVector{<:Real} - ) where {T <: Real,S <: AbstractVector{<:AbstractVector{T}}} = - MvDiscreteNonParametricSampler{T,S}(support, p) - -# Sampling - -sampler(d::MvDiscreteNonParametric) = - MvDiscreteNonParametricSampler(support(d), probs(d)) - -_rand!(s::MvDiscreteNonParametricSampler, x::AbstractVector{T}) where T<:Real = - _rand!(GLOBAL_RNG, s, x) - -function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real - length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) - s = d.support - p = d.p - - n = length(p) - draw = Base.rand(rng, float(eltype(p))) - cp = p[1] - i = 1 - while cp <= draw && i < n - @inbounds cp += p[i += 1] - end - for (j, v) in enumerate(s[i]) - x[j] = v - end - return x -end diff --git a/test/runtests.jl b/test/runtests.jl index e2a83166a..7ed6e3520 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -52,6 +52,7 @@ const tests = [ "pgeneralizedgaussian", "product", "discretenonparametric", + "mvdiscretenonparametric", "functionals", "chernoff", "univariate_bounds", From e8cf084651da65609e782a9ee5b811e595068b2d Mon Sep 17 00:00:00 2001 From: Davi Sales Barreira Date: Tue, 16 Nov 2021 20:50:17 -0300 Subject: [PATCH 10/21] Apply suggestions from code review Co-authored-by: David Widmann --- src/multivariate/mvdiscretenonparametric.jl | 28 ++++----------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 254694318..03756430f 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -1,4 +1,4 @@ -struct MvDiscreteNonParametric{T <: Real,P <: Real,Ts <: ArrayOfSimilarArrays{T},Ps <: AbstractVector{P}} <: DiscreteMultivariateDistribution +const MvDiscreteNonParametric{T<:AbstractVector{<:Real},P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} support::Ts p::Ps @@ -83,24 +83,8 @@ probs(d::MvDiscreteNonParametric) = d.p # It would be more intuitive if length was the # -""" - length(d::MvDiscreteNonParametric) -Retunrs the dimension of the mass points (samples). -It corresponds to `innersize(d.support)[1]`, -where `innersize` is a function from from ArraysOfArrays.jl. -""" -Base.length(d::MvDiscreteNonParametric) = innersize(d.support)[1] - -""" - size(d::MvDiscreteNonParametric) -Returns the size of the support as a tuple where -the first value is the number of points (samples) -and the second is the dimension of the samples (e.g ℝⁿ). -It corresponds to `size(flatview(d.support)')` -where `flatview` is a function from from ArraysOfArrays.jl -that turns an Array of Arrays into a matrix. -""" -Base.size(d::MvDiscreteNonParametric) = size(flatview(d.support)') +Base.length(d::MvDiscreteNonParametric) = length(first(d.support)) +Base.size(d::MvDiscreteNonParametric) = (length(d), length(d.support)) function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real @@ -115,11 +99,10 @@ function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{ while cp <= draw && i < n @inbounds cp += p[i += 1] end - for (j, v) in enumerate(s[i]) - x[j] = v - end + copyto!(x, s[i]) return x end +end function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real s = support(d) @@ -150,4 +133,3 @@ function cov(d::MvDiscreteNonParametric) end entropy(d::MvDiscreteNonParametric) = entropy(probs(d)) -entropy(d::MvDiscreteNonParametric, b::Real) = entropy(probs(d), b) From cbb6ae75965c9ff79f91f53fc81f3ac047b2d86b Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Tue, 16 Nov 2021 21:39:39 -0300 Subject: [PATCH 11/21] Removing ArraysOfArarys dependency. --- src/multivariate/mvdiscretenonparametric.jl | 109 ++++++++++++++++++-- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 03756430f..b0f1d026e 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -1,11 +1,89 @@ -const MvDiscreteNonParametric{T<:AbstractVector{<:Real},P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} - +# struct MvDiscreteNonParametric{T <: Real,P <: Real,Ts <: ArrayOfSimilarArrays{T},Ps <: AbstractVector{P}} <: DiscreteMultivariateDistribution +# support::Ts +# p::Ps +# function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, +# p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} +# length(support) == length(p) || error("length of `support` and `p` must be equal") +# isprobvec(p) || error("`p` must be a probability vector") +# allunique(support) || error("`support` must contain only unique value") +# new{T,P,Ts,Ps}(support, p) +# end +# end +# """ +# MvDiscreteNonParametric( +# support::AbstractVector, +# p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), +# ) +# Construct a multivariate discrete nonparametric probability distribution with `support` and corresponding +# probabilities `p`. If the probability vector argument is not passed, then +# equal probability is assigned to each entry in the support. +# # Examples +# ```julia +# using ArraysOfArrays +# # rows correspond to samples +# μ = MvDiscreteNonParametric(nestedview(rand(7,3)')) +# # columns correspond to samples +# ν = MvDiscreteNonParametric(nestedview(rand(7,3))) +# ``` +# """ +# function MvDiscreteNonParametric( +# support::AbstractVector{<:AbstractVector{<:Real}}, +# p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), +# ) +# return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(ArrayOfSimilarArrays(support)),typeof(p)}( +# ArrayOfSimilarArrays(support), p) +# end +# """ +# MvDiscreteNonParametric( +# support::Matrix{<:Real}, +# p::AbstractVector{<:Real}=fill(inv(length(support)), length(support) +# ) +# Construct a multivariate discrete nonparametric probability distribution +# from a matrix as `support` where each row is a sample, and corresponding +# probabilities `p`. If the probability vector argument is not passed, then +# equal probability is assigned to each entry in the support. +# # Examples +# ```julia +# # the rows correspond to the samples +# using LinearAlgebra +# μ = MvDiscreteNonParametric(rand(10,3), normalize!(rand(10),1)) +# ``` +# """ +# function MvDiscreteNonParametric( +# support::Matrix{<:Real}, +# p::AbstractVector{<:Real}=fill(inv(size(support)[1]), size(support)[1]) +# ) +# return MvDiscreteNonParametric(nestedview(support'), p) +# end +# Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = T + + + +# struct GeneralDiscreteNonParametric{VF,T,P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} <: Distribution{VF,Discrete} + +# support::Ts +# p::Ps + +# function GeneralDiscreteNonParametric{VF,T,P,Ts,Ps}(support::Ts, +# p::Ps) where {T,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} + +# length(support) == length(p) || error("length of `support` and `p` must be equal") +# isprobvec(p) || error("`p` must be a probability vector") +# allunique(support) || error("`support` must contain only unique value") +# new{VF,T,P,Ts,Ps}(support, p) +# end +# end + +# const MvDiscreteNonParametric{T<:AbstractVector{<:Real}, +# P<:Real,Ts<:AbstractVector{T}, +# Ps<:AbstractVector{P}} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} + +struct MvDiscreteNonParametric{T <: Real,P <: Real, + Ts <: AbstractVector{<: AbstractVector{T}},Ps <: AbstractVector{P}} <: DiscreteMultivariateDistribution support::Ts p::Ps - function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} - length(support) == length(p) || error("length of `support` and `p` must be equal") isprobvec(p) || error("`p` must be a probability vector") allunique(support) || error("`support` must contain only unique value") @@ -13,6 +91,21 @@ const MvDiscreteNonParametric{T<:AbstractVector{<:Real},P<:Real,Ts<:AbstractVect end end + +# struct MvDiscreteNonParametric{VF,T,P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} <: Distribution{VF,Discrete} + +# support::Ts +# p::Ps + +# function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, +# p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} + +# length(support) == length(p) || error("length of `support` and `p` must be equal") +# isprobvec(p) || error("`p` must be a probability vector") +# allunique(support) || error("`support` must contain only unique value") +# new{T,P,Ts,Ps}(support, p) +# end +# end """ MvDiscreteNonParametric( support::AbstractVector, @@ -34,11 +127,10 @@ using ArraysOfArrays ``` """ function MvDiscreteNonParametric( - support::AbstractVector{<:AbstractVector{<:Real}}, + support::AbstractArray{<:AbstractVector{<:Real}}, p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), ) - return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(ArrayOfSimilarArrays(support)),typeof(p)}( - ArrayOfSimilarArrays(support), p) + return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(support),typeof(p)}(support, p) end """ @@ -82,7 +174,7 @@ probs(d::MvDiscreteNonParametric) = d.p # It would be more intuitive if length was the -# +# Base.length(d::MvDiscreteNonParametric) = length(first(d.support)) Base.size(d::MvDiscreteNonParametric) = (length(d), length(d.support)) @@ -102,7 +194,6 @@ function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{ copyto!(x, s[i]) return x end -end function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real s = support(d) From 67a15ddc1b4ce5f986e2c5a8081f8522488d7c14 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Fri, 19 Nov 2021 13:26:09 -0300 Subject: [PATCH 12/21] ArrarysOfArrays on test. --- Project.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 7d32f2ded..bfc964145 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ authors = ["JuliaStats"] version = "0.25.30" [deps] -ArraysOfArrays = "65a8f2f4-9b39-5baf-92e2-a9cc46fdf018" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" @@ -21,7 +20,6 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -ArraysOfArrays = "0.5" ChainRulesCore = "1" DensityInterface = "0.4" FillArrays = "0.9, 0.10, 0.11, 0.12" @@ -33,6 +31,7 @@ StatsFuns = "0.8, 0.9" julia = "1" [extras] +ArraysOfArrays = "65a8f2f4-9b39-5baf-92e2-a9cc46fdf018" Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -44,4 +43,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test"] +test = ["ArraysOfArrays", "StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test"] From 5644219cf61ce9366f6a366d7ee36e5f59f931a1 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Fri, 19 Nov 2021 21:35:41 -0300 Subject: [PATCH 13/21] Fixed GeneralDiscreteNonParametric with version from devmotion. --- src/multivariate/mvdiscretenonparametric.jl | 148 +++----------------- 1 file changed, 17 insertions(+), 131 deletions(-) diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index b0f1d026e..15c94204c 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -1,111 +1,21 @@ -# struct MvDiscreteNonParametric{T <: Real,P <: Real,Ts <: ArrayOfSimilarArrays{T},Ps <: AbstractVector{P}} <: DiscreteMultivariateDistribution -# support::Ts -# p::Ps -# function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, -# p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} -# length(support) == length(p) || error("length of `support` and `p` must be equal") -# isprobvec(p) || error("`p` must be a probability vector") -# allunique(support) || error("`support` must contain only unique value") -# new{T,P,Ts,Ps}(support, p) -# end -# end -# """ -# MvDiscreteNonParametric( -# support::AbstractVector, -# p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), -# ) -# Construct a multivariate discrete nonparametric probability distribution with `support` and corresponding -# probabilities `p`. If the probability vector argument is not passed, then -# equal probability is assigned to each entry in the support. -# # Examples -# ```julia -# using ArraysOfArrays -# # rows correspond to samples -# μ = MvDiscreteNonParametric(nestedview(rand(7,3)')) -# # columns correspond to samples -# ν = MvDiscreteNonParametric(nestedview(rand(7,3))) -# ``` -# """ -# function MvDiscreteNonParametric( -# support::AbstractVector{<:AbstractVector{<:Real}}, -# p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), -# ) -# return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(ArrayOfSimilarArrays(support)),typeof(p)}( -# ArrayOfSimilarArrays(support), p) -# end -# """ -# MvDiscreteNonParametric( -# support::Matrix{<:Real}, -# p::AbstractVector{<:Real}=fill(inv(length(support)), length(support) -# ) -# Construct a multivariate discrete nonparametric probability distribution -# from a matrix as `support` where each row is a sample, and corresponding -# probabilities `p`. If the probability vector argument is not passed, then -# equal probability is assigned to each entry in the support. -# # Examples -# ```julia -# # the rows correspond to the samples -# using LinearAlgebra -# μ = MvDiscreteNonParametric(rand(10,3), normalize!(rand(10),1)) -# ``` -# """ -# function MvDiscreteNonParametric( -# support::Matrix{<:Real}, -# p::AbstractVector{<:Real}=fill(inv(size(support)[1]), size(support)[1]) -# ) -# return MvDiscreteNonParametric(nestedview(support'), p) -# end -# Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = T - - - -# struct GeneralDiscreteNonParametric{VF,T,P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} <: Distribution{VF,Discrete} - -# support::Ts -# p::Ps - -# function GeneralDiscreteNonParametric{VF,T,P,Ts,Ps}(support::Ts, -# p::Ps) where {T,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} - -# length(support) == length(p) || error("length of `support` and `p` must be equal") -# isprobvec(p) || error("`p` must be a probability vector") -# allunique(support) || error("`support` must contain only unique value") -# new{VF,T,P,Ts,Ps}(support, p) -# end -# end - -# const MvDiscreteNonParametric{T<:AbstractVector{<:Real}, -# P<:Real,Ts<:AbstractVector{T}, -# Ps<:AbstractVector{P}} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} - -struct MvDiscreteNonParametric{T <: Real,P <: Real, - Ts <: AbstractVector{<: AbstractVector{T}},Ps <: AbstractVector{P}} <: DiscreteMultivariateDistribution +struct GeneralDiscreteNonParametric{VF,T,P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} <: Distribution{VF,Discrete} support::Ts p::Ps - function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, - p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} - length(support) == length(p) || error("length of `support` and `p` must be equal") - isprobvec(p) || error("`p` must be a probability vector") - allunique(support) || error("`support` must contain only unique value") - new{T,P,Ts,Ps}(support, p) + + function GeneralDiscreteNonParametric{VF,T,P,Ts,Ps}(support::Ts, + p::Ps; check_args=true) where {VF,T,P<: Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} + if check_args + length(support) == length(p) || error("length of `support` and `p` must be equal") + isprobvec(p) || error("`p` must be a probability vector") + allunique(support) || error("`support` must contain only unique values") + end + new{VF,T,P,Ts,Ps}(support, p) end end +const MvDiscreteNonParametric{T<:AbstractVector{<:Real}, + P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} -# struct MvDiscreteNonParametric{VF,T,P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} <: Distribution{VF,Discrete} - -# support::Ts -# p::Ps - -# function MvDiscreteNonParametric{T,P,Ts,Ps}(support::Ts, -# p::Ps) where {T <: Real,P <: Real,Ts <: AbstractVector{<:AbstractVector{T}},Ps <: AbstractVector{P}} - -# length(support) == length(p) || error("length of `support` and `p` must be equal") -# isprobvec(p) || error("`p` must be a probability vector") -# allunique(support) || error("`support` must contain only unique value") -# new{T,P,Ts,Ps}(support, p) -# end -# end """ MvDiscreteNonParametric( support::AbstractVector, @@ -118,44 +28,20 @@ equal probability is assigned to each entry in the support. # Examples ```julia -using ArraysOfArrays # rows correspond to samples -μ = MvDiscreteNonParametric(nestedview(rand(7,3)')) +x = collect(eachrow(rand(10,2))) +μ = MvDiscreteNonParametric(x) # columns correspond to samples -ν = MvDiscreteNonParametric(nestedview(rand(7,3))) +y = collect(eachcol(rand(7,12))) +ν = MvDiscreteNonParametric(y) ``` """ function MvDiscreteNonParametric( support::AbstractArray{<:AbstractVector{<:Real}}, p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), ) - return MvDiscreteNonParametric{eltype(eltype(support)),eltype(p),typeof(support),typeof(p)}(support, p) -end - -""" - MvDiscreteNonParametric( - support::Matrix{<:Real}, - p::AbstractVector{<:Real}=fill(inv(length(support)), length(support) - ) - -Construct a multivariate discrete nonparametric probability distribution -from a matrix as `support` where each row is a sample, and corresponding -probabilities `p`. If the probability vector argument is not passed, then -equal probability is assigned to each entry in the support. - -# Examples -```julia -# the rows correspond to the samples -using LinearAlgebra -μ = MvDiscreteNonParametric(rand(10,3), normalize!(rand(10),1)) -``` -""" -function MvDiscreteNonParametric( - support::Matrix{<:Real}, - p::AbstractVector{<:Real}=fill(inv(size(support)[1]), size(support)[1]) -) - return MvDiscreteNonParametric(nestedview(support'), p) + return MvDiscreteNonParametric{eltype(support),eltype(p),typeof(support),typeof(p)}(support, p) end Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = T From 3baece6339d86dfcea873cd6c1c2f6b069d59461 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Fri, 19 Nov 2021 21:56:01 -0300 Subject: [PATCH 14/21] Reverting wrong formatting changes. --- src/Distributions.jl | 4 +--- src/multivariates.jl | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/Distributions.jl b/src/Distributions.jl index cfcc4d4c8..000dd5165 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -25,13 +25,10 @@ import PDMats: dim, PDMat, invquad using SpecialFunctions -using ArraysOfArrays - import ChainRulesCore import DensityInterface - export # re-export Statistics mean, median, quantile, std, var, cov, cor, @@ -101,6 +98,7 @@ export FullNormalCanon, Gamma, DiscreteNonParametric, + GeneralDiscreteNonParametric, GeneralizedPareto, GeneralizedExtremeValue, Geometric, diff --git a/src/multivariates.jl b/src/multivariates.jl index c0c75142d..5a3fcf9de 100644 --- a/src/multivariates.jl +++ b/src/multivariates.jl @@ -29,8 +29,8 @@ function _rand!(rng::AbstractRNG, s::Sampleable{Multivariate}, m::AbstractMatrix @boundscheck size(m, 1) == length(s) || throw(DimensionMismatch("Output size inconsistent with sample length.")) smp = sampler(s) - for i in Base.OneTo(size(m, 2)) - _rand!(rng, smp, view(m, :, i)) + for i in Base.OneTo(size(m,2)) + _rand!(rng, smp, view(m,:,i)) end return m end @@ -47,7 +47,7 @@ end rand!(rng::AbstractRNG, s::Sampleable{Multivariate}, X::AbstractArray{<:AbstractVector}) = @inbounds rand!(rng, s, X, - !all([isassigned(X, i) for i in eachindex(X)]) || + !all([isassigned(X,i) for i in eachindex(X)]) || !all(length.(X) .== length(s))) function rand!(rng::AbstractRNG, s::Sampleable{Multivariate}, @@ -90,11 +90,11 @@ rand(rng::AbstractRNG, s::Sampleable{Multivariate,Continuous}) = If ``x`` is a vector, it returns whether x is within the support of ``d``. If ``x`` is a matrix, it returns whether every column in ``x`` is within the support of ``d``. """ -insupport{D <: MultivariateDistribution}(d::Union{D,Type{D}}, x::AbstractArray) +insupport{D<:MultivariateDistribution}(d::Union{D, Type{D}}, x::AbstractArray) -function insupport!(r::AbstractArray, d::Union{D,Type{D}}, X::AbstractMatrix) where D <: MultivariateDistribution +function insupport!(r::AbstractArray, d::Union{D,Type{D}}, X::AbstractMatrix) where D<:MultivariateDistribution n = length(r) - size(X) == (length(d), n) || + size(X) == (length(d),n) || throw(DimensionMismatch("Inconsistent array dimensions.")) for i in 1:n @inbounds r[i] = insupport(d, view(X, :, i)) @@ -102,8 +102,8 @@ function insupport!(r::AbstractArray, d::Union{D,Type{D}}, X::AbstractMatrix) wh return r end -insupport(d::Union{D,Type{D}}, X::AbstractMatrix) where {D <: MultivariateDistribution} = - insupport!(BitArray(undef, size(X, 2)), d, X) +insupport(d::Union{D,Type{D}}, X::AbstractMatrix) where {D<:MultivariateDistribution} = + insupport!(BitArray(undef, size(X,2)), d, X) ## statistics @@ -154,11 +154,11 @@ function cor(d::MultivariateDistribution) R = Matrix{eltype(C)}(undef, n, n) for j = 1:n - for i = 1:j - 1 + for i = 1:j-1 @inbounds R[i, j] = R[j, i] end R[j, j] = 1.0 - for i = j + 1:n + for i = j+1:n @inbounds R[i, j] = C[i, j] / sqrt(C[i, i] * C[j, j]) end end @@ -208,15 +208,15 @@ function pdf(d::MultivariateDistribution, X::AbstractVector) end function _logpdf!(r::AbstractArray, d::MultivariateDistribution, X::AbstractMatrix) - for i in 1:size(X, 2) - @inbounds r[i] = logpdf(d, view(X, :, i)) + for i in 1 : size(X,2) + @inbounds r[i] = logpdf(d, view(X,:,i)) end return r end function _pdf!(r::AbstractArray, d::MultivariateDistribution, X::AbstractMatrix) - for i in 1:size(X, 2) - @inbounds r[i] = pdf(d, view(X, :, i)) + for i in 1 : size(X,2) + @inbounds r[i] = pdf(d, view(X,:,i)) end return r end @@ -275,6 +275,7 @@ end for fname in ["dirichlet.jl", "multinomial.jl", "dirichletmultinomial.jl", + "generaldiscretenonparametric.jl", "mvdiscretenonparametric.jl", "mvnormal.jl", "mvnormalcanon.jl", From 10ba49341c41fd113c943776f85181da362bf07f Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Fri, 19 Nov 2021 23:40:37 -0300 Subject: [PATCH 15/21] Fixed functionalities mean, var and cov for vector of vectors without ArraysOfArrays. --- src/multivariate/mvdiscretenonparametric.jl | 56 +++++++++------------ 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 15c94204c..04a96dc06 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -1,20 +1,9 @@ -struct GeneralDiscreteNonParametric{VF,T,P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} <: Distribution{VF,Discrete} - support::Ts - p::Ps - - function GeneralDiscreteNonParametric{VF,T,P,Ts,Ps}(support::Ts, - p::Ps; check_args=true) where {VF,T,P<: Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} - if check_args - length(support) == length(p) || error("length of `support` and `p` must be equal") - isprobvec(p) || error("`p` must be a probability vector") - allunique(support) || error("`support` must contain only unique values") - end - new{VF,T,P,Ts,Ps}(support, p) - end -end - -const MvDiscreteNonParametric{T<:AbstractVector{<:Real}, - P<:Real,Ts<:AbstractVector{T},Ps<:AbstractVector{P}} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} +const MvDiscreteNonParametric{ + T<:AbstractVector{<:Real}, + P<:Real, + Ts<:AbstractVector{T}, + Ps<:AbstractVector{P}, +} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} """ MvDiscreteNonParametric( @@ -39,12 +28,15 @@ y = collect(eachcol(rand(7,12))) """ function MvDiscreteNonParametric( support::AbstractArray{<:AbstractVector{<:Real}}, - p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + p::AbstractVector{<:Real} = fill(inv(length(support)), length(support)), ) - return MvDiscreteNonParametric{eltype(support),eltype(p),typeof(support),typeof(p)}(support, p) + return MvDiscreteNonParametric{eltype(support),eltype(p),typeof(support),typeof(p)}( + support, + p, + ) end -Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = T +Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where {T} = T """ support(d::MvDiscreteNonParametric) @@ -64,7 +56,11 @@ probs(d::MvDiscreteNonParametric) = d.p Base.length(d::MvDiscreteNonParametric) = length(first(d.support)) Base.size(d::MvDiscreteNonParametric) = (length(d), length(d.support)) -function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real +function _rand!( + rng::AbstractRNG, + d::MvDiscreteNonParametric, + x::AbstractVector{T}, +) where {T<:Real} length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) s = d.support @@ -75,16 +71,16 @@ function _rand!(rng::AbstractRNG, d::MvDiscreteNonParametric, x::AbstractVector{ cp = p[1] i = 1 while cp <= draw && i < n - @inbounds cp += p[i += 1] + @inbounds cp += p[i+=1] end copyto!(x, s[i]) return x end -function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where T <: Real +function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where {T<:Real} s = support(d) p = probs(d) - for i in 1:length(p) + for i = 1:length(p) if s[i] == x return log(p[i]) end @@ -94,19 +90,17 @@ end function mean(d::MvDiscreteNonParametric) - return StatsBase.mean(d.support, weights(d.p)) + return StatsBase.mean(hcat(d.support...), Weights(d.p, one(eltype(d.p))),dims=2) end function var(d::MvDiscreteNonParametric) - x = support(d) + x = hcat(support(d)...) p = probs(d) - return StatsBase.var(x, Weights(p, one(eltype(p))), corrected=false) + return StatsBase.var(x, Weights(p, one(eltype(p))), 2,corrected = false) end function cov(d::MvDiscreteNonParametric) - x = support(d) + x = hcat(support(d)...) p = probs(d) - return cov(x, Weights(p, one(eltype(p))), corrected=false) + return cov(x, Weights(p, one(eltype(p))), 2,corrected = false) end - -entropy(d::MvDiscreteNonParametric) = entropy(probs(d)) From 236380adf02196859e03c5a9ec73c3d5949350ab Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Sat, 18 Dec 2021 16:48:30 -0300 Subject: [PATCH 16/21] Removing testing. --- test/mvdiscretenonparametric.jl | 154 +++++++++++++------------------- 1 file changed, 62 insertions(+), 92 deletions(-) diff --git a/test/mvdiscretenonparametric.jl b/test/mvdiscretenonparametric.jl index 69b10ec46..8bdcade82 100644 --- a/test/mvdiscretenonparametric.jl +++ b/test/mvdiscretenonparametric.jl @@ -6,9 +6,9 @@ using Random using Test -@testset "Declaring MvDiscreteNonParametric" begin +@testset "MvDiscreteNonParametric" begin - @testset "MvDiscreteNonParametric from ArraysOfArrays" begin + @testset "Declaring MvDiscreteNonParametric" begin Random.seed!(7) n = 4 @@ -17,22 +17,17 @@ using Test p = normalize!(rand(n), 1) # Passing probabilities - μ = @inferred(MvDiscreteNonParametric{Float64,Float64,typeof(A),typeof(p)}(A, p)) - @test support(μ) == A - @test length(μ) == m - @test size(μ) == size(flatview(A)') - @test probs(μ) == p - μ = @inferred(MvDiscreteNonParametric(A, p)) @test support(μ) == A @test length(μ) == m - @test size(μ) == size(flatview(A)') + @test size(μ) == (m, n) + @test probs(μ) == p # Without passing probabilities μ = @inferred(MvDiscreteNonParametric(A)) @test support(μ) == A @test length(μ) == m - @test size(μ) == size(flatview(A)') + @test size(μ) == (m, n) @test probs(μ) == fill(1 / n, n) # Array of arrays without ArraysOfArrays.jl @@ -43,98 +38,73 @@ using Test @test support(μ) == A @test length(μ) == m - @test size(μ) == (length(A), length(A[1])) + @test size(μ) == (length(A[1]), length(A)) @test probs(μ) == p end - @testset "MvDiscreteNonParametric from Matrix" begin + # @testset "Functionalities" begin + # + # function variance(d) + # v = zeros(length(d)) + # for i in 1:length(d) + # s = flatview(d.support)'[:,i] + # mₛ = mean(d)[i] + # v[i] = sum(abs2.(s .- mₛ), Weights(d.p)) + # end + # return v + # end + # + # function covariance(d) + # n = length(d) + # v = zeros(n, n) + # for i in 1:n, j in 1:n + # s = flatview(d.support)'[:,i] + # mₛ = mean(d)[i] + # + # u = flatview(d.support)'[:,j] + # mᵤ = mean(d)[j] + # + # v[i,j] = sum((s .- mₛ) .* (u .- mᵤ), Weights(d.p)) + # end + # return v + # end + # + # Random.seed!(7) + # n, m = 7, 9 + + # A = collect(eachrow(rand(n, m))) + # p = normalize!(rand(n), 1) + # μ = @inferred(MvDiscreteNonParametric(A, p)) + + # @test mean(μ) == mean(flatview(μ.support)[:,:], Weights(p), dims=2)[:] + # @test var(μ) ≈ variance(μ) + # @test cov(μ) ≈ covariance(μ) + # @test pdf(μ, flatview(μ.support)) ≈ μ.p + # @test pdf(μ, zeros(m)) == 0.0 + # @test entropy(μ) == entropy(μ.p) + # @test entropy(μ, 2) == entropy(μ.p, 2) + + # end + + @testset "Sampling" begin Random.seed!(7) - n, m = 10, 5 - A = rand(n, m) - p = normalize!(rand(n), 1) + A = collect(eachrow(rand(10,10))) + μ = MvDiscreteNonParametric(A) - # Passing probabilities - μ = @inferred(MvDiscreteNonParametric(A, p)) + # for sample in eachcol(rand(μ, 30)) + # @test sample in μ.support + # end - @test flatview(support(μ))' == A - @test length(μ) == m - @test size(μ) == size(A) - @test probs(μ) == p - - # Without passing probabilities - μ = @inferred(MvDiscreteNonParametric(A)) - - @test flatview(support(μ))' == A - @test length(μ) == m - @test size(μ) == size(A) - @test probs(μ) == fill(1 / n, n) - - end -end + A = collect(eachrow(rand(3, 2))) + μ = MvDiscreteNonParametric(A, [0.2,0.5,0.3]) - -@testset "Functionalities" begin - - - function variance(d) - v = zeros(length(d)) - for i in 1:length(d) - s = flatview(d.support)'[:,i] - mₛ = mean(d)[i] - v[i] = sum(abs2.(s .- mₛ), Weights(d.p)) + for i in 1:3 + samples = nestedview(rand(μ, 10000)) + @test abs(mean([A[i,:] == s for s in samples]) - μ.p[i]) < 0.05 end - return v - end - - function covariance(d) - n = length(d) - v = zeros(n, n) - for i in 1:n, j in 1:n - s = flatview(d.support)'[:,i] - mₛ = mean(d)[i] - - u = flatview(d.support)'[:,j] - mᵤ = mean(d)[j] - - v[i,j] = sum((s .- mₛ) .* (u .- mᵤ), Weights(d.p)) - end -return v - end - -Random.seed!(7) - n, m = 7, 9 - A = rand(n, m) - p = normalize!(rand(n), 1) - μ = @inferred(MvDiscreteNonParametric(A, p)) - - @test mean(μ) == mean(flatview(μ.support)[:,:], Weights(p), dims=2)[:] - @test var(μ) ≈ variance(μ) - @test cov(μ) ≈ covariance(μ) - @test pdf(μ, flatview(μ.support)) ≈ μ.p - @test pdf(μ, zeros(m)) == 0.0 - @test entropy(μ) == entropy(μ.p) - @test entropy(μ, 2) == entropy(μ.p, 2) - -end - -@testset "Sampling" begin - Random.seed!(7) - μ = MvDiscreteNonParametric(rand(10, 10)) - @test rand(μ) in μ.support - - for sample in eachcol(rand(μ, 10)) - @test sample in μ.support - end - - A = rand(3, 2) - μ = MvDiscreteNonParametric(A, [0.2,0.5,0.3]) - - for i in 1:3 - samples = nestedview(rand(μ, 10000)) - @test abs(mean([A[i,:] == s for s in samples]) - μ.p[i]) < 0.05 - end + end From eeba4f26434f3053718cc1213f5add9bc37159af Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Sun, 19 Dec 2021 10:04:26 -0300 Subject: [PATCH 17/21] Added rand function to generaldiscretenonparametric, but _rand! is not working. --- .../generaldiscretenonparametric.jl | 31 ++++++++++++++ src/multivariate/mvdiscretenonparametric.jl | 42 +++++++++---------- .../discrete/discretenonparametric.jl | 13 ------ 3 files changed, 51 insertions(+), 35 deletions(-) create mode 100644 src/multivariate/generaldiscretenonparametric.jl diff --git a/src/multivariate/generaldiscretenonparametric.jl b/src/multivariate/generaldiscretenonparametric.jl new file mode 100644 index 000000000..8af51f8c9 --- /dev/null +++ b/src/multivariate/generaldiscretenonparametric.jl @@ -0,0 +1,31 @@ +struct GeneralDiscreteNonParametric{VF,T,P <: Real,Ts <: AbstractVector{T},Ps <: AbstractVector{P},} <: Distribution{VF,Discrete} + support::Ts + p::Ps + + function GeneralDiscreteNonParametric{VF,T,P,Ts,Ps}( + support::Ts, + p::Ps; + check_args=true, + ) where {VF,T,P <: Real,Ts <: AbstractVector{T},Ps <: AbstractVector{P}} + if check_args + length(support) == length(p) || + error("length of `support` and `p` must be equal") + isprobvec(p) || error("`p` must be a probability vector") + allunique(support) || error("`support` must contain only unique values") + end + new{VF,T,P,Ts,Ps}(support, p) + end +end + +function rand(rng::AbstractRNG, d::GeneralDiscreteNonParametric) + x = support(d) + p = probs(d) + n = length(p) + draw = rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i +=1] + end + return x[i] +end diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 04a96dc06..11dacbb08 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -51,31 +51,29 @@ Get the vector of probabilities associated with the support of `d`. probs(d::MvDiscreteNonParametric) = d.p -# It would be more intuitive if length was the -# Base.length(d::MvDiscreteNonParametric) = length(first(d.support)) Base.size(d::MvDiscreteNonParametric) = (length(d), length(d.support)) -function _rand!( - rng::AbstractRNG, - d::MvDiscreteNonParametric, - x::AbstractVector{T}, -) where {T<:Real} - - length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) - s = d.support - p = d.p - - n = length(p) - draw = Base.rand(rng, float(eltype(p))) - cp = p[1] - i = 1 - while cp <= draw && i < n - @inbounds cp += p[i+=1] - end - copyto!(x, s[i]) - return x -end +# function _rand!( +# rng::AbstractRNG, +# d::MvDiscreteNonParametric, +# x::AbstractVector{T}, +# ) where {T<:Real} + +# length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) +# s = d.support +# p = d.p + +# n = length(p) +# draw = Base.rand(rng, float(eltype(p))) +# cp = p[1] +# i = 1 +# while cp <= draw && i < n +# @inbounds cp += p[i+=1] +# end +# copyto!(x, s[i]) +# return x +# end function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where {T<:Real} s = support(d) diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index 987f351a4..5f7537bec 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -69,19 +69,6 @@ Base.isapprox(c1::D, c2::D) where D<:DiscreteNonParametric = # Sampling -function rand(rng::AbstractRNG, d::DiscreteNonParametric) - x = support(d) - p = probs(d) - n = length(p) - draw = rand(rng, float(eltype(p))) - cp = p[1] - i = 1 - while cp <= draw && i < n - @inbounds cp += p[i +=1] - end - return x[i] -end - sampler(d::DiscreteNonParametric) = DiscreteNonParametricSampler(support(d), probs(d)) From b208df3370dd990da201ae1c5e07ab74f8c475f7 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 20 Dec 2021 20:11:19 -0300 Subject: [PATCH 18/21] Moving some functions to generaldiscretenonparametric. --- .../generaldiscretenonparametric.jl | 15 ++++++ src/multivariate/mvdiscretenonparametric.jl | 53 +++++++------------ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/multivariate/generaldiscretenonparametric.jl b/src/multivariate/generaldiscretenonparametric.jl index 8af51f8c9..7a32e268d 100644 --- a/src/multivariate/generaldiscretenonparametric.jl +++ b/src/multivariate/generaldiscretenonparametric.jl @@ -29,3 +29,18 @@ function rand(rng::AbstractRNG, d::GeneralDiscreteNonParametric) end return x[i] end + +""" + support(d::MvDiscreteNonParametric) +Get a sorted AbstractVector defining the support of `d`. +""" +support(d::GeneralDiscreteNonParametric) = d.support + +""" + probs(d::MvDiscreteNonParametric) +Get the vector of probabilities associated with the support of `d`. +""" +probs(d::GeneralDiscreteNonParametric) = d.p + + +Base.length(d::GeneralDiscreteNonParametric) = length(first(d.support)) diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 11dacbb08..40c6200c6 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -36,44 +36,29 @@ function MvDiscreteNonParametric( ) end -Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where {T} = T +Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = Base.eltype(T) -""" - support(d::MvDiscreteNonParametric) -Get a sorted AbstractVector defining the support of `d`. -""" -support(d::MvDiscreteNonParametric) = d.support - -""" - probs(d::MvDiscreteNonParametric) -Get the vector of probabilities associated with the support of `d`. -""" -probs(d::MvDiscreteNonParametric) = d.p +function _rand!( + rng::AbstractRNG, + d::MvDiscreteNonParametric, + x::AbstractVector{T}, +) where {T<:Real} -Base.length(d::MvDiscreteNonParametric) = length(first(d.support)) -Base.size(d::MvDiscreteNonParametric) = (length(d), length(d.support)) + length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) + s = d.support + p = d.p -# function _rand!( -# rng::AbstractRNG, -# d::MvDiscreteNonParametric, -# x::AbstractVector{T}, -# ) where {T<:Real} - -# length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) -# s = d.support -# p = d.p - -# n = length(p) -# draw = Base.rand(rng, float(eltype(p))) -# cp = p[1] -# i = 1 -# while cp <= draw && i < n -# @inbounds cp += p[i+=1] -# end -# copyto!(x, s[i]) -# return x -# end + n = length(p) + draw = Base.rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i+=1] + end + copyto!(x, s[i]) + return x +end function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where {T<:Real} s = support(d) From b2f0ec0a351d50924025376bd442662fbba88902 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Mon, 20 Dec 2021 21:17:43 -0300 Subject: [PATCH 19/21] Moving some functions to generaldiscretenonparametric. --- .../generaldiscretenonparametric.jl | 33 ++++++++++++++++++ src/multivariate/mvdiscretenonparametric.jl | 34 ------------------- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/multivariate/generaldiscretenonparametric.jl b/src/multivariate/generaldiscretenonparametric.jl index 7a32e268d..3486df6ce 100644 --- a/src/multivariate/generaldiscretenonparametric.jl +++ b/src/multivariate/generaldiscretenonparametric.jl @@ -44,3 +44,36 @@ probs(d::GeneralDiscreteNonParametric) = d.p Base.length(d::GeneralDiscreteNonParametric) = length(first(d.support)) + +function _rand!( + rng::AbstractRNG, + d::GeneralDiscreteNonParametric, + x::AbstractVector{T}, + ) where {T<:Real} + + length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) + s = d.support + p = d.p + + n = length(p) + draw = Base.rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i+=1] + end + copyto!(x, s[i]) + return x +end + + +function _logpdf(d::GeneralDiscreteNonParametric, x::AbstractVector{T}) where {T<:Real} + s = support(d) + p = probs(d) + for i = 1:length(p) + if s[i] == x + return log(p[i]) + end + end + return log(zero(eltype(p))) +end diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl index 40c6200c6..3e6e2947e 100644 --- a/src/multivariate/mvdiscretenonparametric.jl +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -38,40 +38,6 @@ end Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = Base.eltype(T) - -function _rand!( - rng::AbstractRNG, - d::MvDiscreteNonParametric, - x::AbstractVector{T}, -) where {T<:Real} - - length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) - s = d.support - p = d.p - - n = length(p) - draw = Base.rand(rng, float(eltype(p))) - cp = p[1] - i = 1 - while cp <= draw && i < n - @inbounds cp += p[i+=1] - end - copyto!(x, s[i]) - return x -end - -function _logpdf(d::MvDiscreteNonParametric, x::AbstractVector{T}) where {T<:Real} - s = support(d) - p = probs(d) - for i = 1:length(p) - if s[i] == x - return log(p[i]) - end - end - return log(zero(eltype(p))) -end - - function mean(d::MvDiscreteNonParametric) return StatsBase.mean(hcat(d.support...), Weights(d.p, one(eltype(d.p))),dims=2) end From ad6d01e9afb660b1e1448431ab24dbe481e67a34 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Tue, 21 Dec 2021 08:55:37 -0300 Subject: [PATCH 20/21] Removing ArraysOfArarys as dependency on tests. --- Project.toml | 3 +- test/mvdiscretenonparametric.jl | 107 +++++++++++++++----------------- 2 files changed, 50 insertions(+), 60 deletions(-) diff --git a/Project.toml b/Project.toml index 547572357..55e3e8b22 100644 --- a/Project.toml +++ b/Project.toml @@ -31,7 +31,6 @@ StatsFuns = "0.8, 0.9" julia = "1" [extras] -ArraysOfArrays = "65a8f2f4-9b39-5baf-92e2-a9cc46fdf018" Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -43,4 +42,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["ArraysOfArrays", "StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test"] +test = ["StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test"] diff --git a/test/mvdiscretenonparametric.jl b/test/mvdiscretenonparametric.jl index 8bdcade82..d907920fa 100644 --- a/test/mvdiscretenonparametric.jl +++ b/test/mvdiscretenonparametric.jl @@ -1,5 +1,4 @@ using Distributions -using ArraysOfArrays using StatsBase using LinearAlgebra using Random @@ -13,21 +12,21 @@ using Test Random.seed!(7) n = 4 m = 2 - A = nestedview(rand(n, m)') + A = collect(eachrow(rand(n, m))) p = normalize!(rand(n), 1) # Passing probabilities μ = @inferred(MvDiscreteNonParametric(A, p)) @test support(μ) == A @test length(μ) == m - @test size(μ) == (m, n) + # @test size(μ) == (m, n) @test probs(μ) == p # Without passing probabilities μ = @inferred(MvDiscreteNonParametric(A)) @test support(μ) == A @test length(μ) == m - @test size(μ) == (m, n) + # @test size(μ) == (m, n) @test probs(μ) == fill(1 / n, n) # Array of arrays without ArraysOfArrays.jl @@ -38,71 +37,63 @@ using Test @test support(μ) == A @test length(μ) == m - @test size(μ) == (length(A[1]), length(A)) @test probs(μ) == p end - # @testset "Functionalities" begin - # - # function variance(d) - # v = zeros(length(d)) - # for i in 1:length(d) - # s = flatview(d.support)'[:,i] - # mₛ = mean(d)[i] - # v[i] = sum(abs2.(s .- mₛ), Weights(d.p)) - # end - # return v - # end - # - # function covariance(d) - # n = length(d) - # v = zeros(n, n) - # for i in 1:n, j in 1:n - # s = flatview(d.support)'[:,i] - # mₛ = mean(d)[i] - # - # u = flatview(d.support)'[:,j] - # mᵤ = mean(d)[j] - # - # v[i,j] = sum((s .- mₛ) .* (u .- mᵤ), Weights(d.p)) - # end - # return v - # end - # - # Random.seed!(7) - # n, m = 7, 9 - - # A = collect(eachrow(rand(n, m))) - # p = normalize!(rand(n), 1) - # μ = @inferred(MvDiscreteNonParametric(A, p)) - - # @test mean(μ) == mean(flatview(μ.support)[:,:], Weights(p), dims=2)[:] - # @test var(μ) ≈ variance(μ) - # @test cov(μ) ≈ covariance(μ) - # @test pdf(μ, flatview(μ.support)) ≈ μ.p - # @test pdf(μ, zeros(m)) == 0.0 - # @test entropy(μ) == entropy(μ.p) - # @test entropy(μ, 2) == entropy(μ.p, 2) - - # end - - @testset "Sampling" begin + @testset "Functionalities" begin + + function variance(d) + v = zeros(length(d)) + for i in 1:length(d) + s = hcat(μ.support...)[i,:] + mₛ = mean(d)[i] + v[i] = sum(abs2.(s .- mₛ), Weights(d.p)) + end + return v + end + + function covariance(d) + n = length(d) + v = zeros(n, n) + for i in 1:n, j in 1:n + s = hcat(μ.support...)[i,:] + mₛ = mean(d)[i] + + u = hcat(μ.support...)[j,:] + mᵤ = mean(d)[j] + + v[i,j] = sum((s .- mₛ) .* (u .- mᵤ), Weights(d.p)) + end + return v + end + Random.seed!(7) - A = collect(eachrow(rand(10,10))) - μ = MvDiscreteNonParametric(A) + n, m = 7, 9 - # for sample in eachcol(rand(μ, 30)) - # @test sample in μ.support - # end + A = collect(eachrow(rand(n, m))) + p = normalize!(rand(n), 1) + μ = @inferred(MvDiscreteNonParametric(A, p)) + @test mean(μ) ≈ mean(hcat(μ.support...), Weights(p), dims=2)[:] + @test var(μ) ≈ variance(μ) + @test cov(μ) ≈ covariance(μ) + @test pdf(μ, μ.support) ≈ μ.p + @test pdf(μ, zeros(m)) == 0.0 + # @test entropy(μ) == entropy(μ.p) + # @test entropy(μ, 2) == entropy(μ.p, 2) + + end + + @testset "Sampling" begin + Random.seed!(7) A = collect(eachrow(rand(3, 2))) - μ = MvDiscreteNonParametric(A, [0.2,0.5,0.3]) + μ = MvDiscreteNonParametric(A, [0.9,0.1,0.0]) for i in 1:3 - samples = nestedview(rand(μ, 10000)) - @test abs(mean([A[i,:] == s for s in samples]) - μ.p[i]) < 0.05 + samples = rand(μ, 10000) + @test abs(mean([s == A[i] for s in eachcol(samples)]) - μ.p[i]) < 0.05 end end From 001d73d332b97b0faba25c7d2457f8e343af5a6b Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Tue, 21 Dec 2021 08:57:12 -0300 Subject: [PATCH 21/21] Renamed test set. --- ...discretenonparametric.jl => generaldiscretenonparametric.jl} | 2 +- test/runtests.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename test/{mvdiscretenonparametric.jl => generaldiscretenonparametric.jl} (98%) diff --git a/test/mvdiscretenonparametric.jl b/test/generaldiscretenonparametric.jl similarity index 98% rename from test/mvdiscretenonparametric.jl rename to test/generaldiscretenonparametric.jl index d907920fa..c743aaf0f 100644 --- a/test/mvdiscretenonparametric.jl +++ b/test/generaldiscretenonparametric.jl @@ -5,7 +5,7 @@ using Random using Test -@testset "MvDiscreteNonParametric" begin +@testset "GeneralDiscreteNonParametric" begin @testset "Declaring MvDiscreteNonParametric" begin diff --git a/test/runtests.jl b/test/runtests.jl index 1f9ced70e..6194d953a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -54,7 +54,7 @@ const tests = [ "pgeneralizedgaussian", "product", "discretenonparametric", - "mvdiscretenonparametric", + "generaldiscretenonparametric", "functionals", "chernoff", "univariate_bounds",