diff --git a/.github/workflows/DocPreviewCleanup.yml b/.github/workflows/DocPreviewCleanup.yml new file mode 100644 index 000000000..bc29462c0 --- /dev/null +++ b/.github/workflows/DocPreviewCleanup.yml @@ -0,0 +1,26 @@ +name: Doc Preview Cleanup + +on: + pull_request: + types: [closed] + +jobs: + doc-preview-cleanup: + runs-on: ubuntu-latest + steps: + - name: Checkout gh-pages branch + uses: actions/checkout@v2 + with: + ref: gh-pages + - name: Delete preview and history + push changes + run: | + if [ -d "previews/PR$PRNUM" ]; then + git config user.name "Documenter.jl" + git config user.email "documenter@juliadocs.github.io" + git rm -rf "previews/PR$PRNUM" + git commit -m "delete preview" + git branch gh-pages-new $(echo "delete history" | git commit-tree HEAD^{tree}) + git push --force origin gh-pages-new:gh-pages + fi + env: + PRNUM: ${{ github.event.number }} diff --git a/CITATION.bib b/CITATION.bib index 81141f167..05b42fe0a 100644 --- a/CITATION.bib +++ b/CITATION.bib @@ -1,18 +1,16 @@ % reference paper -@article{2019arXiv190708611B, - author = {{Besan{\c{c}}on}, Mathieu and {Anthoff}, David and {Arslan}, Alex and - {Byrne}, Simon and {Lin}, Dahua and {Papamarkou}, Theodore and - {Pearson}, John}, - title = {Distributions.jl: Definition and Modeling of Probability Distributions in the JuliaStats Ecosystem}, - journal = {arXiv e-prints}, - keywords = {Statistics - Computation, Computer Science - Mathematical Software}, - year = 2019, - month = "Jul", - eid = {arXiv:1907.08611}, - pages = {arXiv:1907.08611}, - archivePrefix = {arXiv}, - eprint = {1907.08611}, - primaryClass = {stat.CO}, +@article{JSSv098i16, + author = {Mathieu Besançon and Theodore Papamarkou and David Anthoff and Alex Arslan and Simon Byrne and Dahua Lin and John Pearson}, + title = {Distributions.jl: Definition and Modeling of Probability Distributions in the JuliaStats Ecosystem}, + journal = {Journal of Statistical Software}, + volume = {98}, + number = {16}, + year = {2021}, + keywords = {Julia; distributions; modeling; interface; mixture; KDE; sampling; probabilistic programming; inference}, + issn = {1548-7660}, + pages = {1--30}, + doi = {10.18637/jss.v098.i16}, + url = {https://www.jstatsoft.org/v098/i16} } % reference for the software itself @@ -32,7 +30,7 @@ @misc{Distributions.jl-2019 Moritz Schauer and other contributors}, title = {{JuliaStats/Distributions.jl: a Julia package for probability distributions and associated functions}}, - month = july, + month = jul, year = 2019, doi = {10.5281/zenodo.2647458}, url = {https://doi.org/10.5281/zenodo.2647458} diff --git a/Project.toml b/Project.toml index 6c3da9f43..1f3155e4c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,12 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.5" +version = "0.25.29" [deps] ArraysOfArrays = "65a8f2f4-9b39-5baf-92e2-a9cc46fdf018" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" @@ -16,10 +18,13 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] ArraysOfArrays = "0.5" -FillArrays = "0.9, 0.10, 0.11" +ChainRulesCore = "1" 
+DensityInterface = "0.3.2"
+FillArrays = "0.9, 0.10, 0.11, 0.12"
 PDMats = "0.10, 0.11"
 QuadGK = "2"
 SpecialFunctions = "0.8, 0.9, 0.10, 1.0"
@@ -29,6 +34,7 @@ julia = "1"
 
 [extras]
 Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
+ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -38,4 +44,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["StableRNGs", "Calculus", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test"]
+test = ["StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test"]
diff --git a/docs/Project.toml b/docs/Project.toml
index 1bc937b99..fe1618933 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,7 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
 
 [compat]
 Documenter = "0.26, 0.27"
+GR = "0.61, 0.62"
diff --git a/docs/make.jl b/docs/make.jl
index 289a5ec0b..a92bc4679 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -13,13 +13,16 @@ makedocs(
         "truncate.md",
         "multivariate.md",
         "matrix.md",
+        "cholesky.md",
         "mixture.md",
         "fit.md",
         "extends.md",
+        "density_interface.md",
     ]
 )
 
-deploydocs(
+deploydocs(;
     repo = "github.com/JuliaStats/Distributions.jl.git",
-    versions = ["stable" => "v^", "v#.#", "dev" => "master"]
+    versions = ["stable" => "v^", "v#.#", "dev" => "master"],
+    push_preview=true,
 )
diff --git a/docs/src/cholesky.md b/docs/src/cholesky.md
new file mode 100644
index 000000000..430beaaa0
--- /dev/null
+++ b/docs/src/cholesky.md
@@ -0,0 +1,15 @@
+# [Cholesky-variate Distributions](@id cholesky-variates)
+
+*Cholesky-variate distributions* are distributions whose variate forms are `CholeskyVariate`. This means each draw is a factorization of a positive-definite matrix of type `LinearAlgebra.Cholesky` (the object produced by the function `LinearAlgebra.cholesky` applied to a dense positive-definite matrix).
+
+## Distributions
+
+```@docs
+LKJCholesky
+```
+
+## Index
+
+```@index
+Pages = ["cholesky.md"]
+```
diff --git a/docs/src/density_interface.md b/docs/src/density_interface.md
new file mode 100644
index 000000000..5c01357df
--- /dev/null
+++ b/docs/src/density_interface.md
@@ -0,0 +1,5 @@
+# Support for DensityInterface
+
+`Distributions` supports [`DensityInterface`](https://github.com/JuliaMath/DensityInterface.jl) for distributions.
+
+For *single* variate values `x`, `DensityInterface.logdensityof(d::Distribution, x)` is equivalent to `logpdf(d, x)` and `DensityInterface.densityof(d::Distribution, x)` is equivalent to `pdf(d, x)`.
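+
+For illustration, a minimal sketch of this equivalence (the distribution and the
+evaluation point below are arbitrary):
+
+```julia
+using Distributions, DensityInterface
+
+d = Normal(0.0, 1.0)
+
+# for a single variate, the DensityInterface functions agree with logpdf/pdf
+DensityInterface.logdensityof(d, 0.5) == logpdf(d, 0.5)  # true
+DensityInterface.densityof(d, 0.5) == pdf(d, 0.5)        # true
+```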
diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index c1edc18cb..be4f5427a 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -71,8 +71,12 @@ invcov(::Distributions.AbstractMvNormal) logdetcov(::Distributions.AbstractMvNormal) sqmahal(::Distributions.AbstractMvNormal, ::AbstractArray) rand(::AbstractRNG, ::Distributions.AbstractMvNormal) +minimum(::Distributions.AbstractMvNormal) +maximum(::Distributions.AbstractMvNormal) +extrema(::Distributions.AbstractMvNormal) ``` + ### MvLogNormal In addition to the methods listed in the common interface above, we also provide the following methods: diff --git a/docs/src/univariate.md b/docs/src/univariate.md index 0418a38fa..f9f1922a7 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -92,56 +92,383 @@ rand!(::AbstractRNG, ::UnivariateDistribution, ::AbstractArray) ## Continuous Distributions +```@setup plotdensity +using Distributions, GR + +# display figures as SVGs +GR.inline("svg") + +# plot probability density of continuous distributions +function plotdensity( + (xmin, xmax), + dist::ContinuousUnivariateDistribution; + npoints=299, + title="", + kwargs..., +) + figure(; + title=title, + xlabel="x", + ylabel="density", + grid=false, + backgroundcolor=0, # white instead of transparent background for dark Documenter scheme + font="Helvetica_Regular", # work around https://github.com/JuliaPlots/Plots.jl/issues/2596 + linewidth=2.0, # thick lines + kwargs..., + ) + return plot(range(xmin, xmax; length=npoints), Base.Fix1(pdf, dist)) +end + +# convenience function with automatic title +function plotdensity( + xmin_xmax, + ::Type{T}, + args=(); + title=string(T) * "(" * join(args, ", ") * ")", + kwargs... +) where {T<:ContinuousUnivariateDistribution} + return plotdensity(xmin_xmax, T(args...); title=title, kwargs...) 
+end +``` + ```@docs Arcsine +``` +```@example plotdensity +plotdensity((0.001, 0.999), Arcsine, (0, 1)) # hide +``` + +```@docs Beta +``` +```@example plotdensity +plotdensity((0, 1), Beta, (2, 2)) # hide +``` + +```@docs BetaPrime +``` +```@example plotdensity +plotdensity((0, 1), BetaPrime, (1, 2)) # hide +``` + +```@docs Biweight +``` +```@example plotdensity +plotdensity((-1, 3), Biweight, (1, 2)) # hide +``` + +```@docs Cauchy +``` +```@example plotdensity +plotdensity((-12, 5), Cauchy, (-2, 1)) # hide +``` + +```@docs Chernoff +``` +```@example plotdensity +plotdensity((-3, 3), Chernoff) # hide +``` + +```@docs Chi +``` +```@example plotdensity +plotdensity((0.001, 3), Chi, (1,)) # hide +``` + +```@docs Chisq +``` +```@example plotdensity +plotdensity((0, 9), Chisq, (3,)) # hide +``` + +```@docs Cosine +``` +```@example plotdensity +plotdensity((-1, 1), Cosine, (0, 1)) # hide +``` + +```@docs Epanechnikov +``` +```@example plotdensity +plotdensity((-1, 1), Epanechnikov, (0, 1)) # hide +``` + +```@docs Erlang +``` +```@example plotdensity +plotdensity((0, 8), Erlang, (7, 0.5)) # hide +``` + +```@docs Exponential +``` +```@example plotdensity +plotdensity((0, 3.5), Exponential, (0.5,)) # hide +``` + +```@docs FDist +``` +```@example plotdensity +plotdensity((0, 10), FDist, (10, 1)) # hide +``` + +```@docs Frechet +``` +```@example plotdensity +plotdensity((0, 20), Frechet, (1, 1)) # hide +``` + +```@docs Gamma +``` +```@example plotdensity +plotdensity((0, 18), Gamma, (7.5, 1)) # hide +``` + +```@docs GeneralizedExtremeValue +``` +```@example plotdensity +plotdensity((0, 30), GeneralizedExtremeValue, (0, 1, 1)) # hide +``` + +```@docs GeneralizedPareto +``` +```@example plotdensity +plotdensity((0, 20), GeneralizedPareto, (0, 1, 1)) # hide +``` + +```@docs Gumbel +``` +```@example plotdensity +plotdensity((-2, 5), Gumbel, (0, 1)) # hide +``` + +```@docs InverseGamma +``` +```@example plotdensity +plotdensity((0.001, 1), InverseGamma, (3, 0.5)) # hide +``` + +```@docs InverseGaussian +``` +```@example plotdensity +plotdensity((0, 5), InverseGaussian, (1, 1)) # hide +``` + +```@docs Kolmogorov +``` +```@example plotdensity +plotdensity((0, 2), Kolmogorov) # hide +``` + +```@docs KSDist KSOneSided +``` + +```@docs Laplace +``` +```@example plotdensity +plotdensity((-20, 20), Laplace, (0, 4)) # hide +``` + +```@docs Levy +``` +```@example plotdensity +plotdensity((0, 20), Levy, (0, 1)) # hide +``` + +```@docs LocationScale +``` +```@example plotdensity +plotdensity( + (-2, 5), LocationScale(2, 1, Normal(0, 1)); title="LocationScale(2, 1, Normal(0, 1))", +) # hide +``` + +```@docs Logistic +``` +```@example plotdensity +plotdensity((-4, 8), Logistic, (2, 1)) # hide +``` + +```@docs LogitNormal +``` +```@example plotdensity +plotdensity((0, 1), LogitNormal, (0, 1)) # hide +``` + +```@docs LogNormal +``` +```@example plotdensity +plotdensity((0, 5), LogNormal, (0, 1)) # hide +``` + +```@docs +LogUniform +``` +```@example plotdensity +plotdensity((0, 11), LogUniform, (1, 10)) # hide +``` + +```@docs NoncentralBeta +``` +```@example plotdensity +plotdensity((0, 1), NoncentralBeta, (2, 3, 1)) # hide +``` + +```@docs NoncentralChisq +``` +```@example plotdensity +plotdensity((0, 20), NoncentralChisq, (2, 3)) # hide +``` + +```@docs NoncentralF +``` +```@example plotdensity +plotdensity((0, 10), NoncentralF, (2, 3, 1)) # hide +``` + +```@docs NoncentralT +``` +```@example plotdensity +plotdensity((-1, 20), NoncentralT, (2, 3)) # hide +``` + +```@docs Normal +``` +```@example plotdensity 
+plotdensity((-4, 4), Normal, (0, 1)) # hide +``` + +```@docs NormalCanon +``` +```@example plotdensity +plotdensity((-4, 4), NormalCanon, (0, 1)) # hide +``` + +```@docs NormalInverseGaussian +``` +```@example plotdensity +plotdensity((-2, 2), NormalInverseGaussian, (0, 0.5, 0.2, 0.1)) # hide +``` + +```@docs Pareto +``` +```@example plotdensity +plotdensity((1, 8), Pareto, (1, 1)) # hide +``` + +```@docs PGeneralizedGaussian +``` +```@example plotdensity +plotdensity((0, 20), PGeneralizedGaussian, (0.2)) # hide +``` + +```@docs Rayleigh +``` +```@example plotdensity +plotdensity((0, 2), Rayleigh, (0.5)) # hide +``` + +```@docs +Rician +``` +```@example plotdensity +plotdensity((0, 5), Rician, (0.5, 1)) # hide +``` + +```@docs Semicircle +``` +```@example plotdensity +plotdensity((-1, 1), Semicircle, (1,)) # hide +``` + +```@docs StudentizedRange SymTriangularDist +``` +```@example plotdensity +# we only need to plot 5 equally spaced points for these parameters and limits # hide +plotdensity((-2, 2), SymTriangularDist, (0, 1); npoints=5) # hide +``` + +```@docs TDist +``` +```@example plotdensity +plotdensity((-5, 5), TDist, (5,)) # hide +``` + +```@docs TriangularDist +``` +```@example plotdensity +# we only need to plot 6 equally spaced points for these parameters and limits # hide +plotdensity((-0.5, 2), TriangularDist, (0, 1.5, 0.5); npoints=6) # hide +``` + +```@docs Triweight +``` +```@example plotdensity +plotdensity((0, 2), Triweight, (1, 1)) # hide +``` + +```@docs Uniform +``` +```@example plotdensity +plotdensity((-0.5, 1.5), Uniform, (0, 1); ylim=(0, 1.5)) # hide +``` + +```@docs VonMises +``` +```@example plotdensity +plotdensity((-π, π), VonMises, (0.5,); xlim=(-π, π), xticks=(π/5, 5), xticklabels=x -> x ≈ -π ? "-π" : (x ≈ π ? "π" : "0")) # hide +``` + +```@docs Weibull ``` +```@example plotdensity +plotdensity((0.001, 3), Weibull, (0.5, 1)) # hide +``` ## Discrete Distributions diff --git a/src/Distributions.jl b/src/Distributions.jl index 056001ddd..cfcc4d4c8 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -27,6 +27,11 @@ using SpecialFunctions using ArraysOfArrays +import ChainRulesCore + +import DensityInterface + + export # re-export Statistics mean, median, quantile, std, var, cov, cor, @@ -38,6 +43,7 @@ export Univariate, Multivariate, Matrixvariate, + CholeskyVariate, Discrete, Continuous, Sampleable, @@ -111,9 +117,11 @@ export Laplace, Levy, LKJ, + LKJCholesky, LocationScale, Logistic, LogNormal, + LogUniform, LogitNormal, MatrixBeta, MatrixFDist, @@ -145,6 +153,7 @@ export PoissonBinomial, QQPair, Rayleigh, + Rician, Semicircle, Skellam, SkewNormal, @@ -202,6 +211,7 @@ export islowerbounded, isbounded, hasfinitesupport, + kldivergence, # kl divergence between distributions kurtosis, # kurtosis of the distribution logccdf, # ccdf returning log-probability logcdf, # cdf returning log-probability @@ -281,6 +291,7 @@ include("univariates.jl") include("edgeworth.jl") include("multivariates.jl") include("matrixvariates.jl") +include("cholesky/lkjcholesky.jl") include("samplers.jl") # others @@ -295,6 +306,12 @@ include("pdfnorm.jl") include("mixtures/mixturemodel.jl") include("mixtures/unigmm.jl") +# Implementation of DensityInterface API +include("density_interface.jl") + +# Testing utilities for other packages which implement distributions. 
+include("test_utils.jl") + include("deprecates.jl") """ @@ -325,14 +342,14 @@ Supported distributions: Frechet, FullNormal, FullNormalCanon, Gamma, GeneralizedPareto, GeneralizedExtremeValue, Geometric, Gumbel, Hypergeometric, InverseWishart, InverseGamma, InverseGaussian, IsoNormal, - IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, LKJ, + IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, LKJ, LKJCholesky, Logistic, LogNormal, MatrixBeta, MatrixFDist, MatrixNormal, MatrixReshaped, MatrixTDist, MixtureModel, Multinomial, MultivariateNormal, MvLogNormal, MvNormal, MvNormalCanon, MvNormalKnownCov, MvTDist, NegativeBinomial, NoncentralBeta, NoncentralChisq, NoncentralF, NoncentralHypergeometric, NoncentralT, Normal, NormalCanon, NormalInverseGaussian, Pareto, PGeneralizedGaussian, Poisson, PoissonBinomial, - QQPair, Rayleigh, Skellam, Soliton, StudentizedRange, SymTriangularDist, TDist, TriangularDist, + QQPair, Rayleigh, Rician, Skellam, Soliton, StudentizedRange, SymTriangularDist, TDist, TriangularDist, Triweight, Truncated, TruncatedNormal, Uniform, UnivariateGMM, VonMises, VonMisesFisher, WalleniusNoncentralHypergeometric, Weibull, Wishart, ZeroMeanIsoNormal, ZeroMeanIsoNormalCanon, diff --git a/src/cholesky/lkjcholesky.jl b/src/cholesky/lkjcholesky.jl new file mode 100644 index 000000000..12beecfd8 --- /dev/null +++ b/src/cholesky/lkjcholesky.jl @@ -0,0 +1,264 @@ +""" + LKJCholesky(d::Int, η::Real, uplo='L') + +The `LKJCholesky` distribution of size ``d`` with shape parameter ``\\eta`` is a +distribution over `LinearAlgebra.Cholesky` factorisations of ``d\\times d`` real correlation +matrices (positive-definite matrices with ones on the diagonal). + +Variates or samples of the distribution are `LinearAlgebra.Cholesky` objects, as might +be returned by `F = LinearAlgebra.cholesky(R)`, so that `Matrix(F) ≈ R` is a variate or +sample of [`LKJ`](@ref). + +Sampling `LKJCholesky` is faster than sampling `LKJ`, and often having the correlation +matrix in factorized form makes subsequent computations cheaper as well. + +!!! note + `LinearAlgebra.Cholesky` stores either the upper or lower Cholesky factor, related by + `F.U == F.L'`. Both can be accessed with `F.U` and `F.L`, but if the factor + not stored is requested, then a copy is made. The `uplo` parameter specifies whether the + upper (`'U'`) or lower (`'L'`) Cholesky factor is stored when randomly generating + samples. Set `uplo` to `'U'` if the upper factor is desired to avoid allocating a copy + when calling `F.U`. + +See [`LKJ`](@ref) for more details. + +External links + +* Lewandowski D, Kurowicka D, Joe H. 
+ Generating random correlation matrices based on vines and extended onion method, + Journal of Multivariate Analysis (2009), 100(9): 1989-2001 + doi: [10.1016/j.jmva.2009.04.008](https://doi.org/10.1016/j.jmva.2009.04.008) +""" +struct LKJCholesky{T <: Real} <: Distribution{CholeskyVariate,Continuous} + d::Int + η::T + uplo::Char + logc0::T +end + +# ----------------------------------------------------------------------------- +# Constructors +# ----------------------------------------------------------------------------- + +function LKJCholesky(d::Int, η::Real, _uplo::Union{Char,Symbol} = 'L'; check_args = true) + if check_args + d > 0 || throw(ArgumentError("matrix dimension must be positive")) + η > 0 || throw(ArgumentError("shape parameter must be positive")) + end + logc0 = lkj_logc0(d, η) + uplo = _char_uplo(_uplo) + T = Base.promote_eltype(η, logc0) + return LKJCholesky(d, T(η), uplo, T(logc0)) +end + +# adapted from LinearAlgebra.char_uplo +function _char_uplo(uplo::Union{Symbol,Char}) + uplo ∈ (:U, 'U') && return 'U' + uplo ∈ (:L, 'L') && return 'L' + throw(ArgumentError("uplo argument must be either 'U' (upper) or 'L' (lower)")) +end + +# ----------------------------------------------------------------------------- +# REPL display +# ----------------------------------------------------------------------------- + +Base.show(io::IO, d::LKJCholesky) = show(io, d, (:d, :η, :uplo)) + +# ----------------------------------------------------------------------------- +# Conversion +# ----------------------------------------------------------------------------- + +function Base.convert(::Type{LKJCholesky{T}}, d::LKJCholesky) where T <: Real + return LKJCholesky{T}(d.d, T(d.η), d.uplo, T(d.logc0)) +end +function convert(::Type{LKJCholesky{T}}, d::Integer, η::Real, uplo::Char, logc0::Real) where T <: Real + return LKJCholesky{T}(Int(d), T(η), uplo, T(logc0)) +end + +# ----------------------------------------------------------------------------- +# Properties +# ----------------------------------------------------------------------------- + +Base.eltype(::Type{LKJCholesky{T}}) where {T} = T + +function Base.size(d::LKJCholesky) + p = d.d + return (p, p) +end + +function insupport(d::LKJCholesky, R::LinearAlgebra.Cholesky) + p = d.d + factors = R.factors + (isreal(factors) && size(factors, 1) == p) || return false + iinds, jinds = axes(factors) + # check that the diagonal of U'*U or L*L' is all ones + @inbounds if R.uplo === 'U' + for (j, jind) in enumerate(jinds) + col_iinds = view(iinds, 1:j) + sum(abs2, view(factors, col_iinds, jind)) ≈ 1 || return false + end + else # R.uplo === 'L' + for (i, iind) in enumerate(iinds) + row_jinds = view(jinds, 1:i) + sum(abs2, view(factors, iind, row_jinds)) ≈ 1 || return false + end + end + return true +end + +function StatsBase.mode(d::LKJCholesky) + factors = Matrix{eltype(d)}(LinearAlgebra.I, size(d)) + return LinearAlgebra.Cholesky(factors, d.uplo, 0) +end + +StatsBase.params(d::LKJCholesky) = (d.d, d.η, d.uplo) + +@inline partype(::LKJCholesky{T}) where {T <: Real} = T + +# ----------------------------------------------------------------------------- +# Evaluation +# ----------------------------------------------------------------------------- + +function logkernel(d::LKJCholesky, R::LinearAlgebra.Cholesky) + factors = R.factors + p, η = params(d) + c = p + 2(η - 1) + p == 1 && return c * log(first(factors)) + # assuming D = diag(factors) with length(D) = p, + # logp = sum(i -> (c - i) * log(D[i]), 2:p) + logp = 
sum(Iterators.drop(enumerate(diagind(factors)), 1)) do (i, di)
+        return (c - i) * log(factors[di])
+    end
+    return logp
+end
+
+function logpdf(d::LKJCholesky, R::LinearAlgebra.Cholesky)
+    insupport(d, R) || throw(ArgumentError("provided point is not in the support"))
+    return _logpdf(d, R)
+end
+
+_logpdf(d::LKJCholesky, R::LinearAlgebra.Cholesky) = logkernel(d, R) + d.logc0
+
+pdf(d::LKJCholesky, R::LinearAlgebra.Cholesky) = exp(logpdf(d, R))
+
+loglikelihood(d::LKJCholesky, R::LinearAlgebra.Cholesky) = logpdf(d, R)
+function loglikelihood(d::LKJCholesky, Rs::AbstractArray{<:LinearAlgebra.Cholesky})
+    return sum(R -> logpdf(d, R), Rs)
+end
+
+# -----------------------------------------------------------------------------
+# Sampling
+# -----------------------------------------------------------------------------
+
+function Base.rand(rng::AbstractRNG, d::LKJCholesky)
+    factors = Matrix{eltype(d)}(undef, size(d))
+    R = LinearAlgebra.Cholesky(factors, d.uplo, 0)
+    return _lkj_cholesky_onion_sampler!(rng, d, R)
+end
+function Base.rand(rng::AbstractRNG, d::LKJCholesky, dims::Dims)
+    p = d.d
+    uplo = d.uplo
+    T = eltype(d)
+    TM = Matrix{T}
+    Rs = Array{LinearAlgebra.Cholesky{T,TM}}(undef, dims)
+    for i in eachindex(Rs)
+        factors = TM(undef, p, p)
+        Rs[i] = R = LinearAlgebra.Cholesky(factors, uplo, 0)
+        _lkj_cholesky_onion_sampler!(rng, d, R)
+    end
+    return Rs
+end
+
+Random.rand!(d::LKJCholesky, R::LinearAlgebra.Cholesky) = Random.rand!(GLOBAL_RNG, d, R)
+function Random.rand!(rng::AbstractRNG, d::LKJCholesky, R::LinearAlgebra.Cholesky)
+    return _lkj_cholesky_onion_sampler!(rng, d, R)
+end
+
+function Random.rand!(
+    rng::AbstractRNG,
+    d::LKJCholesky,
+    Rs::AbstractArray{<:LinearAlgebra.Cholesky{T,TM}},
+    allocate::Bool,
+) where {T,TM}
+    p = d.d
+    uplo = d.uplo
+    if allocate
+        for i in eachindex(Rs)
+            Rs[i] = _lkj_cholesky_onion_sampler!(
+                rng,
+                d,
+                LinearAlgebra.Cholesky(TM(undef, p, p), uplo, 0),
+            )
+        end
+    else
+        for i in eachindex(Rs)
+            _lkj_cholesky_onion_sampler!(rng, d, Rs[i])
+        end
+    end
+    return Rs
+end
+function Random.rand!(
+    rng::AbstractRNG,
+    d::LKJCholesky,
+    Rs::AbstractArray{<:LinearAlgebra.Cholesky{<:Real}},
+)
+    allocate = any(!isassigned(Rs, i) for i in eachindex(Rs)) || any(R -> size(R, 1) != d.d, Rs)
+    return Random.rand!(rng, d, Rs, allocate)
+end
+
+#
+# onion method
+#
+
+function _lkj_cholesky_onion_sampler!(
+    rng::AbstractRNG,
+    d::LKJCholesky,
+    R::LinearAlgebra.Cholesky,
+)
+    if R.uplo === 'U'
+        _lkj_cholesky_onion_tri!(rng, R.factors, d.d, d.η, Val(:U))
+    else
+        _lkj_cholesky_onion_tri!(rng, R.factors, d.d, d.η, Val(:L))
+    end
+    return R
+end
+
+function _lkj_cholesky_onion_tri!(
+    rng::AbstractRNG,
+    A::AbstractMatrix,
+    d::Int,
+    η::Real,
+    ::Val{uplo},
+) where {uplo}
+    # Section 3.2 in LKJ (2009 JMA)
+    # reformulated to incrementally construct Cholesky factor as mentioned in Section 5
+    # equivalent steps in algorithm in reference are marked.
+    @assert size(A) == (d, d)
+    A[1, 1] = 1
+    d > 1 || return A
+    β = η + (d - 2)//2
+    # 1. Initialization
+    w0 = 2 * rand(rng, Beta(β, β)) - 1
+    @inbounds if uplo === :L
+        A[2, 1] = w0
+    else
+        A[1, 2] = w0
+    end
+    @inbounds A[2, 2] = sqrt(1 - w0^2)
+    # 2. Loop, each iteration k adds row/column k+1
+    for k in 2:(d - 1)
+        # (a)
+        β -= 1//2
+        # (b)
+        y = rand(rng, Beta(k//2, β))
+        # (c)-(e)
+        # w is directionally uniform vector of length √y
+        @inbounds w = @views uplo === :L ?
A[k + 1, 1:k] : A[1:k, k + 1] + Random.randn!(rng, w) + rmul!(w, sqrt(y) / norm(w)) + # normalize so new row/column has unit norm + @inbounds A[k + 1, k + 1] = sqrt(1 - y) + end + # 3. + return A +end diff --git a/src/common.jl b/src/common.jl index 36c19a5bc..243e1a591 100644 --- a/src/common.jl +++ b/src/common.jl @@ -16,6 +16,12 @@ const Univariate = ArrayLikeVariate{0} const Multivariate = ArrayLikeVariate{1} const Matrixvariate = ArrayLikeVariate{2} +""" +`F <: CholeskyVariate` specifies that the variate or a sample is of type +`LinearAlgebra.Cholesky`. +""" +abstract type CholeskyVariate <: VariateForm end + """ `S <: ValueSupport` specifies the support of sample elements, either discrete or continuous. @@ -138,6 +144,26 @@ value_support(::Type{<:Distribution{VF,VS}}) where {VF,VS} = VS # to be decided: how to handle multivariate/matrixvariate distributions? Broadcast.broadcastable(d::UnivariateDistribution) = Ref(d) +""" + minimum(d::Distribution) + +Return the minimum of the support of `d`. +""" +minimum(d::Distribution) + +""" + maximum(d::Distribution) + +Return the maximum of the support of `d`. +""" +maximum(d::Distribution) + +""" + extrema(d::Distribution) + +Return the minimum and maximum of the support of `d` as a 2-tuple. +""" +Base.extrema(d::Distribution) = minimum(d), maximum(d) ## TODO: the following types need to be improved abstract type SufficientStats end diff --git a/src/density_interface.jl b/src/density_interface.jl new file mode 100644 index 000000000..3049a6903 --- /dev/null +++ b/src/density_interface.jl @@ -0,0 +1,19 @@ +@inline DensityInterface.hasdensity(::Distribution) = true + +for (di_func, d_func) in ((:logdensityof, :logpdf), (:densityof, :pdf)) + @eval begin + DensityInterface.$di_func(d::Distribution, x) = $d_func(d, x) + + function DensityInterface.$di_func(d::UnivariateDistribution, x::AbstractArray) + throw(ArgumentError("$(DensityInterface.$di_func) doesn't support multiple samples as an argument")) + end + + function DensityInterface.$di_func(d::MultivariateDistribution, x::AbstractMatrix) + throw(ArgumentError("$(DensityInterface.$di_func) doesn't support multiple samples as an argument")) + end + + function DensityInterface.$di_func(d::MatrixDistribution, x::AbstractArray{<:AbstractMatrix{<:Real}}) + throw(ArgumentError("$(DensityInterface.$di_func) doesn't support multiple samples as an argument")) + end + end +end diff --git a/src/deprecates.jl b/src/deprecates.jl index dc303951e..9b36f4c60 100644 --- a/src/deprecates.jl +++ b/src/deprecates.jl @@ -41,3 +41,13 @@ for fun in [:pdf, :logpdf, end @deprecate pdf(d::DiscreteUnivariateDistribution) pdf.(Ref(d), support(d)) + +# Wishart constructors +@deprecate Wishart(df::Real, S::AbstractPDMat, warn::Bool) Wishart(df, S) +@deprecate Wishart(df::Real, S::Matrix, warn::Bool) Wishart(df, S) +@deprecate Wishart(df::Real, S::Cholesky, warn::Bool) Wishart(df, S) + +# Deprecate 3 arguments expectation and once with function in second place +@deprecate expectation(distr::DiscreteUnivariateDistribution, g::Function, epsilon::Real) expectation(g, distr; epsilon=epsilon) false +@deprecate expectation(distr::ContinuousUnivariateDistribution, g::Function, epsilon::Real) expectation(g, distr) false +@deprecate expectation(distr::Union{UnivariateDistribution,MultivariateDistribution}, g::Function; kwargs...) expectation(g, distr; kwargs...) 
false
diff --git a/src/functionals.jl b/src/functionals.jl
index fac016d07..2fc2ee47a 100644
--- a/src/functionals.jl
+++ b/src/functionals.jl
@@ -1,27 +1,24 @@
-function getEndpoints(distr::UnivariateDistribution, epsilon::Real)
-    (left,right) = map(x -> quantile(distr,x), (0,1))
-    leftEnd = left!=-Inf ? left : quantile(distr, epsilon)
-    rightEnd = right!=-Inf ? right : quantile(distr, 1-epsilon)
-    (leftEnd, rightEnd)
-end
-
-function expectation(distr::ContinuousUnivariateDistribution, g::Function, epsilon::Real)
-    f = x->pdf(distr,x)
-    (leftEnd, rightEnd) = getEndpoints(distr, epsilon)
-    quadgk(x -> f(x)*g(x), leftEnd, rightEnd)[1]
+function expectation(g, distr::ContinuousUnivariateDistribution; kwargs...)
+    return first(quadgk(x -> pdf(distr, x) * g(x), extrema(distr)...; kwargs...))
 end
 
 ## Assuming that discrete distributions only take integer values.
-function expectation(distr::DiscreteUnivariateDistribution, g::Function, epsilon::Real)
-    f = x->pdf(distr,x)
-    (leftEnd, rightEnd) = getEndpoints(distr, epsilon)
-    sum(x -> f(x)*g(x), leftEnd:rightEnd)
+function expectation(g, distr::DiscreteUnivariateDistribution; epsilon::Real=1e-10)
+    mindist, maxdist = extrema(distr)
+    # We want to avoid taking values up to infinity
+    minval = isfinite(mindist) ? mindist : quantile(distr, epsilon)
+    maxval = isfinite(maxdist) ? maxdist : quantile(distr, 1 - epsilon)
+    return sum(x -> pdf(distr, x) * g(x), minval:maxval)
 end
 
-function expectation(distr::UnivariateDistribution, g::Function)
-    expectation(distr, g, 1e-10)
+function expectation(g, distr::MultivariateDistribution; nsamples::Int=100, rng::AbstractRNG=GLOBAL_RNG)
+    nsamples > 0 || throw(ArgumentError("number of samples should be > 0"))
+    # We use a function barrier to work around type instability of `sampler(distr)`
+    return mcexpectation(rng, g, sampler(distr), nsamples)
 end
 
+mcexpectation(rng, f, sampler, n) = sum(f, rand(rng, sampler) for _ in 1:n) / n
+
 ## Leave undefined until we've implemented a numerical integration procedure
 # function entropy(distr::UnivariateDistribution)
 #     pf = typeof(distr)<:ContinuousDistribution ? pdf : pmf
@@ -29,6 +26,9 @@ end
 #     expectation(distr, x -> -log(f(x)))
 # end
 
-function kldivergence(P::UnivariateDistribution, Q::UnivariateDistribution)
-    expectation(P, x -> let p = pdf(P,x); (p > 0)*log(p/pdf(Q,x)) end)
+function kldivergence(P::Distribution{V}, Q::Distribution{V}; kwargs...) where {V<:VariateForm}
+    return expectation(P; kwargs...) do x
+        logp = logpdf(P, x)
+        return (logp > oftype(logp, -Inf)) * (logp - logpdf(Q, x))
+    end
 end
diff --git a/src/matrix/lkj.jl b/src/matrix/lkj.jl
index a181c4597..06af0e1f5 100644
--- a/src/matrix/lkj.jl
+++ b/src/matrix/lkj.jl
@@ -17,6 +17,10 @@ f(\\mathbf{R};\\eta) = \\left[\\prod_{k=1}^{d-1}\\pi^{\\frac{k}{2}}
 
 If ``\\eta = 1``, then the LKJ distribution is uniform over
 [the space of correlation matrices](https://www.jstor.org/stable/2684832).
+
+!!! note
+    If a Cholesky factor of the correlation matrix is desired, it is more efficient to
+    use [`LKJCholesky`](@ref), which avoids factorizing the matrix.
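+
+    For illustration, a minimal sketch (the size `3` and shape `1.0` below are
+    arbitrary):
+
+    ```julia
+    R = rand(LKJ(3, 1.0))          # a 3×3 correlation matrix
+    F = rand(LKJCholesky(3, 1.0))  # a LinearAlgebra.Cholesky factorization
+    Matrix(F)                      # materializes a correlation matrix like R
+    ```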
""" struct LKJ{T <: Real, D <: Integer} <: ContinuousMatrixDistribution d::D diff --git a/src/matrix/wishart.jl b/src/matrix/wishart.jl index e9ccdc262..efd0199f8 100644 --- a/src/matrix/wishart.jl +++ b/src/matrix/wishart.jl @@ -42,29 +42,28 @@ end # Constructors # ----------------------------------------------------------------------------- -function Wishart(df::T, S::AbstractPDMat{T}, warn::Bool = true) where T<:Real +function Wishart(df::T, S::AbstractPDMat{T}) where T<:Real df > 0 || throw(ArgumentError("df must be positive. got $(df).")) p = dim(S) - rnk = p singular = df <= p - 1 if singular - isinteger(df) || throw(ArgumentError("singular df must be an integer. got $(df).")) - rnk = convert(Integer, df) - warn && @warn("got df <= dim - 1; returning a singular Wishart") + isinteger(df) || throw( + ArgumentError("df of a singular Wishart distribution must be an integer (got $df)") + ) end + rnk::Integer = ifelse(singular, df, p) logc0 = wishart_logc0(df, S, rnk) - R = Base.promote_eltype(T, logc0) - prom_S = convert(AbstractArray{T}, S) - Wishart{R, typeof(prom_S), typeof(rnk)}(R(df), prom_S, R(logc0), rnk, singular) + _df, _logc0 = promote(df, logc0) + Wishart{typeof(_df), typeof(S), typeof(rnk)}(_df, S, _logc0, rnk, singular) end -function Wishart(df::Real, S::AbstractPDMat, warn::Bool = true) +function Wishart(df::Real, S::AbstractPDMat) T = Base.promote_eltype(df, S) - Wishart(T(df), convert(AbstractArray{T}, S), warn) + Wishart(T(df), convert(AbstractArray{T}, S)) end -Wishart(df::Real, S::Matrix, warn::Bool = true) = Wishart(df, PDMat(S), warn) -Wishart(df::Real, S::Cholesky, warn::Bool = true) = Wishart(df, PDMat(S), warn) +Wishart(df::Real, S::Matrix) = Wishart(df, PDMat(S)) +Wishart(df::Real, S::Cholesky) = Wishart(df, PDMat(S)) # ----------------------------------------------------------------------------- # REPL display diff --git a/src/mixtures/mixturemodel.jl b/src/mixtures/mixturemodel.jl index 93402c14d..7ced3ff1f 100644 --- a/src/mixtures/mixturemodel.jl +++ b/src/mixtures/mixturemodel.jl @@ -99,7 +99,7 @@ Draw `n` samples from `d`. rand(d::AbstractMixtureModel) """ - rand!(d::Union{UnivariateMixture, MultivariateMixture}, r::AbstactArray) + rand!(d::Union{UnivariateMixture, MultivariateMixture}, r::AbstractArray) Draw multiple samples from `d` and write them to `r`. 
""" @@ -277,15 +277,12 @@ function insupport(d::AbstractMixtureModel, x::AbstractVector) return any(insupport(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) end -function _cdf(d::UnivariateMixture, x::Real) +function cdf(d::UnivariateMixture, x::Real) p = probs(d) r = sum(pi * cdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) return r end -cdf(d::UnivariateMixture{Continuous}, x::Real) = _cdf(d, x) -cdf(d::UnivariateMixture{Discrete}, x::Integer) = _cdf(d, x) - function _mixpdf1(d::AbstractMixtureModel, x) p = probs(d) return sum(pi * pdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) @@ -362,10 +359,8 @@ function _mixlogpdf!(r::AbstractArray, d::AbstractMixtureModel, x) return r end -pdf(d::UnivariateMixture{Continuous}, x::Real) = _mixpdf1(d, x) -pdf(d::UnivariateMixture{Discrete}, x::Int) = _mixpdf1(d, x) -logpdf(d::UnivariateMixture{Continuous}, x::Real) = _mixlogpdf1(d, x) -logpdf(d::UnivariateMixture{Discrete}, x::Int) = _mixlogpdf1(d, x) +pdf(d::UnivariateMixture, x::Real) = _mixpdf1(d, x) +logpdf(d::UnivariateMixture, x::Real) = _mixlogpdf1(d, x) _pdf!(r::AbstractArray, d::UnivariateMixture{Discrete}, x::UnitRange) = _mixpdf!(r, d, x) _pdf!(r::AbstractArray, d::UnivariateMixture, x::AbstractArray) = _mixpdf!(r, d, x) @@ -374,7 +369,7 @@ _logpdf!(r::AbstractArray, d::UnivariateMixture, x::AbstractArray) = _mixlogpdf! _pdf(d::MultivariateMixture, x::AbstractVector) = _mixpdf1(d, x) _logpdf(d::MultivariateMixture, x::AbstractVector) = _mixlogpdf1(d, x) _pdf!(r::AbstractArray, d::MultivariateMixture, x::AbstractMatrix) = _mixpdf!(r, d, x) -_lodpdf!(r::AbstractArray, d::MultivariateMixture, x::AbstractMatrix) = _mixlogpdf!(r, d, x) +_logpdf!(r::AbstractArray, d::MultivariateMixture, x::AbstractMatrix) = _mixlogpdf!(r, d, x) ## component-wise pdf and logpdf @@ -446,6 +441,20 @@ componentwise_logpdf(d::MultivariateMixture, x::AbstractVector) = componentwise_ componentwise_logpdf(d::MultivariateMixture, x::AbstractMatrix) = componentwise_logpdf!(Matrix{eltype(x)}(undef, size(x,2), ncomponents(d)), d, x) +function quantile(d::UnivariateMixture{Continuous}, p::Real) + ps = probs(d) + min_q, max_q = extrema(quantile(component(d, i), p) for (i, pi) in enumerate(ps) if pi > 0) + quantile_bisect(d, p, min_q, max_q) +end + +# we also implement `median` since `median` is implemented more efficiently than +# `quantile(d, 1//2)` for some distributions +function median(d::UnivariateMixture{Continuous}) + ps = probs(d) + min_q, max_q = extrema(median(component(d, i)) for (i, pi) in enumerate(ps) if pi > 0) + quantile_bisect(d, 1//2, min_q, max_q) +end + ## Sampling struct MixtureSampler{VF,VS,Sampler} <: Sampleable{VF,VS} diff --git a/src/multivariate/mvlognormal.jl b/src/multivariate/mvlognormal.jl index 9b0af3c26..25cddeae8 100644 --- a/src/multivariate/mvlognormal.jl +++ b/src/multivariate/mvlognormal.jl @@ -165,17 +165,17 @@ struct MvLogNormal{T<:Real,Cov<:AbstractPDMat,Mean<:AbstractVector} <: AbstractM normal::MvNormal{T,Cov,Mean} end -#Constructors mirror the ones for MvNormmal -MvLogNormal(μ::AbstractVector,Σ::AbstractPDMat) = MvLogNormal(MvNormal(μ,Σ)) -MvLogNormal(Σ::AbstractPDMat) = MvLogNormal(MvNormal(Zeros{eltype(Σ)}(dim(Σ)),Σ)) -MvLogNormal(μ::AbstractVector,Σ::Matrix) = MvLogNormal(MvNormal(μ,Σ)) +# Constructors mirror the ones for MvNormmal +MvLogNormal(μ::AbstractVector{<:Real}, Σ::AbstractMatrix{<:Real}) = MvLogNormal(MvNormal(μ, Σ)) +MvLogNormal(Σ::AbstractMatrix{<:Real}) = MvLogNormal(MvNormal(Σ)) 
+MvLogNormal(μ::AbstractVector{<:Real}, Σ::UniformScaling{<:Real}) = MvLogNormal(MvNormal(μ, Σ)) + +# Deprecated constructors MvLogNormal(μ::AbstractVector,σ::Vector) = MvLogNormal(MvNormal(μ,σ)) MvLogNormal(μ::AbstractVector,s::Real) = MvLogNormal(MvNormal(μ,s)) -MvLogNormal(Σ::AbstractMatrix) = MvLogNormal(MvNormal(Σ)) MvLogNormal(σ::AbstractVector) = MvLogNormal(MvNormal(σ)) MvLogNormal(d::Int,s::Real) = MvLogNormal(MvNormal(d,s)) - Base.eltype(::Type{<:MvLogNormal{T}}) where {T} = T ### Conversion diff --git a/src/multivariate/mvnormal.jl b/src/multivariate/mvnormal.jl index a3fa74c14..cef84c17b 100644 --- a/src/multivariate/mvnormal.jl +++ b/src/multivariate/mvnormal.jl @@ -42,7 +42,7 @@ type `MvNormal`, defined as below, which allows users to specify the special str the mean and covariance. ```julia -struct MvNormal{Cov<:AbstractPDMat,Mean<:AbstractVector} <: AbstractMvNormal +struct MvNormal{T<:Real,Cov<:AbstractPDMat,Mean<:AbstractVector} <: AbstractMvNormal μ::Mean Σ::Cov end @@ -51,19 +51,19 @@ end Here, the mean vector can be an instance of any `AbstractVector`. The covariance can be of any subtype of `AbstractPDMat`. Particularly, one can use `PDMat` for full covariance, `PDiagMat` for diagonal covariance, and `ScalMat` for the isotropic covariance -- those -in the form of ``\\sigma \\mathbf{I}``. (See the Julia package -[PDMats](https://github.com/lindahua/PDMats.jl) for details). +in the form of ``\\sigma^2 \\mathbf{I}``. (See the Julia package +[PDMats](https://github.com/JuliaStats/PDMats.jl/) for details). -We also define a set of alias for the types using different combinations of mean vectors and covariance: +We also define a set of aliases for the types using different combinations of mean vectors and covariance: ```julia -const IsoNormal = MvNormal{ScalMat, Vector{Float64}} -const DiagNormal = MvNormal{PDiagMat, Vector{Float64}} -const FullNormal = MvNormal{PDMat, Vector{Float64}} +const IsoNormal = MvNormal{Float64, ScalMat{Float64}, Vector{Float64}} +const DiagNormal = MvNormal{Float64, PDiagMat{Float64,Vector{Float64}}, Vector{Float64}} +const FullNormal = MvNormal{Float64, PDMat{Float64,Matrix{Float64}}, Vector{Float64}} -const ZeroMeanIsoNormal{Axes} = MvNormal{ScalMat, Zeros{Float64,1,Axes}} -const ZeroMeanDiagNormal{Axes} = MvNormal{PDiagMat, Zeros{Float64,1,Axes}} -const ZeroMeanFullNormal{Axes} = MvNormal{PDMat, Zeros{Float64,1,Axes}} +const ZeroMeanIsoNormal{Axes} = MvNormal{Float64, ScalMat{Float64}, Zeros{Float64,1,Axes}} +const ZeroMeanDiagNormal{Axes} = MvNormal{Float64, PDiagMat{Float64,Vector{Float64}}, Zeros{Float64,1,Axes}} +const ZeroMeanFullNormal{Axes} = MvNormal{Float64, PDMat{Float64,Matrix{Float64}}, Zeros{Float64,1,Axes}} ``` Multivariate normal distributions support affine transformations: @@ -80,6 +80,8 @@ abstract type AbstractMvNormal <: ContinuousMultivariateDistribution end insupport(d::AbstractMvNormal, x::AbstractVector) = length(d) == length(x) && all(isfinite, x) +minimum(d::AbstractMvNormal) = fill(eltype(d)(-Inf), length(d)) +maximum(d::AbstractMvNormal) = fill(eltype(d)(Inf), length(d)) mode(d::AbstractMvNormal) = mean(d) modes(d::AbstractMvNormal) = [mean(d)] @@ -98,6 +100,18 @@ end mvnormal_c0(g::AbstractMvNormal) = -(length(g) * convert(eltype(g), log2π) + logdetcov(g))/2 +function kldivergence(p::AbstractMvNormal, q::AbstractMvNormal) + # This is the generic implementation for AbstractMvNormal, you might need to specialize for your type + length(p) == length(q) || + throw(DimensionMismatch("Distributions p and q have different 
dimensions $(length(p)) and $(length(q))")) + # logdetcov is used separately from _cov for any potential optimization done there + return (tr(_cov(q) \ _cov(p)) + sqmahal(q, mean(p)) - length(p) + logdetcov(q) - logdetcov(p)) / 2 +end + +# This is a workaround to take advantage of the PDMats objects for MvNormal and avoid copies as Matrix +# TODO: Remove this once `cov(::MvNormal)` returns the PDMats object +_cov(d::AbstractMvNormal) = cov(d) + """ invcov(d::AbstractMvNormal) @@ -148,33 +162,9 @@ _pdf!(r::AbstractArray, d::AbstractMvNormal, x::AbstractMatrix) = exp!(_logpdf!( MvNormal Generally, users don't have to worry about these internal details. + We provide a common constructor `MvNormal`, which will construct a distribution of appropriate type depending on the input arguments. - - MvNormal(sig) - -Construct a multivariate normal distribution with zero mean and covariance represented by `sig`. - - MvNormal(mu, sig) - -Construct a multivariate normal distribution with mean `mu` and covariance represented by `sig`. - - MvNormal(d, sig) - -Construct a multivariate normal distribution of dimension `d`, with zero mean, and an -isotropic covariance matrix corresponding `abs2(sig)*I`. - -# Arguments -- `mu::Vector{T<:Real}`: The mean vector. -- `d::Real`: dimension of distribution. -- `sig`: The covariance, which can in of either of the following forms (with `T<:Real`): - 1. subtype of `AbstractPDMat`, - 2. symmetric matrix of type `Matrix{T}`, - 3. vector of type `Vector{T}`: indicating a diagonal covariance as `diagm(abs2(sig))`, - 4. real-valued number: indicating an isotropic covariance matrix corresponding `abs2(sig) * I`. - -**Note:** The constructor will choose an appropriate covariance form internally, so that -special structure of the covariance can be exploited. """ struct MvNormal{T<:Real,Cov<:AbstractPDMat,Mean<:AbstractVector} <: AbstractMvNormal μ::Mean @@ -197,33 +187,40 @@ function MvNormal(μ::AbstractVector{T}, Σ::AbstractPDMat{T}) where {T<:Real} MvNormal{T,typeof(Σ), typeof(μ)}(μ, Σ) end -function MvNormal(μ::AbstractVector{<:Real}, Σ::AbstractPDMat) - R = Base.promote_eltype(μ, Σ) - MvNormal(convert(AbstractArray{R}, μ), convert(AbstractArray{R}, Σ)) -end - -function MvNormal(μ::AbstractVector, Σ::AbstractPDMat) +function MvNormal(μ::AbstractVector{<:Real}, Σ::AbstractPDMat{<:Real}) R = Base.promote_eltype(μ, Σ) MvNormal(convert(AbstractArray{R}, μ), convert(AbstractArray{R}, Σ)) end # constructor with general covariance matrix +""" + MvNormal(μ::AbstractVector{<:Real}, Σ::AbstractMatrix{<:Real}) + +Construct a multivariate normal distribution with mean `μ` and covariance matrix `Σ`. 
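+
+For illustration, a minimal sketch (the numbers are arbitrary; `Diagonal` and `I` are
+from the LinearAlgebra standard library):
+
+```julia
+MvNormal([1.0, 2.0], [1.0 0.5; 0.5 2.0])  # full covariance matrix
+MvNormal(zeros(2), Diagonal([1.0, 4.0]))  # diagonal covariance
+MvNormal(zeros(2), 4.0 * I)               # isotropic covariance 4I
+```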
+""" MvNormal(μ::AbstractVector{<:Real}, Σ::AbstractMatrix{<:Real}) = MvNormal(μ, PDMat(Σ)) -MvNormal(μ::AbstractVector{<:Real}, Σ::Diagonal{<:Real}) = MvNormal(μ, PDiagMat(diag(Σ))) +MvNormal(μ::AbstractVector{<:Real}, Σ::Diagonal{<:Real}) = MvNormal(μ, PDiagMat(Σ.diag)) MvNormal(μ::AbstractVector{<:Real}, Σ::UniformScaling{<:Real}) = MvNormal(μ, ScalMat(length(μ), Σ.λ)) - -# constructor with vector of standard deviations -MvNormal(μ::AbstractVector{<:Real}, σ::AbstractVector{<:Real}) = MvNormal(μ, PDiagMat(abs2.(σ))) - -# constructor with scalar standard deviation -MvNormal(μ::AbstractVector{<:Real}, σ::Real) = MvNormal(μ, ScalMat(length(μ), abs2(σ))) +function MvNormal( + μ::AbstractVector{<:Real}, Σ::Diagonal{<:Real,<:FillArrays.AbstractFill{<:Real,1}} +) + return MvNormal(μ, ScalMat(size(Σ, 1), FillArrays.getindex_value(Σ.diag))) +end # constructor without mean vector -MvNormal(Σ::AbstractVecOrMat{<:Real}) = MvNormal(Zeros{eltype(Σ)}(size(Σ, 1)), Σ) +""" + MvNormal(Σ::AbstractMatrix{<:Real}) + +Construct a multivariate normal distribution with zero mean and covariance matrix `Σ`. +""" +MvNormal(Σ::AbstractMatrix{<:Real}) = MvNormal(Zeros{eltype(Σ)}(size(Σ, 1)), Σ) -# special constructor -MvNormal(d::Int, σ::Real) = MvNormal(Zeros{typeof(σ)}(d), σ) +# deprecated constructors with standard deviations +Base.@deprecate MvNormal(μ::AbstractVector{<:Real}, σ::AbstractVector{<:Real}) MvNormal(μ, Diagonal(map(abs2, σ))) +Base.@deprecate MvNormal(μ::AbstractVector{<:Real}, σ::Real) MvNormal(μ, σ^2 * I) +Base.@deprecate MvNormal(σ::AbstractVector{<:Real}) MvNormal(Diagonal(map(abs2, σ))) +Base.@deprecate MvNormal(d::Int, σ::Real) MvNormal(Diagonal(Fill(σ^2, d))) Base.eltype(::Type{<:MvNormal{T}}) where {T} = T @@ -257,6 +254,7 @@ params(d::MvNormal) = (d.μ, d.Σ) var(d::MvNormal) = diag(d.Σ) cov(d::MvNormal) = Matrix(d.Σ) +_cov(d::MvNormal) = d.Σ invcov(d::MvNormal) = Matrix(inv(d.Σ)) logdetcov(d::MvNormal) = logdet(d.Σ) diff --git a/src/multivariate/mvnormalcanon.jl b/src/multivariate/mvnormalcanon.jl index 140d38ebd..91c9596d3 100644 --- a/src/multivariate/mvnormalcanon.jl +++ b/src/multivariate/mvnormalcanon.jl @@ -5,7 +5,7 @@ """ MvNormalCanon -Multivariate normal distribution is an [exponential family distribution](http://en.wikipedia.org/wiki/Exponential_family), +The multivariate normal distribution is an [exponential family distribution](http://en.wikipedia.org/wiki/Exponential_family), with two *canonical parameters*: the *potential vector* ``\\mathbf{h}`` and the *precision matrix* ``\\mathbf{J}``. The relation between these parameters and the conventional representation (*i.e.* the one using mean ``\\boldsymbol{\\mu}`` and covariance ``\\boldsymbol{\\Sigma}``) is: @@ -19,7 +19,7 @@ which is also a subtype of `AbstractMvNormal` to represent a multivariate normal canonical parameters. Particularly, `MvNormalCanon` is defined as: ```julia -struct MvNormalCanon{P<:AbstractPDMat,V<:AbstractVector} <: AbstractMvNormal +struct MvNormalCanon{T<:Real,P<:AbstractPDMat,V<:AbstractVector} <: AbstractMvNormal μ::V # the mean vector h::V # potential vector, i.e. inv(Σ) * μ J::P # precision matrix, i.e. 
inv(Σ) @@ -29,39 +29,15 @@ end We also define aliases for common specializations of this parametric type: ```julia -const FullNormalCanon = MvNormalCanon{PDMat, Vector{Float64}} -const DiagNormalCanon = MvNormalCanon{PDiagMat, Vector{Float64}} -const IsoNormalCanon = MvNormalCanon{ScalMat, Vector{Float64}} +const FullNormalCanon = MvNormalCanon{Float64, PDMat{Float64,Matrix{Float64}}, Vector{Float64}} +const DiagNormalCanon = MvNormalCanon{Float64, PDiagMat{Float64,Vector{Float64}}, Vector{Float64}} +const IsoNormalCanon = MvNormalCanon{Float64, ScalMat{Float64}, Vector{Float64}} -const ZeroMeanFullNormalCanon{Axes} = MvNormalCanon{PDMat, Zeros{Float64,1}} -const ZeroMeanDiagNormalCanon{Axes} = MvNormalCanon{PDiagMat, Zeros{Float64,1}} -const ZeroMeanIsoNormalCanon{Axes} = MvNormalCanon{ScalMat, Zeros{Float64,1,Axes}} +const ZeroMeanFullNormalCanon{Axes} = MvNormalCanon{Float64, PDMat{Float64,Matrix{Float64}}, Zeros{Float64,1,Axes}} +const ZeroMeanDiagNormalCanon{Axes} = MvNormalCanon{Float64, PDiagMat{Float64,Vector{Float64}}, Zeros{Float64,1,Axes}} +const ZeroMeanIsoNormalCanon{Axes} = MvNormalCanon{Float64, ScalMat{Float64}, Zeros{Float64,1,Axes}} ``` -A multivariate distribution with canonical parameterization can be constructed using a common constructor `MvNormalCanon` as: - - MvNormalCanon(h, J) - -Construct a multivariate normal distribution with potential vector `h` and precision matrix represented by `J`. - - MvNormalCanon(J) - -Construct a multivariate normal distribution with zero mean (thus zero potential vector) and precision matrix represented by `J`. - - MvNormalCanon(d, J) - -Construct a multivariate normal distribution of dimension `d`, with zero mean and -an isotropic precision matrix corresponding `J*I`. - -# Arguments -- `d::Int`: dimension of distribution -- `h::Vector{T<:Real}`: the potential vector, of type `Vector{T}` with `T<:Real`. -- `J`: the representation of the precision matrix, which can be in either of the following forms (`T<:Real`): - 1. an instance of a subtype of `AbstractPDMat`, - 2. a square matrix of type `Matrix{T}`, - 3. a vector of type `Vector{T}`: indicating a diagonal precision matrix as `diagm(J)`, - 4. a real number: indicating an isotropic precision matrix corresponding `J*I`. - **Note:** `MvNormalCanon` share the same set of methods as `MvNormal`. """ struct MvNormalCanon{T<:Real,P<:AbstractPDMat,V<:AbstractVector} <: AbstractMvNormal @@ -70,7 +46,7 @@ struct MvNormalCanon{T<:Real,P<:AbstractPDMat,V<:AbstractVector} <: AbstractMvNo J::P # precision matrix, i.e. 
inv(Σ) end -const FullNormalCanon = MvNormalCanon{Float64, PDMat{Float64,Matrix{Float64}},Vector{Float64}} +const FullNormalCanon = MvNormalCanon{Float64,PDMat{Float64,Matrix{Float64}},Vector{Float64}} const DiagNormalCanon = MvNormalCanon{Float64,PDiagMat{Float64,Vector{Float64}},Vector{Float64}} const IsoNormalCanon = MvNormalCanon{Float64,ScalMat{Float64},Vector{Float64}} @@ -80,46 +56,64 @@ const ZeroMeanIsoNormalCanon{Axes} = MvNormalCanon{Float64,ScalMat{Float64},Zer ### Constructors - -function MvNormalCanon(μ::AbstractVector{T}, h::AbstractVector{T}, J::AbstractPDMat{T}) where T<:Real +function MvNormalCanon(μ::AbstractVector{T}, h::AbstractVector{T}, J::AbstractPDMat{T}) where {T<:Real} length(μ) == length(h) == dim(J) || throw(DimensionMismatch("Inconsistent argument dimensions")) - if typeof(μ) == typeof(h) + if typeof(μ) === typeof(h) return MvNormalCanon{T,typeof(J),typeof(μ)}(μ, h, J) else return MvNormalCanon{T,typeof(J),Vector{T}}(collect(μ), collect(h), J) end end -function MvNormalCanon(μ::AbstractVector{T}, h::AbstractVector{T}, J::P) where {T<:Real, P<:AbstractPDMat} +function MvNormalCanon(μ::AbstractVector{T}, h::AbstractVector{T}, J::AbstractPDMat) where {T<:Real} R = promote_type(T, eltype(J)) MvNormalCanon(convert(AbstractArray{R}, μ), convert(AbstractArray{R}, h), convert(AbstractArray{R}, J)) end -function MvNormalCanon(μ::AbstractVector{T}, h::AbstractVector{S}, J::P) where {T<:Real, S<:Real, P<:AbstractPDMat} +function MvNormalCanon(μ::AbstractVector{<:Real}, h::AbstractVector{<:Real}, J::AbstractPDMat) R = Base.promote_eltype(μ, h, J) MvNormalCanon(convert(AbstractArray{R}, μ), convert(AbstractArray{R}, h), convert(AbstractArray{R}, J)) end -function MvNormalCanon(J::AbstractPDMat) - z = Zeros{eltype(J)}(dim(J)) - MvNormalCanon(z, z, J) -end - -function MvNormalCanon(h::AbstractVector{T}, J::P) where {T<:Real, P<:AbstractPDMat} +function MvNormalCanon(h::AbstractVector{<:Real}, J::AbstractPDMat) length(h) == dim(J) || throw(DimensionMismatch("Inconsistent argument dimensions")) R = Base.promote_eltype(h, J) - hh, JJ = collect(convert(AbstractArray{R}, h)), convert(AbstractArray{R}, J) - MvNormalCanon{eltype(hh),typeof(JJ),typeof(hh)}(JJ \ hh, hh, JJ) + hh = convert(AbstractArray{R}, h) + JJ = convert(AbstractArray{R}, J) + MvNormalCanon(JJ \ hh, hh, JJ) end +""" + MvNormalCanon(h::AbstractVector{<:Real}, J::AbstractMatrix{<:Real}) + +Construct a multivariate normal distribution with potential vector `h` and precision matrix +`J`. 
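+
+For illustration, a minimal sketch (the values are arbitrary); since `h = inv(Σ) * μ`
+and `J = inv(Σ)`, the mean is recovered as `inv(J) * h`:
+
+```julia
+h = [1.0, 0.0]
+J = [2.0 0.0; 0.0 4.0]
+d = MvNormalCanon(h, J)
+mean(d) ≈ inv(J) * h  # true
+```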
+""" MvNormalCanon(h::AbstractVector{<:Real}, J::AbstractMatrix{<:Real}) = MvNormalCanon(h, PDMat(J)) -MvNormalCanon(h::AbstractVector{<:Real}, prec::AbstractVector{<:Real}) = MvNormalCanon(h, PDiagMat(prec)) -MvNormalCanon(h::AbstractVector{<:Real}, prec::Real) = MvNormalCanon(h, ScalMat(length(h), prec)) +MvNormalCanon(h::AbstractVector{<:Real}, J::Diagonal{<:Real}) = MvNormalCanon(h, PDiagMat(J.diag)) +function MvNormalCanon(h::AbstractVector{<:Real}, J::UniformScaling{<:Real}) + return MvNormalCanon(h, ScalMat(length(h), J.λ)) +end +function MvNormalCanon( + h::AbstractVector{<:Real}, J::Diagonal{<:Real,<:FillArrays.AbstractFill{<:Real,1}} +) + return MvNormalCanon(h, ScalMat(size(J, 1), FillArrays.getindex_value(J.diag))) +end -MvNormalCanon(J::AbstractMatrix) = MvNormalCanon(PDMat(J)) -MvNormalCanon(prec::AbstractVector) = MvNormalCanon(PDiagMat(prec)) -MvNormalCanon(d::Int, prec) = MvNormalCanon(ScalMat(d, prec)) +# Constructor without mean vector +""" + MvNormalCanon(J::AbstractMatrix{<:Real}) + +Construct a multivariate normal distribution with zero mean (thus zero potential vector) and +precision matrix `J`. +""" +MvNormalCanon(J::AbstractMatrix{<:Real}) = MvNormalCanon(Zeros{eltype(J)}(size(J, 1)), J) +# Deprecated constructors +Base.@deprecate MvNormalCanon(h::AbstractVector{<:Real}, prec::AbstractVector{<:Real}) MvNormalCanon(h, Diagonal(prec)) +Base.@deprecate MvNormalCanon(h::AbstractVector{<:Real}, prec::Real) MvNormalCanon(h, prec * I) +Base.@deprecate MvNormalCanon(prec::AbstractVector) MvNormalCanon(Diagonal(prec)) +Base.@deprecate MvNormalCanon(d::Int, prec::Real) MvNormalCanon(Diagonal(Fill(prec, d))) ### Show diff --git a/src/multivariate/product.jl b/src/multivariate/product.jl index 55bb0103f..cee21c55d 100644 --- a/src/multivariate/product.jl +++ b/src/multivariate/product.jl @@ -40,6 +40,8 @@ var(d::Product) = var.(d.v) cov(d::Product) = Diagonal(var(d)) entropy(d::Product) = sum(entropy, d.v) insupport(d::Product, x::AbstractVector) = all(insupport.(d.v, x)) +minimum(d::Product) = map(minimum, d.v) +maximum(d::Product) = map(maximum, d.v) """ product_distribution(dists::AbstractVector{<:UnivariateDistribution}) @@ -61,6 +63,6 @@ covariance matrix. """ function product_distribution(dists::AbstractVector{<:Normal}) µ = mean.(dists) - σ = std.(dists) - return MvNormal(µ, σ) + σ2 = var.(dists) + return MvNormal(µ, Diagonal(σ2)) end diff --git a/src/quantilealgs.jl b/src/quantilealgs.jl index 1de57e79d..4cbd70f7a 100644 --- a/src/quantilealgs.jl +++ b/src/quantilealgs.jl @@ -1,8 +1,12 @@ # Various algorithms for computing quantile -function quantile_bisect(d::ContinuousUnivariateDistribution, p::Real, - lx::Real, rx::Real, tol::Real) - +function quantile_bisect( + d::ContinuousUnivariateDistribution, p::Real, lx::Real, rx::Real, + # base tolerance on types to support e.g. `Float32` (avoids an infinite loop) + # ≈ 3.7e-11 for Float64 + # ≈ 2.4e-5 for Float32 + tol::Real=(eps(Base.promote_typeof(float(lx), float(rx))))^(2 / 3), +) # find quantile using bisect algorithm cl = cdf(d, lx) cr = cdf(d, rx) @@ -22,7 +26,7 @@ function quantile_bisect(d::ContinuousUnivariateDistribution, p::Real, end quantile_bisect(d::ContinuousUnivariateDistribution, p::Real) = - quantile_bisect(d, p, minimum(d), maximum(d), 1.0e-12) + quantile_bisect(d, p, minimum(d), maximum(d)) # if starting at mode, Newton is convergent for any unimodal continuous distribution, see: # Göknur Giner, Gordon K. 
Smyth (2014) diff --git a/src/test_utils.jl b/src/test_utils.jl new file mode 100644 index 000000000..98d1f8a7a --- /dev/null +++ b/src/test_utils.jl @@ -0,0 +1,96 @@ +module TestUtils + +using Distributions +using LinearAlgebra +using Random +using Test + + +__rand(::Nothing, args...) = rand(args...) +__rand(rng::AbstractRNG, args...) = rand(rng, args...) + +__rand!(::Nothing, args...) = rand!(args...) +__rand!(rng::AbstractRNG, args...) = rand!(rng, args...) + +""" + test_mvnormal( + g::AbstractMvNormal, n_tsamples::Int=10^6, rng::AbstractRNG=Random.GLOBAL_RNG + ) + +Test that `AbstractMvNormal` implements the expected API. +""" +function test_mvnormal( + g::AbstractMvNormal, n_tsamples::Int=10^6, rng::Union{AbstractRNG, Nothing}=nothing +) + d = length(g) + μ = mean(g) + Σ = cov(g) + @test length(μ) == d + @test size(Σ) == (d, d) + @test var(g) ≈ diag(Σ) + @test entropy(g) ≈ 0.5 * logdet(2π * ℯ * Σ) + ldcov = logdetcov(g) + @test ldcov ≈ logdet(Σ) + vs = diag(Σ) + @test g == typeof(g)(params(g)...) + @test g == deepcopy(g) + @test minimum(g) == fill(-Inf, d) + @test maximum(g) == fill(Inf, d) + @test extrema(g) == (minimum(g), maximum(g)) + @test isless(extrema(g)...) + + # test sampling for AbstractMatrix (here, a SubArray): + subX = view(__rand(rng, d, 2d), :, 1:d) + @test isa(__rand!(rng, g, subX), SubArray) + + # sampling + @test isa(__rand(rng, g), Vector{Float64}) + X = __rand(rng, g, n_tsamples) + emp_mu = vec(mean(X, dims=2)) + Z = X .- emp_mu + emp_cov = (Z * Z') * inv(n_tsamples) + + mean_atols = 8 .* sqrt.(vs ./ n_tsamples) + cov_atols = 10 .* sqrt.(vs .* vs') ./ sqrt.(n_tsamples) + for i = 1:d + @test isapprox(emp_mu[i], μ[i], atol=mean_atols[i]) + end + for i = 1:d, j = 1:d + @test isapprox(emp_cov[i,j], Σ[i,j], atol=cov_atols[i,j]) + end + + X = rand(MersenneTwister(14), g, n_tsamples) + Y = rand(MersenneTwister(14), g, n_tsamples) + @test X == Y + emp_mu = vec(mean(X, dims=2)) + Z = X .- emp_mu + emp_cov = (Z * Z') * inv(n_tsamples) + for i = 1:d + @test isapprox(emp_mu[i] , μ[i] , atol=mean_atols[i]) + end + for i = 1:d, j = 1:d + @test isapprox(emp_cov[i,j], Σ[i,j], atol=cov_atols[i,j]) + end + + + # evaluation of sqmahal & logpdf + U = X .- μ + sqm = vec(sum(U .* (Σ \ U), dims=1)) + for i = 1:min(100, n_tsamples) + @test sqmahal(g, X[:,i]) ≈ sqm[i] + end + @test sqmahal(g, X) ≈ sqm + + lp = -0.5 .* sqm .- 0.5 * (d * log(2.0 * pi) + ldcov) + for i = 1:min(100, n_tsamples) + @test logpdf(g, X[:,i]) ≈ lp[i] + end + @test logpdf(g, X) ≈ lp + + # log likelihood + @test loglikelihood(g, X) ≈ sum(i -> Distributions._logpdf(g, X[:,i]), 1:n_tsamples) + @test loglikelihood(g, X[:, 1]) ≈ logpdf(g, X[:, 1]) + @test loglikelihood(g, [X[:, i] for i in axes(X, 2)]) ≈ loglikelihood(g, X) +end + +end diff --git a/src/truncate.jl b/src/truncate.jl index c9157b66a..ba76b7757 100644 --- a/src/truncate.jl +++ b/src/truncate.jl @@ -1,54 +1,55 @@ """ - truncated(d, l, u): + truncated(d::UnivariateDistribution, l::Real, u::Real) -Truncate a distribution between `l` and `u`. -Builds the most appropriate distribution for the type of `d`, -the fallback is constructing a `Truncated` wrapper. +Truncate a univariate distribution `d` to the interval `[l, u]`. -To implement a specialized truncated form for a distribution `D`, -the method `truncate(d::D, l::T, u::T) where {T <: Real}` -should be implemented. +The lower bound `l` can be finite or `-Inf` and the upper bound `u` can be finite or +`Inf`. The function throws an error if `l > u`. 
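+
+For illustration, a minimal sketch (the distribution and bounds are arbitrary):
+
+```julia
+d = truncated(Normal(0, 1), -1, 1)  # standard normal restricted to [-1, 1]
+cdf(d, 1)                           # 1.0: all probability mass lies in [-1, 1]
+truncated(Normal(0, 1), 0, Inf)     # one-sided truncation (a half-normal)
+```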
-# Arguments
-- `d::UnivariateDistribution`: The original distribution.
-- `l::Real`: The lower bound of the truncation, which can be a finite value or `-Inf`.
-- `u::Real`: The upper bound of the truncation, which can be a finite value of `Inf`.
+The function falls back to constructing a [`Truncated`](@ref) wrapper.
 
-Throws an error if `l >= u`.
+
+# Implementation
+
+To implement a specialized truncated form for distributions of type `D`, the method
+`truncate(d::D, l::T, u::T) where {T <: Real}` should be implemented.
 """
 function truncated(d::UnivariateDistribution, l::Real, u::Real)
     return truncated(d, promote(l, u)...)
 end
 function truncated(d::UnivariateDistribution, l::T, u::T) where {T <: Real}
-    l < u || error("lower bound should be less than upper bound.")
-    T2 = promote_type(T, eltype(d))
-    lcdf = isinf(l) ? zero(T2) : T2(cdf(d, l))
-    ucdf = isinf(u) ? one(T2) : T2(cdf(d, u))
-    tp = ucdf - lcdf
-    Truncated(d, promote(l, u, lcdf, ucdf, tp, log(tp))...)
-end
+    l <= u || error("the lower bound must be less than or equal to the upper bound")
 
-truncated(d::UnivariateDistribution, l::Integer, u::Integer) = truncated(d, float(l), float(u))
+    # (log)lcdf = (log) P(X < l) where X ~ d
+    loglcdf = if value_support(typeof(d)) === Discrete
+        logsubexp(logcdf(d, l), logpdf(d, l))
+    else
+        logcdf(d, l)
+    end
+    lcdf = exp(loglcdf)
 
-"""
-    Truncated(d, l, u):
+    # (log)ucdf = (log) P(X ≤ u) where X ~ d
+    logucdf = logcdf(d, u)
+    ucdf = exp(logucdf)
 
-Create a generic wrapper for a truncated distribution.
-Prefer calling the function `truncated(d, l, u)`, which can choose the appropriate
-representation of the truncated distribution.
+    # (log)tp = (log) P(l ≤ X ≤ u) where X ∼ d
+    logtp = logsubexp(loglcdf, logucdf)
+    tp = exp(logtp)
+
+    Truncated(d, promote(l, u, lcdf, ucdf, tp, logtp)...)
+end
+
+"""
+    Truncated
 
-# Arguments
-- `d::UnivariateDistribution`: The original distribution.
-- `l::Real`: The lower bound of the truncation, which can be a finite value or `-Inf`.
-- `u::Real`: The upper bound of the truncation, which can be a finite value of `Inf`.
+Generic wrapper for a truncated distribution.
 """
 struct Truncated{D<:UnivariateDistribution, S<:ValueSupport, T <: Real} <: UnivariateDistribution{S}
     untruncated::D      # the original distribution (untruncated)
     lower::T            # lower bound
     upper::T            # upper bound
-    lcdf::T             # cdf of lower bound
-    ucdf::T             # cdf of upper bound
+    lcdf::T             # cdf of lower bound (exclusive): P(X < lower)
+    ucdf::T             # cdf of upper bound (inclusive): P(X ≤ upper)
     tp::T               # the probability of the truncated part, i.e. ucdf - lcdf
     logtp::T            # log(tp), i.e. log(ucdf - lcdf)
@@ -57,18 +58,7 @@ struct Truncated{D<:UnivariateDistribution, S<:ValueSupport, T <: Real} <: Univa
     end
 end
 
-### Constructors
-function Truncated(d::UnivariateDistribution, l::T, u::T) where {T <: Real}
-    l < u || error("lower bound should be less than upper bound.")
-    T2 = promote_type(T, eltype(d))
-    lcdf = isinf(l) ? zero(T2) : T2(cdf(d, l))
-    ucdf = isinf(u) ? one(T2) : T2(cdf(d, u))
-    tp = ucdf - lcdf
-    Truncated(d, promote(l, u, lcdf, ucdf, tp, log(tp))...)
-end - -Truncated(d::UnivariateDistribution, l::Integer, u::Integer) = Truncated(d, float(l), float(u)) - +### Constructors of `Truncated` are deprecated - users should call `truncated` @deprecate Truncated(d::UnivariateDistribution, l::Real, u::Real) truncated(d, l, u) params(d::Truncated) = tuple(params(d.untruncated)..., d.lower, d.upper) @@ -91,94 +81,59 @@ end quantile(d::Truncated, p::Real) = quantile(d.untruncated, d.lcdf + p * d.tp) -function _pdf(d::Truncated, x::T) where {T<:Real} - if d.lower <= x <= d.upper - pdf(d.untruncated, x) / d.tp - else - zero(T) - end -end - -function pdf(d::Truncated{<:ContinuousUnivariateDistribution}, x::T) where {T<:Real} - _pdf(d, float(x)) -end - -function pdf(d::Truncated{D}, x::T) where {D<:DiscreteUnivariateDistribution, T<:Real} - isinteger(x) || return zero(float(T)) - _pdf(d, x) +function pdf(d::Truncated, x::Real) + result = pdf(d.untruncated, x) / d.tp + return d.lower <= x <= d.upper ? result : zero(result) end -function pdf(d::Truncated{D}, x::T) where {D<:DiscreteUnivariateDistribution, T<:Integer} - _pdf(d, float(x)) +function logpdf(d::Truncated, x::Real) + result = logpdf(d.untruncated, x) - d.logtp + return d.lower <= x <= d.upper ? result : oftype(result, -Inf) end -function _logpdf(d::Truncated, x::T) where {T<:Real} - if d.lower <= x <= d.upper - logpdf(d.untruncated, x) - d.logtp +function cdf(d::Truncated, x::Real) + result = (cdf(d.untruncated, x) - d.lcdf) / d.tp + return if x < d.lower + zero(result) + elseif x >= d.upper + one(result) else - TF = float(T) - -TF(Inf) + result end end -function logpdf(d::Truncated{D}, x::T) where {D<:DiscreteUnivariateDistribution, T<:Real} - TF = float(T) - isinteger(x) || return -TF(Inf) - return _logpdf(d, x) -end - -function logpdf(d::Truncated{D}, x::Integer) where {D<:DiscreteUnivariateDistribution} - _logpdf(d, x) -end - -function logpdf(d::Truncated{D, Continuous}, x::T) where {D<:ContinuousUnivariateDistribution, T<:Real} - _logpdf(d, x) -end - -# fallback to avoid method ambiguities -_cdf(d::Truncated, x::T) where {T<:Real} = - x <= d.lower ? zero(T) : - x >= d.upper ? one(T) : - (cdf(d.untruncated, x) - d.lcdf) / d.tp - -cdf(d::Truncated, x::Real) = _cdf(d, x) -cdf(d::Truncated, x::Integer) = _cdf(d, float(x)) # float conversion for stability - -function _logcdf(d::Truncated, x::T) where {T<:Real} - TF = float(T) - if x <= d.lower - -TF(Inf) +function logcdf(d::Truncated, x::Real) + result = logsubexp(logcdf(d.untruncated, x), log(d.lcdf)) - d.logtp + return if x < d.lower + oftype(result, -Inf) elseif x >= d.upper - zero(TF) + zero(result) else - log(cdf(d.untruncated, x) - d.lcdf) - d.logtp + result end end -logcdf(d::Truncated, x::Real) = _logcdf(d, x) -logcdf(d::Truncated, x::Integer) = _logcdf(d, x) - -_ccdf(d::Truncated, x::T) where {T<:Real} = - x <= d.lower ? one(T) : - x >= d.upper ? 
zero(T) : - (d.ucdf - cdf(d.untruncated, x)) / d.tp - -ccdf(d::Truncated, x::Real) = _ccdf(d, x) -ccdf(d::Truncated, x::Integer) = _ccdf(d, float(x)) - -function _logccdf(d::Truncated, x::T) where {T<:Real} - TF = float(T) - if x <= d.lower - zero(TF) - elseif x >= d.upper - -TF(Inf) +function ccdf(d::Truncated, x::Real) + result = (d.ucdf - cdf(d.untruncated, x)) / d.tp + return if x <= d.lower + one(result) + elseif x > d.upper + zero(result) else - log(d.ucdf - cdf(d.untruncated, x)) - d.logtp + result end end -logccdf(d::Truncated, x::Real) = _logccdf(d, x) -logccdf(d::Truncated, x::Integer) = _logccdf(d, x) +function logccdf(d::Truncated, x::Real) + result = logsubexp(logccdf(d.untruncated, x), log1p(-d.ucdf)) - d.logtp + return if x <= d.lower + zero(result) + elseif x > d.upper + oftype(result, -Inf) + else + result + end +end ## random number generation @@ -217,3 +172,4 @@ _use_multline_show(d::Truncated) = _use_multline_show(d.untruncated) include(joinpath("truncated", "normal.jl")) include(joinpath("truncated", "exponential.jl")) include(joinpath("truncated", "uniform.jl")) +include(joinpath("truncated", "loguniform.jl")) diff --git a/src/truncated/loguniform.jl b/src/truncated/loguniform.jl new file mode 100644 index 000000000..d4814fc3a --- /dev/null +++ b/src/truncated/loguniform.jl @@ -0,0 +1 @@ +truncated(d::LogUniform, lo::T, hi::T) where {T<:Real} = LogUniform(max(d.a, lo), min(d.b, hi)) diff --git a/src/truncated/uniform.jl b/src/truncated/uniform.jl index a8681922c..3e345cde1 100644 --- a/src/truncated/uniform.jl +++ b/src/truncated/uniform.jl @@ -2,6 +2,4 @@ ##### Shortcut for truncating uniform distributions. ##### -truncated(d::Uniform, l::T, u::T) where {T <: Real} = Uniform(promote(max(l, d.a), min(u, d.b))...) - -truncated(d::Uniform, l::Integer, u::Integer) = truncated(d, float(l), float(u)) +truncated(d::Uniform, l::T, u::T) where {T <: Real} = Uniform(max(l, d.a), min(u, d.b)) diff --git a/src/univariate/continuous/beta.jl b/src/univariate/continuous/beta.jl index 6259b18cd..fa85733c3 100644 --- a/src/univariate/continuous/beta.jl +++ b/src/univariate/continuous/beta.jl @@ -1,5 +1,5 @@ """ - Beta(α,β) + Beta(α, β) The *Beta distribution* has probability density function @@ -15,10 +15,10 @@ independently, then ``X / (X + Y) \\sim \\operatorname{Beta}(\\alpha, \\beta)``. ```julia Beta() # equivalent to Beta(1, 1) -Beta(a) # equivalent to Beta(a, a) -Beta(a, b) # Beta distribution with shape parameters a and b +Beta(α) # equivalent to Beta(α, α) +Beta(α, β) # Beta distribution with shape parameters α and β -params(d) # Get the parameters, i.e. (a, b) +params(d) # Get the parameters, i.e. 
(α, β) ``` External links @@ -107,6 +107,12 @@ function entropy(d::Beta) (s - 2) * digamma(s) end +function kldivergence(p::Beta, q::Beta) + αp, βp = params(p) + αq, βq = params(q) + return logbeta(αq, βq) - logbeta(αp, βp) + (αp - αq) * digamma(αp) + + (βp - βq) * digamma(βp) + (αq - αp + βq - βp) * digamma(αp + βp) +end #### Evaluation diff --git a/src/univariate/continuous/betaprime.jl b/src/univariate/continuous/betaprime.jl index 51918387a..7c30bd7ed 100644 --- a/src/univariate/continuous/betaprime.jl +++ b/src/univariate/continuous/betaprime.jl @@ -1,5 +1,5 @@ """ - BetaPrime(α,β) + BetaPrime(α, β) The *Beta prime distribution* has probability density function @@ -15,10 +15,10 @@ relation ship that if ``X \\sim \\operatorname{Beta}(\\alpha, \\beta)`` then ``\ ```julia BetaPrime() # equivalent to BetaPrime(1, 1) -BetaPrime(a) # equivalent to BetaPrime(a, a) -BetaPrime(a, b) # Beta prime distribution with shape parameters a and b +BetaPrime(α) # equivalent to BetaPrime(α, α) +BetaPrime(α, β) # Beta prime distribution with shape parameters α and β -params(d) # Get the parameters, i.e. (a, b) +params(d) # Get the parameters, i.e. (α, β) ``` External links @@ -85,25 +85,28 @@ end #### Evaluation -function logpdf(d::BetaPrime{T}, x::Real) where T<:Real - (α, β) = params(d) - if x < 0 - T(-Inf) - else - (α - 1) * log(x) - (α + β) * log1p(x) - logbeta(α, β) - end +function logpdf(d::BetaPrime, x::Real) + α, β = params(d) + _x = max(0, x) + z = xlogy(α - 1, _x) - (α + β) * log1p(_x) - logbeta(α, β) + return x < 0 ? oftype(z, -Inf) : z end -cdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? zero(T) : betacdf(d.α, d.β, x / (1 + x)) -ccdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? one(T) : betaccdf(d.α, d.β, x / (1 + x)) -logcdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? T(-Inf) : betalogcdf(d.α, d.β, x / (1 + x)) -logccdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? zero(T) : betalogccdf(d.α, d.β, x / (1 + x)) +function zval(::BetaPrime, x::Real) + y = max(x, 0) + z = y / (1 + y) + # map `Inf` to `Inf` (otherwise it returns `NaN`) + return isinf(x) && x > 0 ? oftype(z, Inf) : z +end +xval(::BetaPrime, z::Real) = z / (1 - z) -quantile(d::BetaPrime, p::Real) = (x = betainvcdf(d.α, d.β, p); x / (1 - x)) -cquantile(d::BetaPrime, p::Real) = (x = betainvccdf(d.α, d.β, p); x / (1 - x)) -invlogcdf(d::BetaPrime, p::Real) = (x = betainvlogcdf(d.α, d.β, p); x / (1 - x)) -invlogccdf(d::BetaPrime, p::Real) = (x = betainvlogccdf(d.α, d.β, p); x / (1 - x)) +for f in (:cdf, :ccdf, :logcdf, :logccdf) + @eval $f(d::BetaPrime, x::Real) = $f(Beta(d.α, d.β; check_args=false), zval(d, x)) +end +for f in (:quantile, :cquantile, :invlogcdf, :invlogccdf) + @eval $f(d::BetaPrime, p::Real) = xval(d, $f(Beta(d.α, d.β; check_args=false), p)) +end #### Sampling diff --git a/src/univariate/continuous/cauchy.jl b/src/univariate/continuous/cauchy.jl index d7a187342..5afe79d90 100644 --- a/src/univariate/continuous/cauchy.jl +++ b/src/univariate/continuous/cauchy.jl @@ -9,12 +9,12 @@ f(x; \\mu, \\sigma) = \\frac{1}{\\pi \\sigma \\left(1 + \\left(\\frac{x - \\mu}{ ```julia Cauchy() # Standard Cauchy distribution, i.e. Cauchy(0, 1) -Cauchy(u) # Cauchy distribution with location u and unit scale, i.e. Cauchy(u, 1) -Cauchy(u, b) # Cauchy distribution with location u and scale b +Cauchy(μ) # Cauchy distribution with location μ and unit scale, i.e. Cauchy(μ, 1) +Cauchy(μ, σ) # Cauchy distribution with location μ and scale σ -params(d) # Get the parameters, i.e. 
(u, b)
-location(d)   # Get the location parameter, i.e. u
-scale(d)      # Get the scale parameter, i.e. b
+params(d)     # Get the parameters, i.e. (μ, σ)
+location(d)   # Get the location parameter, i.e. μ
+scale(d)      # Get the scale parameter, i.e. σ
 ```
 
 External links
diff --git a/src/univariate/continuous/chernoff.jl b/src/univariate/continuous/chernoff.jl
index 340f68b22..920f05b2c 100644
--- a/src/univariate/continuous/chernoff.jl
+++ b/src/univariate/continuous/chernoff.jl
@@ -16,7 +16,7 @@ The *Chernoff distribution* is the distribution of the random variable
 ```math
 \\underset{t \\in (-\\infty,\\infty)}{\\arg\\max} ( G(t) - t^2 ),
 ```
-where ``G`` is standard two--sided Brownian motion.
+where ``G`` is standard two-sided Brownian motion.
 
 The distribution arises as the limit distribution of various cube-root-n consistent estimators,
 including the isotonic regression estimator of Brunk, the isotonic density estimator of Grenander,
@@ -27,21 +27,7 @@ computation of pdf and cdf is based on the algorithm described in Groeneboom and
 Journal of Computational and Graphical Statistics, 2001.
 
 ```julia
-Chernoff()
-pdf(Chernoff(),x::Real)
-cdf(Chernoff(),x::Real)
-logpdf(Chernoff(),x::Real)
-survivor(Chernoff(),x::Real)
-mean(Chernoff())
-var(Chernoff())
-skewness(Chernoff())
-kurtosis(Chernoff())
-kurtosis(Chernoff(), excess::Bool)
-mode(Chernoff())
-entropy(Chernoff())
-rand(Chernoff())
-rand(rng, Chernoff()
-cdf(Chernoff(),-x)  #For tail probabilities, use this instead of 1-cdf(Chernoff(),x)
+cdf(Chernoff(),-x)  # For tail probabilities, use this instead of 1-cdf(Chernoff(),x)
 ```
 """
 struct Chernoff <: ContinuousUnivariateDistribution
diff --git a/src/univariate/continuous/chi.jl b/src/univariate/continuous/chi.jl
index ff55afab0..4fa328697 100644
--- a/src/univariate/continuous/chi.jl
+++ b/src/univariate/continuous/chi.jl
@@ -83,16 +83,13 @@ logpdf(d::Chi, x::Real) = (ν = d.ν;
 gradlogpdf(d::Chi{T}, x::Real) where {T<:Real} = x >= 0 ? (d.ν - 1) / x - x : zero(T)
 
-cdf(d::Chi, x::Real) = chisqcdf(d.ν, x^2)
-ccdf(d::Chi, x::Real) = chisqccdf(d.ν, x^2)
-logcdf(d::Chi, x::Real) = chisqlogcdf(d.ν, x^2)
-logccdf(d::Chi, x::Real) = chisqlogccdf(d.ν, x^2)
-
-quantile(d::Chi, p::Real) = sqrt(chisqinvcdf(d.ν, p))
-cquantile(d::Chi, p::Real) = sqrt(chisqinvccdf(d.ν, p))
-invlogcdf(d::Chi, p::Real) = sqrt(chisqinvlogcdf(d.ν, p))
-invlogccdf(d::Chi, p::Real) = sqrt(chisqinvlogccdf(d.ν, p))
+for f in (:cdf, :ccdf, :logcdf, :logccdf)
+    @eval $f(d::Chi, x::Real) = $f(Chisq(d.ν; check_args=false), max(x, 0)^2)
+end
+for f in (:quantile, :cquantile, :invlogcdf, :invlogccdf)
+    @eval $f(d::Chi, p::Real) = sqrt($f(Chisq(d.ν; check_args=false), p))
+end
 
 #### Sampling
diff --git a/src/univariate/continuous/exponential.jl b/src/univariate/continuous/exponential.jl
index 124103cea..5c395c962 100644
--- a/src/univariate/continuous/exponential.jl
+++ b/src/univariate/continuous/exponential.jl
@@ -60,21 +60,31 @@ kurtosis(::Exponential{T}) where {T} = T(6)
 
 entropy(d::Exponential{T}) where {T} = one(T) + log(d.θ)
 
+function kldivergence(p::Exponential, q::Exponential)
+    λq_over_λp = scale(p) / scale(q)
+    return -logmxp1(λq_over_λp)
+end
+
 #### Evaluation
 
-zval(d::Exponential, x::Real) = x / d.θ
+zval(d::Exponential, x::Real) = max(x / d.θ, 0)
 xval(d::Exponential, z::Real) = z * d.θ
 
-pdf(d::Exponential, x::Real) = (λ = rate(d); x < 0 ? zero(λ) : λ * exp(-λ * x))
-function logpdf(d::Exponential{T}, x::Real) where T<:Real
+function pdf(d::Exponential, x::Real)
+    λ = rate(d)
+    z = λ * exp(-λ * max(x, 0))
+    return x < 0 ?
zero(z) : z +end +function logpdf(d::Exponential, x::Real) λ = rate(d) - x < 0 ? -T(Inf) : log(λ) - λ * x + z = log(λ) - λ * x + return x < 0 ? oftype(z, -Inf) : z end -cdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? -expm1(-zval(d, x)) : zero(T) -ccdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? exp(-zval(d, x)) : zero(T) -logcdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? log1mexp(-zval(d, x)) : -T(Inf) -logccdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? -zval(d, x) : zero(T) +cdf(d::Exponential, x::Real) = -expm1(-zval(d, x)) +ccdf(d::Exponential, x::Real) = exp(-zval(d, x)) +logcdf(d::Exponential, x::Real) = log1mexp(-zval(d, x)) +logccdf(d::Exponential, x::Real) = -zval(d, x) quantile(d::Exponential, p::Real) = -xval(d, log1p(-p)) cquantile(d::Exponential, p::Real) = -xval(d, log(p)) diff --git a/src/univariate/continuous/frechet.jl b/src/univariate/continuous/frechet.jl index b73c49812..f50092a8c 100644 --- a/src/univariate/continuous/frechet.jl +++ b/src/univariate/continuous/frechet.jl @@ -119,15 +119,18 @@ function logpdf(d::Frechet{T}, x::Real) where T<:Real end end -cdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? exp(-((d.θ / x) ^ d.α)) : zero(T) -ccdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? -expm1(-((d.θ / x) ^ d.α)) : one(T) -logcdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? -(d.θ / x) ^ d.α : -T(Inf) -logccdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? log1mexp(-((d.θ / x) ^ d.α)) : zero(T) - -quantile(d::Frechet, p::Real) = d.θ * (-log(p)) ^ (-1 / d.α) -cquantile(d::Frechet, p::Real) = d.θ * (-log1p(-p)) ^ (-1 / d.α) -invlogcdf(d::Frechet, lp::Real) = d.θ * (-lp)^(-1 / d.α) -invlogccdf(d::Frechet, lp::Real) = d.θ * (-log1mexp(lp))^(-1 / d.α) +zval(d::Frechet, x::Real) = (d.θ / max(x, 0))^d.α +xval(d::Frechet, z::Real) = d.θ * z^(- 1 / d.α) + +cdf(d::Frechet, x::Real) = exp(- zval(d, x)) +ccdf(d::Frechet, x::Real) = -expm1(- zval(d, x)) +logcdf(d::Frechet, x::Real) = - zval(d, x) +logccdf(d::Frechet, x::Real) = log1mexp(- zval(d, x)) + +quantile(d::Frechet, p::Real) = xval(d, -log(p)) +cquantile(d::Frechet, p::Real) = xval(d, -log1p(-p)) +invlogcdf(d::Frechet, lp::Real) = xval(d, -lp) +invlogccdf(d::Frechet, lp::Real) = xval(d, -log1mexp(lp)) function gradlogpdf(d::Frechet{T}, x::Real) where T<:Real (α, θ) = params(d) diff --git a/src/univariate/continuous/gamma.jl b/src/univariate/continuous/gamma.jl index fb6ac1bcc..de550cb35 100644 --- a/src/univariate/continuous/gamma.jl +++ b/src/univariate/continuous/gamma.jl @@ -79,6 +79,14 @@ mgf(d::Gamma, t::Real) = (1 - t * d.θ)^(-d.α) cf(d::Gamma, t::Real) = (1 - im * t * d.θ)^(-d.α) +function kldivergence(p::Gamma, q::Gamma) + # We use the parametrization with the scale θ + αp, θp = params(p) + αq, θq = params(q) + θp_over_θq = θp / θq + return (αp - αq) * digamma(αp) - loggamma(αp) + loggamma(αq) - + αq * log(θp_over_θq) + αp * (θp_over_θq - 1) +end #### Evaluation & Sampling diff --git a/src/univariate/continuous/generalizedextremevalue.jl b/src/univariate/continuous/generalizedextremevalue.jl index 8583f5587..9dbd1567b 100644 --- a/src/univariate/continuous/generalizedextremevalue.jl +++ b/src/univariate/continuous/generalizedextremevalue.jl @@ -219,43 +219,21 @@ function pdf(d::GeneralizedExtremeValue{T}, x::Real) where T<:Real end end -function logcdf(d::GeneralizedExtremeValue{T}, x::Real) where T<:Real - if insupport(d, x) - (μ, σ, ξ) = params(d) - - z = (x - μ) / σ # Normalise x. 
- if abs(ξ) < eps(one(ξ)) # ξ == 0 - return -exp(- z) - else - return - (1 + z * ξ) ^ ( -1/ξ) - end - elseif x <= minimum(d) - return -T(Inf) - else - return zero(T) - end -end - -function cdf(d::GeneralizedExtremeValue{T}, x::Real) where T<:Real - if insupport(d, x) - (μ, σ, ξ) = params(d) - - z = (x - μ) / σ # Normalise x. - if abs(ξ) < eps(one(ξ)) # ξ == 0 - t = exp(-z) - else - t = (1 + z * ξ) ^ (-1/ξ) - end - return exp(- t) - elseif x <= minimum(d) - return zero(T) +function logcdf(d::GeneralizedExtremeValue, x::Real) + μ, σ, ξ = params(d) + z = (x - μ) / σ + return if abs(ξ) < eps(one(ξ)) # ξ == 0 + -exp(- z) else - return one(T) + # y(x) = y(bound) = 0 if x is not in the support with lower/upper bound + y = max(1 + z * ξ, 0) + - y^(-1/ξ) end end +cdf(d::GeneralizedExtremeValue, x::Real) = exp(logcdf(d, x)) -logccdf(d::GeneralizedExtremeValue, x::Real) = log1p(- cdf(d, x)) ccdf(d::GeneralizedExtremeValue, x::Real) = - expm1(logcdf(d, x)) +logccdf(d::GeneralizedExtremeValue, x::Real) = log1mexp(logcdf(d, x)) #### Sampling diff --git a/src/univariate/continuous/generalizedpareto.jl b/src/univariate/continuous/generalizedpareto.jl index a63d6226c..12a6aaa6e 100644 --- a/src/univariate/continuous/generalizedpareto.jl +++ b/src/univariate/continuous/generalizedpareto.jl @@ -132,26 +132,22 @@ function logpdf(d::GeneralizedPareto{T}, x::Real) where T<:Real return p end -function logccdf(d::GeneralizedPareto{T}, x::Real) where T<:Real - (μ, σ, ξ) = params(d) - - # The logccdf is log(0) outside the support range. - p = -T(Inf) - - if x >= μ - z = (x - μ) / σ - if abs(ξ) < eps() - p = -z - elseif ξ > 0 || (ξ < 0 && x < maximum(d)) - p = (-1 / ξ) * log1p(z * ξ) - end +function logccdf(d::GeneralizedPareto, x::Real) + μ, σ, ξ = params(d) + z = max((x - μ) / σ, 0) # z(x) = z(μ) = 0 if x < μ (lower bound) + return if abs(ξ) < eps(one(ξ)) # ξ == 0 + -z + elseif ξ < 0 + # y(x) = y(μ - σ / ξ) = -1 if x > μ - σ / ξ (upper bound) + -log1p(max(z * ξ, -1)) / ξ + else + -log1p(z * ξ) / ξ end - - return p end - ccdf(d::GeneralizedPareto, x::Real) = exp(logccdf(d, x)) + cdf(d::GeneralizedPareto, x::Real) = -expm1(logccdf(d, x)) +logcdf(d::GeneralizedPareto, x::Real) = log1mexp(logccdf(d, x)) function quantile(d::GeneralizedPareto{T}, p::Real) where T<:Real (μ, σ, ξ) = params(d) diff --git a/src/univariate/continuous/inversegamma.jl b/src/univariate/continuous/inversegamma.jl index e68e33004..2c28f5605 100644 --- a/src/univariate/continuous/inversegamma.jl +++ b/src/univariate/continuous/inversegamma.jl @@ -83,6 +83,11 @@ function entropy(d::InverseGamma) α + loggamma(α) - (1 + α) * digamma(α) + log(θ) end +function kldivergence(p::InverseGamma, q::InverseGamma) + # We can reuse the implementation of Gamma + return kldivergence(p.invd, q.invd) +end + #### Evaluation @@ -91,15 +96,17 @@ function logpdf(d::InverseGamma, x::Real) α * log(θ) - loggamma(α) - (α + 1) * log(x) - θ / x end -cdf(d::InverseGamma, x::Real) = ccdf(d.invd, 1 / x) -ccdf(d::InverseGamma, x::Real) = cdf(d.invd, 1 / x) -logcdf(d::InverseGamma, x::Real) = logccdf(d.invd, 1 / x) -logccdf(d::InverseGamma, x::Real) = logcdf(d.invd, 1 / x) +zval(::InverseGamma, x::Real) = inv(max(x, 0)) + +cdf(d::InverseGamma, x::Real) = ccdf(d.invd, zval(d, x)) +ccdf(d::InverseGamma, x::Real) = cdf(d.invd, zval(d, x)) +logcdf(d::InverseGamma, x::Real) = logccdf(d.invd, zval(d, x)) +logccdf(d::InverseGamma, x::Real) = logcdf(d.invd, zval(d, x)) -quantile(d::InverseGamma, p::Real) = 1 / cquantile(d.invd, p) -cquantile(d::InverseGamma, p::Real) = 1 / 
quantile(d.invd, p) -invlogcdf(d::InverseGamma, p::Real) = 1 / invlogccdf(d.invd, p) -invlogccdf(d::InverseGamma, p::Real) = 1 / invlogcdf(d.invd, p) +quantile(d::InverseGamma, p::Real) = inv(cquantile(d.invd, p)) +cquantile(d::InverseGamma, p::Real) = inv(quantile(d.invd, p)) +invlogcdf(d::InverseGamma, p::Real) = inv(invlogccdf(d.invd, p)) +invlogccdf(d::InverseGamma, p::Real) = inv(invlogcdf(d.invd, p)) function mgf(d::InverseGamma{T}, t::Real) where T<:Real (a, b) = params(d) diff --git a/src/univariate/continuous/inversegaussian.jl b/src/univariate/continuous/inversegaussian.jl index 498b454f9..89a19b74e 100644 --- a/src/univariate/continuous/inversegaussian.jl +++ b/src/univariate/continuous/inversegaussian.jl @@ -92,52 +92,54 @@ function logpdf(d::InverseGaussian{T}, x::Real) where T<:Real end end -function cdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - return normcdf(u * (v - 1)) + exp(2λ / μ) * normcdf(-u * (v + 1)) - else - return zero(T) - end +function cdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + z = normcdf(u * (v - 1)) + exp(2λ / μ) * normcdf(-u * (v + 1)) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? one(z) : z end -function ccdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - normccdf(u * (v - 1)) - exp(2λ / μ) * normcdf(-u * (v + 1)) - else - return one(T) - end +function ccdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + z = normccdf(u * (v - 1)) - exp(2λ / μ) * normcdf(-u * (v + 1)) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? zero(z) : z end -function logcdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - a = normlogcdf(u * (v -1)) - b = 2λ / μ + normlogcdf(-u * (v + 1)) - a + log1pexp(b - a) - else - return -T(Inf) - end +function logcdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + + a = normlogcdf(u * (v - 1)) + b = 2λ / μ + normlogcdf(-u * (v + 1)) + z = logaddexp(a, b) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? zero(z) : z end -function logccdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - a = normlogccdf(u * (v - 1)) - b = 2λ / μ + normlogcdf(-u * (v + 1)) - a + log1mexp(b - a) - else - return zero(T) - end +function logccdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + + a = normlogccdf(u * (v - 1)) + b = 2λ / μ + normlogcdf(-u * (v + 1)) + z = logsubexp(a, b) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? 
oftype(z, -Inf) : z
+end
 
 @quantile_newton InverseGaussian
diff --git a/src/univariate/continuous/kolmogorov.jl b/src/univariate/continuous/kolmogorov.jl
index 4729fbe06..f7bb373ab 100644
--- a/src/univariate/continuous/kolmogorov.jl
+++ b/src/univariate/continuous/kolmogorov.jl
@@ -153,7 +153,7 @@ function rand(rng::AbstractRNG, d::Kolmogorov)
 end
 
 # equivalent to
-# rand(Truncated(Gamma(1.5,1),tp,Inf))
+# rand(truncated(Gamma(1.5,1),tp,Inf))
 function rand_trunc_gamma(rng::AbstractRNG)
     tp = 2.193245422464302 #pi^2/(8*t^2)
     while true
diff --git a/src/univariate/continuous/laplace.jl b/src/univariate/continuous/laplace.jl
index ebd3945ea..5cf1c171d 100644
--- a/src/univariate/continuous/laplace.jl
+++ b/src/univariate/continuous/laplace.jl
@@ -1,20 +1,20 @@
 """
-    Laplace(μ,β)
+    Laplace(μ,θ)
 
-The *Laplace distribution* with location `μ` and scale `β` has probability density function
+The *Laplace distribution* with location `μ` and scale `θ` has probability density function
 
 ```math
-f(x; \\mu, \\beta) = \\frac{1}{2 \\beta} \\exp \\left(- \\frac{|x - \\mu|}{\\beta} \\right)
+f(x; \\mu, \\theta) = \\frac{1}{2 \\theta} \\exp \\left(- \\frac{|x - \\mu|}{\\theta} \\right)
 ```
 
 ```julia
 Laplace()       # Laplace distribution with zero location and unit scale, i.e. Laplace(0, 1)
 Laplace(μ)      # Laplace distribution with location μ and unit scale, i.e. Laplace(μ, 1)
-Laplace(μ, β)   # Laplace distribution with location μ and scale β
+Laplace(μ, θ)   # Laplace distribution with location μ and scale θ
 
-params(d)       # Get the parameters, i.e., (μ, β)
+params(d)       # Get the parameters, i.e., (μ, θ)
 location(d)     # Get the location parameter, i.e. μ
-scale(d)        # Get the scale parameter, i.e. β
+scale(d)        # Get the scale parameter, i.e. θ
 ```
 
 External links
diff --git a/src/univariate/continuous/loguniform.jl b/src/univariate/continuous/loguniform.jl
new file mode 100644
index 000000000..cbfe1890e
--- /dev/null
+++ b/src/univariate/continuous/loguniform.jl
@@ -0,0 +1,75 @@
+"""
+    LogUniform(a,b)
+
+A positive random variable `X` is log-uniformly distributed with parameters `a` and `b` if the logarithm of `X` is `Uniform(log(a), log(b))`.
+The *log uniform* distribution is also known as the *reciprocal distribution*.
+```julia
+LogUniform(1,10)
+```
+External links
+
+* [Log uniform distribution on Wikipedia](https://en.wikipedia.org/wiki/Reciprocal_distribution)
+"""
+struct LogUniform{T<:Real} <: ContinuousUnivariateDistribution
+    a::T
+    b::T
+    LogUniform{T}(a::T, b::T) where {T <: Real} = new{T}(a, b)
+end
+
+function LogUniform(a::T, b::T; check_args=true) where {T <: Real}
+    check_args && @check_args(LogUniform, 0 < a < b)
+    LogUniform{T}(a, b)
+end
+
+LogUniform(a::Real, b::Real; kwargs...) = LogUniform(promote(a, b)...; kwargs...)
+
+convert(::Type{LogUniform{T}}, d::LogUniform) where {T<:Real} = LogUniform(T(d.a), T(d.b))
+Base.minimum(d::LogUniform) = d.a
+Base.maximum(d::LogUniform) = d.b
+
+#### Parameters
+params(d::LogUniform) = (d.a, d.b)
+partype(::LogUniform{T}) where {T<:Real} = T
+
+#### Statistics
+
+function mean(d::LogUniform)
+    a, b = params(d)
+    (b - a) / log(b/a)
+end
+function var(d::LogUniform)
+    a, b = params(d)
+    log_ba = log(b/a)
+    (b^2 - a^2) / (2*log_ba) - ((b-a)/ log_ba)^2
+end
+mode(d::LogUniform)  = d.a
+modes(d::LogUniform) = partype(d)[]
+
+function entropy(d::LogUniform)
+    a,b = params(d)
+    log(a * b) / 2 + log(log(b / a))
+end
+#### Evaluation
+function pdf(d::LogUniform, x::Real)
+    x1, a, b = promote(x, params(d)...) # ensure e.g.
pdf(LogUniform(1,2), 1f0)::Float32 + res = inv(x1 * log(b / a)) + return insupport(d, x1) ? res : zero(res) +end +function cdf(d::LogUniform, x::Real) + x1, a, b = promote(x, params(d)...) # ensure e.g. cdf(LogUniform(1,2), 1f0)::Float32 + x1 = clamp(x1, a, b) + return log(x1 / a) / log(b / a) +end +logpdf(d::LogUniform, x::Real) = log(pdf(d,x)) + +function quantile(d::LogUniform, p::Real) + p1,a,b = promote(p, params(d)...) # ensure e.g. quantile(LogUniform(1,2), 1f0)::Float32 + exp(p1 * log(b/a)) * a +end + +function kldivergence(p::LogUniform, q::LogUniform) + ap, bp, aq, bq = promote(params(p)..., params(q)...) + finite = aq <= ap < bp <= bq + res = log(log(bq / aq) / log(bp / ap)) + return finite ? res : oftype(res, Inf) +end diff --git a/src/univariate/continuous/normal.jl b/src/univariate/continuous/normal.jl index 9612db7a6..fe5feab84 100644 --- a/src/univariate/continuous/normal.jl +++ b/src/univariate/continuous/normal.jl @@ -75,6 +75,15 @@ kurtosis(d::Normal{T}) where {T<:Real} = zero(T) entropy(d::Normal) = (log2π + 1)/2 + log(d.σ) +function kldivergence(p::Normal, q::Normal) + μp = mean(p) + σ²p = var(p) + μq = mean(q) + σ²q = var(q) + σ²p_over_σ²q = σ²p / σ²q + return (abs2(μp - μq) / σ²q - logmxp1(σ²p_over_σ²q)) / 2 +end + #### Evaluation # Helpers @@ -198,7 +207,7 @@ end # quantile function quantile(d::Normal, p::Real) # Promote to ensure that we don't compute erfcinv in low precision and then promote - _p, _μ, _σ = promote(float(p), d.μ, d.σ) + _p, _μ, _σ = map(float, promote(p, d.μ, d.σ)) q = xval(d, -erfcinv(2*_p) * sqrt2) if isnan(_p) return oftype(q, _p) @@ -218,7 +227,7 @@ end # cquantile function cquantile(d::Normal, p::Real) # Promote to ensure that we don't compute erfcinv in low precision and then promote - _p, _μ, _σ = promote(float(p), d.μ, d.σ) + _p, _μ, _σ = map(float, promote(p, d.μ, d.σ)) q = xval(d, erfcinv(2*_p) * sqrt2) if isnan(_p) return oftype(q, _p) diff --git a/src/univariate/continuous/normalcanon.jl b/src/univariate/continuous/normalcanon.jl index 250cb0937..10fae8108 100644 --- a/src/univariate/continuous/normalcanon.jl +++ b/src/univariate/continuous/normalcanon.jl @@ -1,7 +1,10 @@ """ NormalCanon(η, λ) -Canonical Form of Normal distribution +Canonical parametrisation of the Normal distribution with canonical parameters `η` and `λ`. + +The two *canonical parameters* of a normal distribution ``\\mathcal{N}(\\mu, \\sigma^2)`` with mean ``\\mu`` and +standard deviation ``\\sigma`` are ``\\eta = \\sigma^{-2} \\mu`` and ``\\lambda = \\sigma^{-2}``. """ struct NormalCanon{T<:Real} <: ContinuousUnivariateDistribution η::T # σ^(-2) * μ @@ -29,6 +32,7 @@ convert(::Type{NormalCanon{T}}, d::NormalCanon{S}) where {T <: Real, S <: Real} convert(::Type{Normal}, d::NormalCanon) = Normal(d.μ, 1 / sqrt(d.λ)) convert(::Type{NormalCanon}, d::Normal) = (λ = 1 / d.σ^2; NormalCanon(λ * d.μ, λ)) +meanform(d::NormalCanon) = convert(Normal, d) canonform(d::Normal) = convert(NormalCanon, d) diff --git a/src/univariate/continuous/pareto.jl b/src/univariate/continuous/pareto.jl index 77d158b56..00901b876 100644 --- a/src/univariate/continuous/pareto.jl +++ b/src/univariate/continuous/pareto.jl @@ -91,19 +91,17 @@ function logpdf(d::Pareto{T}, x::Real) where T<:Real x >= θ ? log(α) + α * log(θ) - (α + 1) * log(x) : -T(Inf) end -function ccdf(d::Pareto{T}, x::Real) where T<:Real - (α, θ) = params(d) - x >= θ ? 
(θ / x)^α : one(T)
+function ccdf(d::Pareto, x::Real)
+    α, θ = params(d)
+    return (θ / max(x, θ))^α
 end
-
 cdf(d::Pareto, x::Real) = 1 - ccdf(d, x)
 
-function logccdf(d::Pareto{T}, x::Real) where T<:Real
-    (α, θ) = params(d)
-    x >= θ ? α * log(θ / x) : zero(T)
+function logccdf(d::Pareto, x::Real)
+    α, θ = params(d)
+    return xlogy(α, θ / max(x, θ))
 end
-
-logcdf(d::Pareto, x::Real) = log1p(-ccdf(d, x))
+logcdf(d::Pareto, x::Real) = log1mexp(logccdf(d, x))
 
 cquantile(d::Pareto, p::Real) = d.θ / p^(1 / d.α)
 quantile(d::Pareto, p::Real) = cquantile(d, 1 - p)
diff --git a/src/univariate/continuous/rayleigh.jl b/src/univariate/continuous/rayleigh.jl
index 374d65ac2..0b93dcdc5 100644
--- a/src/univariate/continuous/rayleigh.jl
+++ b/src/univariate/continuous/rayleigh.jl
@@ -77,10 +77,13 @@ function logpdf(d::Rayleigh{T}, x::Real) where T<:Real
     x > 0 ? log(x / σ2) - (x^2) / (2σ2) : -T(Inf)
 end
 
-logccdf(d::Rayleigh{T}, x::Real) where {T<:Real} = x > 0 ? - (x^2) / (2d.σ^2) : zero(T)
+function logccdf(d::Rayleigh, x::Real)
+    z = - x^2 / (2 * d.σ^2)
+    return x > 0 ? z : isnan(x) ? oftype(z, NaN) : zero(z)
+end
 ccdf(d::Rayleigh, x::Real) = exp(logccdf(d, x))
 
-cdf(d::Rayleigh, x::Real) = 1 - ccdf(d, x)
+cdf(d::Rayleigh, x::Real) = -expm1(logccdf(d, x))
 logcdf(d::Rayleigh, x::Real) = log1mexp(logccdf(d, x))
 
 quantile(d::Rayleigh, p::Real) = sqrt(-2d.σ^2 * log1p(-p))
diff --git a/src/univariate/continuous/rician.jl b/src/univariate/continuous/rician.jl
new file mode 100644
index 000000000..9a5ccdb59
--- /dev/null
+++ b/src/univariate/continuous/rician.jl
@@ -0,0 +1,171 @@
+"""
+    Rician(ν, σ)
+
+The *Rician distribution* with parameters `ν` and `σ` has probability density function:
+
+```math
+f(x; \\nu, \\sigma) = \\frac{x}{\\sigma^2} \\exp\\left( \\frac{-(x^2 + \\nu^2)}{2\\sigma^2} \\right) I_0\\left( \\frac{x\\nu}{\\sigma^2} \\right).
+```
+
+If shape and scale parameters `K` and `Ω` are given instead, `ν` and `σ` may be computed from them:
+
+```math
+\\sigma = \\sqrt{\\frac{\\Omega}{2(K + 1)}}, \\quad \\nu = \\sigma\\sqrt{2K}
+```
+
+```julia
+Rician()        # Rician distribution with parameters ν=0 and σ=1
+Rician(ν, σ)    # Rician distribution with parameters ν and σ
+
+params(d)       # Get the parameters, i.e. (ν, σ)
+shape(d)        # Get the shape parameter K = ν²/2σ²
+scale(d)        # Get the scale parameter Ω = ν² + 2σ²
+```
+
+External links:
+
+* [Rician distribution on Wikipedia](https://en.wikipedia.org/wiki/Rice_distribution)
+
+"""
+struct Rician{T<:Real} <: ContinuousUnivariateDistribution
+    ν::T
+    σ::T
+    Rician{T}(ν, σ) where {T} = new{T}(ν, σ)
+end
+
+function Rician(ν::T, σ::T; check_args=true) where {T<:Real}
+    check_args && @check_args(Rician, ν ≥ zero(ν) && σ ≥ zero(σ))
+    return Rician{T}(ν, σ)
+end
+
+Rician() = Rician(0.0, 1.0)
+Rician(ν::Real, σ::Real) = Rician(promote(ν, σ)...)
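As an aside, the (K, Ω) ↔ (ν, σ) relation stated in the docstring above can be sanity-checked numerically (a minimal sketch, not part of the patch; the values are arbitrary and `shape`/`scale` are the accessors defined further down in this file):

```julia
using Distributions

d = Rician(1.0, 2.0)          # ν = 1, σ = 2
K = shape(d)                  # ν² / (2σ²) = 0.125
Ω = scale(d)                  # ν² + 2σ² = 9.0
σ̂ = sqrt(Ω / (2 * (K + 1)))  # recovers σ = 2.0
ν̂ = σ̂ * sqrt(2 * K)          # recovers ν = 1.0
```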
+Rician(ν::Integer, σ::Integer) = Rician(float(ν), float(σ)) + +@distr_support Rician 0.0 Inf + +#### Conversions + +function convert(::Type{Rician{T}}, ν::Real, σ::Real) where T<:Real + Rician(T(ν), T(σ)) +end + +function convert(::Type{Rician{T}}, d::Rician{S}) where {T <: Real, S <: Real} + Rician(T(d.ν), T(d.σ); check_args=false) +end + +#### Parameters + +shape(d::Rician) = d.ν^2 / (2 * d.σ^2) +scale(d::Rician) = d.ν^2 + 2 * d.σ^2 + +params(d::Rician) = (d.ν, d.σ) +partype(d::Rician{T}) where {T<:Real} = T + +#### Statistics + +# helper +_Lhalf(x) = exp(x/2) * ((1-x) * besseli(zero(x), -x/2) - x * besseli(oneunit(x), -x/2)) + +mean(d::Rician) = d.σ * sqrthalfπ * _Lhalf(-d.ν^2/(2 * d.σ^2)) +var(d::Rician) = 2 * d.σ^2 + d.ν^2 - halfπ * d.σ^2 * _Lhalf(-d.ν^2/(2 * d.σ^2))^2 + +function mode(d::Rician) + m = mean(d) + _minimize_gss(x -> -pdf(d, x), zero(m), m) +end + +# helper: 1D minimization using Golden-section search +function _minimize_gss(f, a, b; tol=1e-12) + ϕ = (√5 + 1) / 2 + c = b - (b - a) / ϕ + d = a + (b - a) / ϕ + while abs(b - a) > tol + if f(c) < f(d) + b = d + else + a = c + end + c = b - (b - a) / ϕ + d = a + (b - a) / ϕ + end + (b + a) / 2 +end + +#### PDF/CDF/quantile delegated to NoncentralChisq + +function quantile(d::Rician, x::Real) + ν, σ = params(d) + return sqrt(quantile(NoncentralChisq(2, (ν / σ)^2), x)) * σ +end + +function cquantile(d::Rician, x::Real) + ν, σ = params(d) + return sqrt(cquantile(NoncentralChisq(2, (ν / σ)^2), x)) * σ +end + +function pdf(d::Rician, x::Real) + ν, σ = params(d) + result = 2 * x / σ^2 * pdf(NoncentralChisq(2, (ν / σ)^2), (x / σ)^2) + return x < 0 || isinf(x) ? zero(result) : result +end + +function logpdf(d::Rician, x::Real) + ν, σ = params(d) + result = log(2 * abs(x) / σ^2) + logpdf(NoncentralChisq(2, (ν / σ)^2), (x / σ)^2) + return x < 0 || isinf(x) ? oftype(result, -Inf) : result +end + +function cdf(d::Rician, x::Real) + ν, σ = params(d) + result = cdf(NoncentralChisq(2, (ν / σ)^2), (x / σ)^2) + return x < 0 ? zero(result) : result +end + +function logcdf(d::Rician, x::Real) + ν, σ = params(d) + result = logcdf(NoncentralChisq(2, (ν / σ)^2), (x / σ)^2) + return x < 0 ? 
oftype(result, -Inf) : result
+end
+
+#### Sampling
+
+function rand(rng::AbstractRNG, d::Rician)
+    x = randn(rng) * d.σ + d.ν
+    y = randn(rng) * d.σ
+    hypot(x, y)
+end
+
+#### Fitting
+
+# implementation based on the Koay inversion technique
+function fit(::Type{<:Rician}, x::AbstractArray{T}; tol=1e-12, maxiters=500) where T
+    μ₁ = mean(x)
+    μ₂ = var(x)
+    r = μ₁ / √μ₂
+    if r < sqrt(π/(4-π))
+        ν = zero(float(T))
+        σ = scale(fit(Rayleigh, x))
+    else
+        ξ(θ) = 2 + θ^2 - π/8 * exp(-θ^2 / 2) * ((2 + θ^2) * besseli(0, θ^2 / 4) + θ^2 * besseli(1, θ^2 / 4))^2
+        g(θ) = sqrt(ξ(θ) * (1+r^2) - 2)
+        θ = g(1)
+        for j in 1:maxiters
+            θ⁻ = θ
+            θ = g(θ)
+            abs(θ - θ⁻) < tol && break
+        end
+        ξθ = ξ(θ)
+        σ = convert(float(T), sqrt(μ₂ / ξθ))
+        ν = convert(float(T), sqrt(μ₁^2 + (ξθ - 2) * σ^2))
+    end
+    Rician(ν, σ)
+end
+
+# Not implemented:
+#     skewness(d::Rician)
+#     kurtosis(d::Rician)
+#     entropy(d::Rician)
+#     mgf(d::Rician, t::Real)
+#     cf(d::Rician, t::Real)
+
diff --git a/src/univariate/continuous/symtriangular.jl b/src/univariate/continuous/symtriangular.jl
index dd12f06e5..77f25f68e 100644
--- a/src/univariate/continuous/symtriangular.jl
+++ b/src/univariate/continuous/symtriangular.jl
@@ -1,5 +1,5 @@
 """
-    SymTriangularDist(μ,σ)
+    SymTriangularDist(μ, σ)
 
 The *Symmetric triangular distribution* with location `μ` and scale `σ` has probability density function
 
@@ -9,8 +9,8 @@ f(x; \\mu, \\sigma) = \\frac{1}{\\sigma} \\left( 1 - \\left| \\frac{x - \\mu}{\\
 
 ```julia
 SymTriangularDist()      # Symmetric triangular distribution with zero location and unit scale
-SymTriangularDist(u)     # Symmetric triangular distribution with location μ and unit scale
-SymTriangularDist(u, s)  # Symmetric triangular distribution with location μ and scale σ
+SymTriangularDist(μ)     # Symmetric triangular distribution with location μ and unit scale
+SymTriangularDist(μ, σ)  # Symmetric triangular distribution with location μ and scale σ
 
 params(d)       # Get the parameters, i.e. (μ, σ)
 location(d)     # Get the location parameter, i.e. μ
@@ -68,42 +68,30 @@ entropy(d::SymTriangularDist) = 1//2 + log(d.σ)
 
 #### Evaluation
 
-zval(d::SymTriangularDist, x::Real) = (x - d.μ) / d.σ
+zval(d::SymTriangularDist, x::Real) = min(abs(x - d.μ) / d.σ, 1)
 xval(d::SymTriangularDist, z::Real) = d.μ + z * d.σ
 
+pdf(d::SymTriangularDist, x::Real) = (1 - zval(d, x)) / scale(d)
+logpdf(d::SymTriangularDist, x::Real) = log(pdf(d, x))
 
-pdf(d::SymTriangularDist{T}, x::Real) where {T<:Real} = insupport(d, x) ? (1 - abs(zval(d, x))) / scale(d) : zero(T)
-
-function logpdf(d::SymTriangularDist{T}, x::Real) where T<:Real
-    insupport(d, x) ? log((1 - abs(zval(d, x))) / scale(d)) : -convert(T, T(Inf))
+function cdf(d::SymTriangularDist, x::Real)
+    r = (1 - zval(d, x))^2/2
+    return x < d.μ ? r : 1 - r
 end
 
-function cdf(d::SymTriangularDist{T}, x::Real) where T<:Real
-    (μ, σ) = params(d)
-    x <= μ - σ ? zero(T) :
-    x <= μ ? (1 + zval(d, x))^2/2 :
-    x < μ + σ ? 1 - (1 - zval(d, x))^2/2 : one(T)
+function ccdf(d::SymTriangularDist, x::Real)
+    r = (1 - zval(d, x))^2/2
+    return x < d.μ ? 1 - r : r
 end
 
-function ccdf(d::SymTriangularDist{T}, x::Real) where T<:Real
-    (μ, σ) = params(d)
-    x <= μ - σ ? one(T) :
-    x <= μ ? 1 - (1 + zval(d, x))^2/2 :
-    x < μ + σ ? (1 - zval(d, x))^2/2 : zero(T)
+function logcdf(d::SymTriangularDist, x::Real)
+    log_r = 2 * log1p(- zval(d, x)) + loghalf
+    return x < d.μ ? log_r : log1mexp(log_r)
 end
 
-function logcdf(d::SymTriangularDist{T}, x::Real) where T<:Real
-    (μ, σ) = params(d)
-    x <= μ - σ ? -T(Inf) :
-    x <= μ ? loghalf + 2*log1p(zval(d, x)) :
-    x < μ + σ ?
log1p(-1/2 * (1 - zval(d, x))^2) : zero(T) -end - -function logccdf(d::SymTriangularDist{T}, x::Real) where T<:Real - (μ, σ) = params(d) - x <= μ - σ ? zero(T) : - x <= μ ? log1p(-1/2 * (1 + zval(d, x))^2) : - x < μ + σ ? loghalf + 2*log1p(-zval(d, x)) : -T(Inf) +function logccdf(d::SymTriangularDist, x::Real) + log_r = 2 * log1p(- zval(d, x)) + loghalf + return x < d.μ ? log1mexp(log_r) : log_r end quantile(d::SymTriangularDist, p::Real) = p < 1/2 ? xval(d, sqrt(2p) - 1) : diff --git a/src/univariate/continuous/triangular.jl b/src/univariate/continuous/triangular.jl index 257a20695..15b430c2a 100644 --- a/src/univariate/continuous/triangular.jl +++ b/src/univariate/continuous/triangular.jl @@ -90,21 +90,23 @@ entropy(d::TriangularDist{T}) where {T<:Real} = one(T)/2 + log((d.b - d.a) / 2) #### Evaluation -function pdf(d::TriangularDist{T}, x::Real) where T<:Real - (a, b, c) = params(d) - x <= a ? zero(T) : - x < c ? 2 * (x - a) / ((b - a) * (c - a)) : - x == c ? 2 / (b - a) : - x <= b ? 2 * (b - x) / ((b - a) * (b - c)) : zero(T) +function pdf(d::TriangularDist, x::Real) + a, b, c = params(d) + return if x < c + 2 * max(x - a, 0) / ((b - a) * (c - a)) + else + 2 * max(b - x, 0) / ((b - a) * (b - c)) + end end logpdf(d::TriangularDist, x::Real) = log(pdf(d, x)) -function cdf(d::TriangularDist{T}, x::Real) where T<:Real - (a, b, c) = params(d) - x <= a ? zero(T) : - x < c ? (x - a)^2 / ((b - a) * (c - a)) : - x == c ? (c - a) / (b - a) : - x <= b ? 1 - (b - x)^2 / ((b - a) * (b - c)) : one(T) +function cdf(d::TriangularDist, x::Real) + a, b, c = params(d) + return if x < c + max(x - a, 0)^2 / ((b - a) * (c - a)) + else + 1 - max(b - x, 0)^2 / ((b - a) * (b - c)) + end end function quantile(d::TriangularDist, p::Real) diff --git a/src/univariate/continuous/uniform.jl b/src/univariate/continuous/uniform.jl index fa7cfffb9..fc04a9546 100644 --- a/src/univariate/continuous/uniform.jl +++ b/src/univariate/continuous/uniform.jl @@ -70,20 +70,23 @@ entropy(d::Uniform) = log(d.b - d.a) #### Evaluation -pdf(d::Uniform{T}, x::Real) where {T<:Real} = insupport(d, x) ? 1 / (d.b - d.a) : zero(T) -logpdf(d::Uniform{T}, x::Real) where {T<:Real} = insupport(d, x) ? -log(d.b - d.a) : -T(Inf) -gradlogpdf(d::Uniform{T}, x::Real) where {T<:Real} = zero(T) - -function cdf(d::Uniform{T}, x::Real) where T<:Real - (a, b) = params(d) - x <= a ? zero(T) : - x >= d.b ? one(T) : (x - a) / (b - a) +function pdf(d::Uniform, x::Real) + val = inv(d.b - d.a) + return insupport(d, x) ? val : zero(val) +end +function logpdf(d::Uniform, x::Real) + diff = d.b - d.a + return insupport(d, x) ? -log(diff) : log(zero(diff)) end +gradlogpdf(d::Uniform, x::Real) = zero(partype(d)) / oneunit(x) -function ccdf(d::Uniform{T}, x::Real) where T<:Real - (a, b) = params(d) - x <= a ? one(T) : - x >= d.b ? zero(T) : (b - x) / (b - a) +function cdf(d::Uniform, x::Real) + a, b = params(d) + return clamp((x - a) / (b - a), 0, 1) +end +function ccdf(d::Uniform, x::Real) + a, b = params(d) + return clamp((b - x) / (b - a), 0, 1) end quantile(d::Uniform, p::Real) = d.a + p * (d.b - d.a) diff --git a/src/univariate/continuous/vonmises.jl b/src/univariate/continuous/vonmises.jl index 605aefc47..eb55c5b64 100644 --- a/src/univariate/continuous/vonmises.jl +++ b/src/univariate/continuous/vonmises.jl @@ -70,7 +70,11 @@ pdf(d::VonMises{T}, x::Real) where T<:Real = logpdf(d::VonMises{T}, x::Real) where T<:Real = minimum(d) ≤ x ≤ maximum(d) ? 
d.κ * (cos(x - d.μ) - 1) - log(d.I0κx) - log2π : -T(Inf) -cdf(d::VonMises, x::Real) = _vmcdf(d.κ, d.I0κx, x - d.μ, 1e-15) +function cdf(d::VonMises, x::Real) + # handle `±Inf` for which `sin` can't be evaluated + z = clamp(x, extrema(d)...) + return _vmcdf(d.κ, d.I0κx, z - d.μ, 1e-15) +end function _vmcdf(κ::Real, I0κx::Real, x::Real, tol::Real) tol *= exp(-κ) diff --git a/src/univariate/continuous/weibull.jl b/src/univariate/continuous/weibull.jl index d081c0717..865515def 100644 --- a/src/univariate/continuous/weibull.jl +++ b/src/univariate/continuous/weibull.jl @@ -113,18 +113,18 @@ function logpdf(d::Weibull{T}, x::Real) where T<:Real end end -zv(d::Weibull, x::Real) = (x / d.θ) ^ d.α -xv(d::Weibull, z::Real) = d.θ * z ^ (1 / d.α) +zval(d::Weibull, x::Real) = (max(x, 0) / d.θ) ^ d.α +xval(d::Weibull, z::Real) = d.θ * z ^ (1 / d.α) -cdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? -expm1(-zv(d, x)) : zero(T) -ccdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? exp(-zv(d, x)) : one(T) -logcdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? log1mexp(-zv(d, x)) : -T(Inf) -logccdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? -zv(d, x) : zero(T) +cdf(d::Weibull, x::Real) = -expm1(- zval(d, x)) +ccdf(d::Weibull, x::Real) = exp(- zval(d, x)) +logcdf(d::Weibull, x::Real) = log1mexp(- zval(d, x)) +logccdf(d::Weibull, x::Real) = - zval(d, x) -quantile(d::Weibull, p::Real) = xv(d, -log1p(-p)) -cquantile(d::Weibull, p::Real) = xv(d, -log(p)) -invlogcdf(d::Weibull, lp::Real) = xv(d, -log1mexp(lp)) -invlogccdf(d::Weibull, lp::Real) = xv(d, -lp) +quantile(d::Weibull, p::Real) = xval(d, -log1p(-p)) +cquantile(d::Weibull, p::Real) = xval(d, -log(p)) +invlogcdf(d::Weibull, lp::Real) = xval(d, -log1mexp(lp)) +invlogccdf(d::Weibull, lp::Real) = xval(d, -lp) function gradlogpdf(d::Weibull{T}, x::Real) where T<:Real if insupport(Weibull, x) @@ -138,7 +138,7 @@ end #### Sampling -rand(rng::AbstractRNG, d::Weibull) = xv(d, randexp(rng)) +rand(rng::AbstractRNG, d::Weibull) = xval(d, randexp(rng)) #### Fit model diff --git a/src/univariate/discrete/betabinomial.jl b/src/univariate/discrete/betabinomial.jl index f03ccf4f1..be1808229 100644 --- a/src/univariate/discrete/betabinomial.jl +++ b/src/univariate/discrete/betabinomial.jl @@ -92,6 +92,16 @@ function logpdf(d::BetaBinomial, k::Real) return _insupport ? result : oftype(result, -Inf) end +# sum values of the probability mass function +cdf(d::BetaBinomial, k::Int) = integerunitrange_cdf(d, k) + +for f in (:ccdf, :logcdf, :logccdf) + @eval begin + $f(d::BetaBinomial, k::Real) = $(Symbol(f, :_int))(d, k) + $f(d::BetaBinomial, k::Int) = $(Symbol(:integerunitrange_, f))(d, k) + end +end + entropy(d::BetaBinomial) = entropy(Categorical(pdf.(Ref(d),support(d)))) median(d::BetaBinomial) = median(Categorical(pdf.(Ref(d),support(d)))) - 1 mode(d::BetaBinomial) = argmax(pdf.(Ref(d),support(d))) - 1 diff --git a/src/univariate/discrete/binomial.jl b/src/univariate/discrete/binomial.jl index bc495bcb7..afdf89472 100644 --- a/src/univariate/discrete/binomial.jl +++ b/src/univariate/discrete/binomial.jl @@ -110,18 +110,6 @@ end @_delegate_statsfuns Binomial binom n p -function pdf(d::Binomial, x::Real) - _insupport = insupport(d, x) - s = pdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : zero(s) -end - -function logpdf(d::Binomial, x::Real) - _insupport = insupport(d, x) - s = logpdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? 
s : oftype(s, -Inf)
-end
-
 function rand(rng::AbstractRNG, d::Binomial)
     p, n = d.p, d.n
     if p <= 0.5
diff --git a/src/univariate/discrete/categorical.jl b/src/univariate/discrete/categorical.jl
index 36e806c76..851881172 100644
--- a/src/univariate/discrete/categorical.jl
+++ b/src/univariate/discrete/categorical.jl
@@ -68,17 +68,12 @@ end
 
 ### Evaluation
 
-function cdf(d::Categorical{T}, x::Int) where T<:Real
-    k = ncategories(d)
-    p = probs(d)
-    x < 1 && return zero(T)
-    x >= k && return one(T)
-    c = p[1]
-    for i = 2:x
-        @inbounds c += p[i]
-    end
-    return c
-end
+# the fallbacks are overridden by `DiscreteNonParametric`
+cdf(d::Categorical, x::Real) = cdf_int(d, x)
+ccdf(d::Categorical, x::Real) = ccdf_int(d, x)
+
+cdf(d::Categorical, x::Int) = integerunitrange_cdf(d, x)
+ccdf(d::Categorical, x::Int) = integerunitrange_ccdf(d, x)
 
 function pdf(d::Categorical, x::Real)
     ps = probs(d)
diff --git a/src/univariate/discrete/dirac.jl b/src/univariate/discrete/dirac.jl
index 108a5609b..7be41459f 100644
--- a/src/univariate/discrete/dirac.jl
+++ b/src/univariate/discrete/dirac.jl
@@ -42,8 +42,10 @@ entropy(d::Dirac{T}) where {T} = zero(T)
 pdf(d::Dirac, x::Real) = insupport(d, x) ? 1.0 : 0.0
 logpdf(d::Dirac, x::Real) = insupport(d, x) ? 0.0 : -Inf
 
-cdf(d::Dirac, x::Real) = x < d.value ? 0.0 : 1.0
-cdf(d::Dirac, x::Integer) = x < d.value ? 0.0 : 1.0
+cdf(d::Dirac, x::Real) = x < d.value ? 0.0 : isnan(x) ? NaN : 1.0
+logcdf(d::Dirac, x::Real) = x < d.value ? -Inf : isnan(x) ? NaN : 0.0
+ccdf(d::Dirac, x::Real) = x < d.value ? 1.0 : isnan(x) ? NaN : 0.0
+logccdf(d::Dirac, x::Real) = x < d.value ? 0.0 : isnan(x) ? NaN : -Inf
 
 quantile(d::Dirac{T}, p::Real) where {T} = 0 <= p <= 1 ? d.value : T(NaN)
diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl
index 940166cd1..e54a0789f 100644
--- a/src/univariate/discrete/discretenonparametric.jl
+++ b/src/univariate/discrete/discretenonparametric.jl
@@ -105,43 +105,57 @@ function pdf(d::DiscreteNonParametric, x::Real)
 end
 logpdf(d::DiscreteNonParametric, x::Real) = log(pdf(d, x))
 
-# Helper functions for cdf and ccdf required to fix ambiguous method
-# error involving [c]cdf(::DisceteUnivariateDistribution, ::Int)
-function _cdf(d::DiscreteNonParametric{T,P}, x::T) where {T,P}
-    x > maximum(d) && return 1.0
-    s = zero(P)
+function cdf(d::DiscreteNonParametric, x::Real)
     ps = probs(d)
+    P = float(eltype(ps))
+
+    # trivial cases
+    x < minimum(d) && return zero(P)
+    x >= maximum(d) && return one(P)
+    isnan(x) && return P(NaN)
+
+    n = length(ps)
     stop_idx = searchsortedlast(support(d), x)
-    for i in 1:stop_idx
-        s += ps[i]
+    s = zero(P)
+    if stop_idx < div(n, 2)
+        @inbounds for i in 1:stop_idx
+            s += ps[i]
+        end
+    else
+        @inbounds for i in (stop_idx + 1):n
+            s += ps[i]
+        end
+        s = 1 - s
     end
+
     return s
 end
-cdf(d::DiscreteNonParametric{T}, x::Integer) where T = _cdf(d, convert(T, x))
-cdf(d::DiscreteNonParametric{T}, x::Real) where T = _cdf(d, convert(T, x))
 
-function _ccdf(d::DiscreteNonParametric{T,P}, x::T) where {T,P}
-    x < minimum(d) && return 1.0
-    s = zero(P)
+function ccdf(d::DiscreteNonParametric, x::Real)
     ps = probs(d)
+    P = float(eltype(ps))
+
+    # trivial cases
+    x < minimum(d) && return one(P)
+    x >= maximum(d) && return zero(P)
+    isnan(x) && return P(NaN)
+
+    n = length(ps)
     stop_idx = searchsortedlast(support(d), x)
-    for i in (stop_idx+1):length(ps)
-        s += ps[i]
+    s = zero(P)
+    if stop_idx < div(n, 2)
+        @inbounds for i in 1:stop_idx
+            s += ps[i]
+        end
+        s = 1 - s
+    else
+        @inbounds for i in
(stop_idx + 1):n + s += ps[i] + end end + return s end -ccdf(d::DiscreteNonParametric{T}, x::Integer) where T = _ccdf(d, convert(T, x)) -ccdf(d::DiscreteNonParametric{T}, x::Real) where T = _ccdf(d, convert(T, x)) - -# fix incorrect defaults -for f in (:cdf, :ccdf) - _f = Symbol(:_, f) - logf = Symbol(:log, f) - @eval begin - $logf(d::DiscreteNonParametric{T}, x::Integer) where T = log($_f(d, convert(T, x))) - $logf(d::DiscreteNonParametric{T}, x::Real) where T = log($_f(d, convert(T, x))) - end -end function quantile(d::DiscreteNonParametric, q::Real) 0 <= q <= 1 || throw(DomainError()) @@ -164,71 +178,36 @@ insupport(d::DiscreteNonParametric, x::Real) = mean(d::DiscreteNonParametric) = dot(probs(d), support(d)) -function var(d::DiscreteNonParametric{T}) where T - m = mean(d) +function var(d::DiscreteNonParametric) x = support(d) p = probs(d) - k = length(x) - σ² = zero(T) - for i in 1:k - @inbounds σ² += abs2(x[i] - m) * p[i] - end - σ² + return var(x, Weights(p, one(eltype(p))); corrected=false) end -function skewness(d::DiscreteNonParametric{T}) where T - m = mean(d) +function skewness(d::DiscreteNonParametric) x = support(d) p = probs(d) - k = length(x) - μ₃ = zero(T) - σ² = zero(T) - @inbounds for i in 1:k - d = x[i] - m - d²w = abs2(d) * p[i] - μ₃ += d * d²w - σ² += d²w - end - μ₃ / (σ² * sqrt(σ²)) + return skewness(x, Weights(p, one(eltype(p)))) end -function kurtosis(d::DiscreteNonParametric{T}) where T - m = mean(d) +function kurtosis(d::DiscreteNonParametric) x = support(d) p = probs(d) - k = length(x) - μ₄ = zero(T) - σ² = zero(T) - @inbounds for i in 1:k - d² = abs2(x[i] - m) - d²w = d² * p[i] - μ₄ += d² * d²w - σ² += d²w - end - μ₄ / abs2(σ²) - 3 + return kurtosis(x, Weights(p, one(eltype(p)))) end entropy(d::DiscreteNonParametric) = entropy(probs(d)) entropy(d::DiscreteNonParametric, b::Real) = entropy(probs(d), b) -mode(d::DiscreteNonParametric) = support(d)[argmax(probs(d))] -function modes(d::DiscreteNonParametric{T,P}) where {T,P} +function mode(d::DiscreteNonParametric) x = support(d) p = probs(d) - k = length(x) - mds = T[] - max_p = zero(P) - @inbounds for i in 1:k - pi = p[i] - xi = x[i] - if pi > max_p - max_p = pi - mds = [xi] - elseif pi == max_p - push!(mds, xi) - end - end - mds + return mode(x, Weights(p, one(eltype(p)))) +end +function modes(d::DiscreteNonParametric) + x = support(d) + p = probs(d) + return modes(x, Weights(p, one(eltype(p)))) end function mgf(d::DiscreteNonParametric, t::Real) diff --git a/src/univariate/discrete/discreteuniform.jl b/src/univariate/discrete/discreteuniform.jl index cc04abc1b..bab5393ec 100644 --- a/src/univariate/discrete/discreteuniform.jl +++ b/src/univariate/discrete/discreteuniform.jl @@ -70,13 +70,21 @@ modes(d::DiscreteUniform) = [d.a:d.b] ### Evaluation -cdf(d::DiscreteUniform, x::Int) = (x < d.a ? 0.0 : - x > d.b ? 1.0 : - (floor(Int,x) - d.a + 1.0) * d.pv) - pdf(d::DiscreteUniform, x::Real) = insupport(d, x) ? 
d.pv : zero(d.pv) logpdf(d::DiscreteUniform, x::Real) = log(pdf(d, x)) +function cdf(d::DiscreteUniform, x::Int) + a = d.a + result = (x - a + 1) * d.pv + return if x < a + zero(result) + elseif x >= d.b + one(result) + else + result + end +end + quantile(d::DiscreteUniform, p::Float64) = d.a + floor(Int,p * span(d)) function mgf(d::DiscreteUniform, t::T) where {T <: Real} diff --git a/src/univariate/discrete/geometric.jl b/src/univariate/discrete/geometric.jl index fece8275a..64cdb4ba3 100644 --- a/src/univariate/discrete/geometric.jl +++ b/src/univariate/discrete/geometric.jl @@ -73,25 +73,24 @@ function logpdf(d::Geometric, x::Real) insupport(d, x) ? log(d.p) + log1p(-d.p) * x : log(zero(d.p)) end -function cdf(d::Geometric{T}, x::Int) where T<:Real - x < 0 && return zero(T) +function cdf(d::Geometric, x::Int) p = succprob(d) - n = x + 1 + n = max(x + 1, 0) p < 1/2 ? -expm1(log1p(-p)*n) : 1 - (1 - p)^n end -function ccdf(d::Geometric{T}, x::Int) where T<:Real - x < 0 && return one(T) +ccdf(d::Geometric, x::Real) = ccdf_int(d, x) +function ccdf(d::Geometric, x::Int) p = succprob(d) - n = x + 1 + n = max(x + 1, 0) p < 1/2 ? exp(log1p(-p)*n) : (1 - p)^n end -function logcdf(d::Geometric{T}, x::Int) where T<:Real - x < 0 ? -T(Inf) : log1mexp(log1p(-d.p) * (x + 1)) -end +logcdf(d::Geometric, x::Real) = logcdf_int(d, x) +logcdf(d::Geometric, x::Int) = log1mexp(log1p(-d.p) * max(x + 1, 0)) -logccdf(d::Geometric, x::Int) = x < 0 ? zero(d.p) : log1p(-d.p) * (x + 1) +logccdf(d::Geometric, x::Real) = logccdf_int(d, x) +logccdf(d::Geometric, x::Int) = log1p(-d.p) * max(x + 1, 0) quantile(d::Geometric, p::Real) = invlogccdf(d, log1p(-p)) diff --git a/src/univariate/discrete/hypergeometric.jl b/src/univariate/discrete/hypergeometric.jl index 39f1a41bf..fe4b6ada0 100644 --- a/src/univariate/discrete/hypergeometric.jl +++ b/src/univariate/discrete/hypergeometric.jl @@ -79,18 +79,6 @@ entropy(d::Hypergeometric) = entropy(pdf.(Ref(d), support(d))) @_delegate_statsfuns Hypergeometric hyper ns nf n -function pdf(d::Hypergeometric, x::Real) - _insupport = insupport(d, x) - s = pdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : zero(s) -end - -function logpdf(d::Hypergeometric, x::Real) - _insupport = insupport(d, x) - s = logpdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : oftype(s, -Inf) -end - ## sampling # TODO: remove RFunctions dependency. 
Implement: diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index 7809f3691..2b0be1f58 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -100,13 +100,13 @@ function logpdf(d::NegativeBinomial, k::Real) end # cdf and quantile functions are more involved so we still rely on Rmath -cdf( d::NegativeBinomial, x::Int) = nbinomcdf( d.r, d.p, x) -ccdf( d::NegativeBinomial, x::Int) = nbinomccdf( d.r, d.p, x) -logcdf( d::NegativeBinomial, x::Int) = nbinomlogcdf( d.r, d.p, x) -logccdf( d::NegativeBinomial, x::Int) = nbinomlogccdf( d.r, d.p, x) -quantile( d::NegativeBinomial, q::Real) = convert(Int, nbinominvcdf( d.r, d.p, q)) -cquantile( d::NegativeBinomial, q::Real) = convert(Int, nbinominvccdf( d.r, d.p, q)) -invlogcdf( d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogcdf( d.r, d.p, lq)) +cdf(d::NegativeBinomial, x::Real) = nbinomcdf(d.r, d.p, x) +ccdf(d::NegativeBinomial, x::Real) = nbinomccdf(d.r, d.p, x) +logcdf(d::NegativeBinomial, x::Real) = nbinomlogcdf(d.r, d.p, x) +logccdf(d::NegativeBinomial, x::Real) = nbinomlogccdf(d.r, d.p, x) +quantile(d::NegativeBinomial, q::Real) = convert(Int, nbinominvcdf(d.r, d.p, q)) +cquantile(d::NegativeBinomial, q::Real) = convert(Int, nbinominvccdf(d.r, d.p, q)) +invlogcdf(d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogcdf(d.r, d.p, lq)) invlogccdf(d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogccdf(d.r, d.p, lq)) ## sampling diff --git a/src/univariate/discrete/noncentralhypergeometric.jl b/src/univariate/discrete/noncentralhypergeometric.jl index 9e25939da..04abab549 100644 --- a/src/univariate/discrete/noncentralhypergeometric.jl +++ b/src/univariate/discrete/noncentralhypergeometric.jl @@ -119,7 +119,7 @@ function pdf(d::FisherNoncentralHypergeometric, k::Integer) return fₖ/s end -logpdf(d::FisherNoncentralHypergeometric, k::Integer) = log(pdf(d, k)) +logpdf(d::FisherNoncentralHypergeometric, k::Real) = log(pdf(d, k)) function cdf(d::FisherNoncentralHypergeometric, k::Integer) ω, _ = promote(d.ω, float(k)) @@ -167,8 +167,6 @@ function cdf(d::FisherNoncentralHypergeometric, k::Integer) return Fₖ/s end -logcdf(d::FisherNoncentralHypergeometric, k::Integer) = log(cdf(d, k)) - function _expectation(f, d::FisherNoncentralHypergeometric) ω = float(d.ω) l = max(0, d.n - d.nf) @@ -259,3 +257,14 @@ function logpdf(d::WalleniusNoncentralHypergeometric, k::Real) return log(zero(k)) end end + +cdf(d::WalleniusNoncentralHypergeometric, k::Integer) = integerunitrange_cdf(d, k) + +for f in (:ccdf, :logcdf, :logccdf) + @eval begin + $f(d::WalleniusNoncentralHypergeometric, k::Real) = $(Symbol(f, :_int))(d, k) + function $f(d::WalleniusNoncentralHypergeometric, k::Integer) + return $(Symbol(:integerunitrange_, f))(d, k) + end + end +end diff --git a/src/univariate/discrete/poisson.jl b/src/univariate/discrete/poisson.jl index d971aee4e..75fa25b36 100644 --- a/src/univariate/discrete/poisson.jl +++ b/src/univariate/discrete/poisson.jl @@ -84,23 +84,18 @@ function entropy(d::Poisson{T}) where T<:Real end end +function kldivergence(p::Poisson, q::Poisson) + λp = rate(p) + λq = rate(q) + # `false` is a strong zero and ensures that `λp = 0` is handled correctly + # we don't use `xlogy` since it returns `NaN` for `λp = λq = 0` + return λq - λp + (λp > 0) * (λp * log(λp / λq)) +end ### Evaluation @_delegate_statsfuns Poisson pois λ -function pdf(d::Poisson, x::Real) - _insupport = insupport(d, x) - s = pdf(d, _insupport ? 
round(Int, x) : 0) - return _insupport ? s : zero(s) -end - -function logpdf(d::Poisson, x::Real) - _insupport = insupport(d, x) - s = logpdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : oftype(s, -Inf) -end - function mgf(d::Poisson, t::Real) λ = rate(d) return exp(λ * (exp(t) - 1)) diff --git a/src/univariate/discrete/poissonbinomial.jl b/src/univariate/discrete/poissonbinomial.jl index 841238de5..5cd0ddaeb 100644 --- a/src/univariate/discrete/poissonbinomial.jl +++ b/src/univariate/discrete/poissonbinomial.jl @@ -126,6 +126,16 @@ end pdf(d::PoissonBinomial, k::Real) = insupport(d, k) ? d.pmf[Int(k+1)] : zero(eltype(d.pmf)) logpdf(d::PoissonBinomial, k::Real) = log(pdf(d, k)) +cdf(d::PoissonBinomial, k::Int) = integerunitrange_cdf(d, k) + +# leads to numerically more accurate results +for f in (:ccdf, :logcdf, :logccdf) + @eval begin + $f(d::PoissonBinomial, k::Real) = $(Symbol(f, :_int))(d, k) + $f(d::PoissonBinomial, k::Int) = $(Symbol(:integerunitrange_, f))(d, k) + end +end + # Computes the pdf of a poisson-binomial random variable using # simple, fast recursive formula # @@ -191,3 +201,70 @@ end #### Sampling sampler(d::PoissonBinomial) = PoissBinAliasSampler(d) + +## ChainRules definitions + +# Compute matrix of partial derivatives [∂P(X=j-1)/∂pᵢ]_{i=1,…,n; j=1,…,n+1} +# +# This implementation uses the same dynamic programming "trick" as for the computation of +# the primals. +# +# Reference (for the primal): +# +# Marlin A. Thomas & Audrey E. Taub (1982) +# Calculating binomial probabilities when the trial probabilities are unequal, +# Journal of Statistical Computation and Simulation, 14:2, 125-131, DOI: 10.1080/00949658208810534 +function poissonbinomial_pdf_partialderivatives(p::AbstractVector{<:Real}) + n = length(p) + A = zeros(eltype(p), n, n + 1) + @inbounds for j in 1:n + A[j, end] = 1 + end + @inbounds for (i, pi) in enumerate(p) + qi = 1 - pi + for k in (n - i + 1):n + kp1 = k + 1 + for j in 1:(i - 1) + A[j, k] = pi * A[j, k] + qi * A[j, kp1] + end + for j in (i+1):n + A[j, k] = pi * A[j, k] + qi * A[j, kp1] + end + end + for j in 1:(i-1) + A[j, end] *= pi + end + for j in (i+1):n + A[j, end] *= pi + end + end + @inbounds for j in 1:n, i in 1:n + A[i, j] -= A[i, j+1] + end + return A +end + +for f in (:poissonbinomial_pdf, :poissonbinomial_pdf_fft) + pullback = Symbol(f, :_pullback) + @eval begin + function ChainRulesCore.frule( + (_, Δp)::Tuple{<:Any,<:AbstractVector{<:Real}}, ::typeof($f), p::AbstractVector{<:Real} + ) + y = $f(p) + A = poissonbinomial_pdf_partialderivatives(p) + return y, A' * Δp + end + function ChainRulesCore.rrule(::typeof($f), p::AbstractVector{<:Real}) + y = $f(p) + A = poissonbinomial_pdf_partialderivatives(p) + function $pullback(Δy) + p̄ = ChainRulesCore.InplaceableThunk( + Δ -> LinearAlgebra.mul!(Δ, A, Δy, true, true), + ChainRulesCore.@thunk(A * Δy), + ) + return ChainRulesCore.NoTangent(), p̄ + end + return y, $pullback + end + end +end diff --git a/src/univariate/discrete/skellam.jl b/src/univariate/discrete/skellam.jl index 72fb085cc..c352f3835 100644 --- a/src/univariate/discrete/skellam.jl +++ b/src/univariate/discrete/skellam.jl @@ -100,11 +100,13 @@ Computing cdf of the Skellam distribution. """ function cdf(d::Skellam, t::Integer) μ1, μ2 = params(d) - (t < 0) ? 
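# Illustrative check, not part of the diff; `poissonbinomial_pdf` and
# `poissonbinomial_pdf_partialderivatives` are internal names from the hunk above. The matrix A
# with A[i, j] = ∂P(X = j - 1)/∂pᵢ should agree with a generic Jacobian of the primal:
using Distributions, ForwardDiff
p = [0.2, 0.5, 0.9]
A = Distributions.poissonbinomial_pdf_partialderivatives(p)
J = ForwardDiff.jacobian(Distributions.poissonbinomial_pdf, p)
A' ≈ J   # consistent with the `frule` above computing the pushforward as A' * Δp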
nchisqcdf(-2*t, 2*μ1, 2*μ2) : 1.0 - nchisqcdf(2*(t+1), 2*μ2, 2*μ1) + return if t < 0 + nchisqcdf(-2*t, 2*μ1, 2*μ2) + else + 1 - nchisqcdf(2*(t+1), 2*μ2, 2*μ1) + end end -cdf(d::Skellam, t::Real) = cdf(d, floor(Int, t)) - #### Sampling rand(rng::AbstractRNG, d::Skellam) = rand(rng, Poisson(d.μ1)) - rand(rng, Poisson(d.μ2)) diff --git a/src/univariate/discrete/soliton.jl b/src/univariate/discrete/soliton.jl index 35b87516c..f13d9e3df 100644 --- a/src/univariate/discrete/soliton.jl +++ b/src/univariate/discrete/soliton.jl @@ -97,7 +97,7 @@ function pdf(Ω::Soliton, i::Real) end logpdf(Ω::Soliton, i::Real) = log(pdf(Ω, i)) -function Distributions.cdf(Ω::Soliton, i::Integer) +function cdf(Ω::Soliton, i::Integer) i < Ω.degrees[1] && return 0.0 i > Ω.degrees[end] && return 1.0 j = searchsortedfirst(Ω.degrees, i) diff --git a/src/univariate/locationscale.jl b/src/univariate/locationscale.jl index 24eae55e8..ba61c8d90 100644 --- a/src/univariate/locationscale.jl +++ b/src/univariate/locationscale.jl @@ -103,21 +103,14 @@ mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) #### Evaluation & Sampling -pdf(d::ContinuousLocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ +pdf(d::ContinuousLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ pdf(d::DiscreteLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) logpdf(d::ContinuousLocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) logpdf(d::DiscreteLocationScale, x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) -# additional definitions are required to fix ambiguity errors and incorrect defaults for f in (:cdf, :ccdf, :logcdf, :logccdf) - _f = Symbol(:_, f) - @eval begin - $f(d::LocationScale, x::Real) = $_f(d, x) - $f(d::DiscreteLocationScale, x::Real) = $_f(d, x) - $f(d::DiscreteLocationScale, x::Integer) = $_f(d, x) - $_f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) - end + @eval $f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) end quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q) @@ -134,4 +127,3 @@ Base.:*(x::Real, d::UnivariateDistribution) = LocationScale(zero(x), x, d) Base.:*(d::UnivariateDistribution, x::Real) = x * d Base.:-(d::UnivariateDistribution, x::Real) = d + -x Base.:/(d::UnivariateDistribution, x::Real) = inv(x) * d - diff --git a/src/univariates.jl b/src/univariates.jl index 833cb9e88..52073863b 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -77,27 +77,6 @@ Get the degrees of freedom. """ dof(d::UnivariateDistribution) -""" - minimum(d::UnivariateDistribution) - -Return the minimum of the support of `d`. -""" -minimum(d::UnivariateDistribution) - -""" - maximum(d::UnivariateDistribution) - -Return the maximum of the support of `d`. -""" -maximum(d::UnivariateDistribution) - -""" - extrema(d::UnivariateDistribution) - -Return the minimum and maximum of the support of `d` as a 2-tuple. 
-""" -extrema(d::UnivariateDistribution) = (minimum(d), maximum(d)) - """ insupport(d::UnivariateDistribution, x::Any) @@ -156,15 +135,20 @@ end # multiple univariate, must allocate array rand(rng::AbstractRNG, s::Sampleable{Univariate}, dims::Dims) = - rand!(rng, sampler(s), Array{eltype(s)}(undef, dims)) + rand!(rng, s, Array{eltype(s)}(undef, dims)) rand(rng::AbstractRNG, s::Sampleable{Univariate,Continuous}, dims::Dims) = - rand!(rng, sampler(s), Array{float(eltype(s))}(undef, dims)) + rand!(rng, s, Array{float(eltype(s))}(undef, dims)) # multiple univariate with pre-allocated array +# we use a function barrier since for some distributions `sampler(s)` is not type-stable: +# https://github.com/JuliaStats/Distributions.jl/pull/1281 function rand!(rng::AbstractRNG, s::Sampleable{Univariate}, A::AbstractArray) - smp = sampler(s) + return _rand_loops!(rng, sampler(s), A) +end + +function _rand_loops!(rng::AbstractRNG, sampler::Sampleable{Univariate}, A::AbstractArray) for i in eachindex(A) - @inbounds A[i] = rand(rng, smp) + @inbounds A[i] = rand(rng, sampler) end return A end @@ -211,7 +195,7 @@ std(d::UnivariateDistribution) = sqrt(var(d)) Return the median value of distribution `d`. """ -median(d::UnivariateDistribution) = quantile(d, 0.5) +median(d::UnivariateDistribution) = quantile(d, 1//2) """ modes(d::UnivariateDistribution) @@ -338,43 +322,19 @@ Evaluate the cumulative probability at `x`. See also [`ccdf`](@ref), [`logcdf`](@ref), and [`logccdf`](@ref). """ cdf(d::UnivariateDistribution, x::Real) -cdf(d::DiscreteUnivariateDistribution, x::Integer) = cdf(d, x, FiniteSupport{hasfinitesupport(d)}) - -# Discrete univariate with infinite support -function cdf(d::DiscreteUnivariateDistribution, x::Integer, ::Type{FiniteSupport{false}}) - c = 0.0 - for y = minimum(d):x - c += pdf(d, y) - end - return c -end - -# Discrete univariate with finite support -function cdf(d::DiscreteUnivariateDistribution, x::Integer, ::Type{FiniteSupport{true}}) - # calculate from left if x < (min + max)/2 - # (same as infinite support version) - x <= div(minimum(d) + maximum(d),2) && return cdf(d, x, FiniteSupport{false}) - - # otherwise, calculate from the right - c = 1.0 - for y = x+1:maximum(d) - c -= pdf(d, y) - end - return c -end - -cdf(d::DiscreteUnivariateDistribution, x::Real) = cdf(d, floor(Int,x)) -cdf(d::ContinuousUnivariateDistribution, x::Real) = throw(MethodError(cdf, (d, x))) +# fallback for discrete distribution: +# base computation on `cdf(d, floor(Int, x))` and handle `NaN` and `±Inf` +# this is only correct for distributions with integer-valued support but will error if +# `cdf(d, ::Int)` is not defined (so it should not return incorrect values silently) +cdf(d::DiscreteUnivariateDistribution, x::Real) = cdf_int(d, x) """ ccdf(d::UnivariateDistribution, x::Real) The complementary cumulative function evaluated at `x`, i.e. `1 - cdf(d, x)`. """ -ccdf(d::UnivariateDistribution, x::Real) = 1.0 - cdf(d, x) -ccdf(d::DiscreteUnivariateDistribution, x::Integer) = 1.0 - cdf(d, x) -ccdf(d::DiscreteUnivariateDistribution, x::Real) = ccdf(d, floor(Int,x)) +ccdf(d::UnivariateDistribution, x::Real) = 1 - cdf(d, x) """ logcdf(d::UnivariateDistribution, x::Real) @@ -382,8 +342,6 @@ ccdf(d::DiscreteUnivariateDistribution, x::Real) = ccdf(d, floor(Int,x)) The logarithm of the cumulative function value(s) evaluated at `x`, i.e. `log(cdf(x))`. 
""" logcdf(d::UnivariateDistribution, x::Real) = log(cdf(d, x)) -logcdf(d::DiscreteUnivariateDistribution, x::Integer) = log(cdf(d, x)) -logcdf(d::DiscreteUnivariateDistribution, x::Real) = logcdf(d, floor(Int,x)) """ logdiffcdf(d::UnivariateDistribution, x::Real, y::Real) @@ -393,7 +351,7 @@ The natural logarithm of the difference between the cumulative density function function logdiffcdf(d::UnivariateDistribution, x::Real, y::Real) # Promote to ensure that we don't compute logcdf in low precision and then promote _x, _y = promote(x, y) - _x <= _y && throw(ArgumentError("requires x > y.")) + _x < _y && throw(ArgumentError("requires x >= y.")) u = logcdf(d, _x) v = logcdf(d, _y) return u + log1mexp(v - u) @@ -405,8 +363,6 @@ end The logarithm of the complementary cumulative function values evaluated at x, i.e. `log(ccdf(x))`. """ logccdf(d::UnivariateDistribution, x::Real) = log(ccdf(d, x)) -logccdf(d::DiscreteUnivariateDistribution, x::Integer) = log(ccdf(d, x)) -logccdf(d::DiscreteUnivariateDistribution, x::Real) = logccdf(d, floor(Int,x)) """ quantile(d::UnivariateDistribution, q::Real) @@ -522,12 +478,137 @@ Here `x` can be a single scalar sample or an array of samples. loglikelihood(d::UnivariateDistribution, X::AbstractArray) = sum(x -> logpdf(d, x), X) loglikelihood(d::UnivariateDistribution, x::Real) = logpdf(d, x) +### special definitions for distributions with integer-valued support + +function cdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(cdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + zero(c) + else + one(c) + end +end + +function ccdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(ccdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + one(c) + else + zero(c) + end +end + +function logcdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(logcdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + oftype(c, -Inf) + else + zero(c) + end +end + +function logccdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(logccdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + zero(c) + else + oftype(c, -Inf) + end +end + +# implementation of the cdf for distributions whose support is a unitrange of integers +# note: incorrect for discrete distributions whose support includes non-integer numbers +function integerunitrange_cdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = sum(Base.Fix1(pdf, d), minimum_d:(max(x, minimum_d))) + x < minimum_d ? zero(c) : c + else + c = 1 - sum(Base.Fix1(pdf, d), (min(x + 1, maximum_d)):maximum_d) + x >= maximum_d ? 
one(c) : c + end + + return result +end + +function integerunitrange_ccdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = 1 - sum(Base.Fix1(pdf, d), minimum_d:(max(x, minimum_d))) + x < minimum_d ? one(c) : c + else + c = sum(Base.Fix1(pdf, d), (min(x + 1, maximum_d)):maximum_d) + x >= maximum_d ? zero(c) : c + end + + return result +end + +function integerunitrange_logcdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = logsumexp(logpdf(d, y) for y in minimum_d:(max(x, minimum_d))) + x < minimum_d ? oftype(c, -Inf) : c + else + c = log1mexp(logsumexp(logpdf(d, y) for y in (min(x + 1, maximum_d)):maximum_d)) + x >= maximum_d ? zero(c) : c + end + + return result +end + +function integerunitrange_logccdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = log1mexp(logsumexp(logpdf(d, y) for y in minimum_d:(max(x, minimum_d)))) + x < minimum_d ? zero(c) : c + else + c = logsumexp(logpdf(d, y) for y in (min(x + 1, maximum_d)):maximum_d) + x >= maximum_d ? oftype(c, -Inf) : c + end + + return result +end + ### macros to use StatsFuns for method implementation macro _delegate_statsfuns(D, fpre, psyms...) - dt = eval(D) - T = dt <: DiscreteUnivariateDistribution ? :Int : :Real - # function names from StatsFuns fpdf = Symbol(fpre, "pdf") flogpdf = Symbol(fpre, "logpdf") @@ -543,22 +624,24 @@ macro _delegate_statsfuns(D, fpre, psyms...) # parameter fields pargs = [Expr(:(.), :d, Expr(:quote, s)) for s in psyms] - esc(quote - pdf(d::$D, x::$T) = $(fpdf)($(pargs...), x) - logpdf(d::$D, x::$T) = $(flogpdf)($(pargs...), x) - - cdf(d::$D, x::$T) = $(fcdf)($(pargs...), x) - ccdf(d::$D, x::$T) = $(fccdf)($(pargs...), x) - logcdf(d::$D, x::$T) = $(flogcdf)($(pargs...), x) - logccdf(d::$D, x::$T) = $(flogccdf)($(pargs...), x) - - quantile(d::$D, q::Real) = convert($T, $(finvcdf)($(pargs...), q)) - cquantile(d::$D, q::Real) = convert($T, $(finvccdf)($(pargs...), q)) - invlogcdf(d::$D, lq::Real) = convert($T, $(finvlogcdf)($(pargs...), lq)) - invlogccdf(d::$D, lq::Real) = convert($T, $(finvlogccdf)($(pargs...), lq)) - end) -end + # output type of `quantile` etc. + T = :($D <: DiscreteUnivariateDistribution ? 
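# Illustrative sketch, not part of the diff: the `integerunitrange_*` helpers above sum the
# pmf from whichever end of the support is closer to `x`, keeping the number of terms small
# and improving accuracy in the tails. The identity they exploit, on a small finite support:
using Distributions
d = DiscreteUniform(1, 10)
sum(pdf(d, k) for k in 1:7) ≈ 1 - sum(pdf(d, k) for k in 8:10) ≈ cdf(d, 7)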
Int : Real) + return quote + $Distributions.pdf(d::$D, x::Real) = $(fpdf)($(pargs...), x) + $Distributions.logpdf(d::$D, x::Real) = $(flogpdf)($(pargs...), x) + + $Distributions.cdf(d::$D, x::Real) = $(fcdf)($(pargs...), x) + $Distributions.logcdf(d::$D, x::Real) = $(flogcdf)($(pargs...), x) + $Distributions.ccdf(d::$D, x::Real) = $(fccdf)($(pargs...), x) + $Distributions.logccdf(d::$D, x::Real) = $(flogccdf)($(pargs...), x) + + $Distributions.quantile(d::$D, q::Real) = convert($T, $(finvcdf)($(pargs...), q)) + $Distributions.cquantile(d::$D, q::Real) = convert($T, $(finvccdf)($(pargs...), q)) + $Distributions.invlogcdf(d::$D, lq::Real) = convert($T, $(finvlogcdf)($(pargs...), lq)) + $Distributions.invlogccdf(d::$D, lq::Real) = convert($T, $(finvlogccdf)($(pargs...), lq)) + end +end ##### specific distributions ##### @@ -618,6 +701,7 @@ const continuous_distributions = [ "logitnormal", # LogitNormal depends on Normal "pareto", "rayleigh", + "rician", "semicircle", "skewnormal", "studentizedrange", @@ -626,6 +710,7 @@ const continuous_distributions = [ "triangular", "triweight", "uniform", + "loguniform", # depends on Uniform "vonmises", "weibull" ] diff --git a/test/binomial.jl b/test/binomial.jl index 0991f370e..9fd3251f2 100644 --- a/test/binomial.jl +++ b/test/binomial.jl @@ -1,13 +1,12 @@ using Distributions using Test, Random +Random.seed!(1234) +@testset "binomial" begin # Test the consistency between the recursive and nonrecursive computation of the pdf # of the Binomial distribution -Random.seed!(1234) for (p, n) in [(0.6, 10), (0.8, 6), (0.5, 40), (0.04, 20), (1., 100), (0., 10), (0.999999, 1000), (1e-7, 1000)] - local p - d = Binomial(n, p) a = pdf.(d, 0:n) @@ -21,12 +20,11 @@ for (p, n) in [(0.6, 10), (0.8, 6), (0.5, 40), (0.04, 20), (1., 100), (0., 10), for t in rng @test pdf(d, t) ≈ b[t - first(rng) + 1] end - end # Test calculation of expectation value for Binomial distribution -@test Distributions.expectation(Binomial(6), identity) ≈ 3.0 -@test Distributions.expectation(Binomial(10, 0.2), x->-x) ≈ -2.0 +@test Distributions.expectation(identity, Binomial(6)) ≈ 3.0 +@test Distributions.expectation(x -> -x, Binomial(10, 0.2)) ≈ -2.0 # Test mode @test Distributions.mode(Binomial(100, 0.4)) == 40 @@ -36,3 +34,4 @@ end @test isplatykurtic(Bernoulli(0.5)) @test ismesokurtic(Normal(0.0, 1.0)) @test isleptokurtic(Laplace(0.0, 1.0)) +end diff --git a/test/categorical.jl b/test/categorical.jl index e9d9c41ba..972b1c450 100644 --- a/test/categorical.jl +++ b/test/categorical.jl @@ -25,21 +25,34 @@ for p in Any[ c = 0.0 for i = 1:k c += p[i] - @test pdf(d, i) == p[i] + @test @inferred(pdf(d, i)) == p[i] + @test @inferred(pdf(d, float(i))) == p[i] @test @inferred(logpdf(d, i)) === log(p[i]) @test @inferred(logpdf(d, float(i))) === log(p[i]) - @test cdf(d, i) ≈ c - @test ccdf(d, i) ≈ 1.0 - c + @test @inferred(cdf(d, i)) ≈ c + @test @inferred(cdf(d, i + 0.5)) ≈ c + @test @inferred(ccdf(d, i)) ≈ 1 - c + @test @inferred(ccdf(d, i + 0.5)) ≈ 1 - c + @test @inferred(logcdf(d, i)) ≈ log(c) + @test @inferred(logcdf(d, i + 0.5)) ≈ log(c) + @test @inferred(logccdf(d, i)) ≈ log1p(-c) + @test @inferred(logccdf(d, i + 0.5)) ≈ log1p(-c) end @test pdf(d, 0) == 0 @test pdf(d, k+1) == 0 @test logpdf(d, 0) == -Inf @test logpdf(d, k+1) == -Inf - @test cdf(d, 0) == 0.0 - @test cdf(d, k+1) == 1.0 - @test ccdf(d, 0) == 1.0 - @test ccdf(d, k+1) == 0.0 + @test iszero(cdf(d, -Inf)) + @test iszero(cdf(d, 0)) + @test isone(cdf(d, k+1)) + @test isone(cdf(d, Inf)) + @test isnan(cdf(d, NaN)) + @test isone(ccdf(d, 
-Inf)) + @test isone(ccdf(d, 0)) + @test iszero(ccdf(d, k+1)) + @test iszero(ccdf(d, Inf)) + @test isnan(ccdf(d, NaN)) @test pdf.(d, support(d)) == p @test pdf.(d, 1:k) == p diff --git a/test/convolution.jl b/test/convolution.jl index 213e22444..bcb5292d8 100644 --- a/test/convolution.jl +++ b/test/convolution.jl @@ -1,3 +1,9 @@ +using Distributions +using FillArrays + +using LinearAlgebra +using Test + @testset "discrete univariate" begin @testset "Bernoulli" begin @@ -139,17 +145,17 @@ end @testset "iso-/diag-normal" begin - in1 = MvNormal([1.2, 0.3], 2) - in2 = MvNormal([-2.0, 6.9], 0.5) + in1 = MvNormal([1.2, 0.3], 2 * I) + in2 = MvNormal([-2.0, 6.9], 0.5 * I) - zmin1 = MvNormal(2, 1.9) - zmin2 = MvNormal(2, 5.2) + zmin1 = MvNormal(Zeros(2), 1.9 * I) + zmin2 = MvNormal(Diagonal(Fill(5.2, 2))) - dn1 = MvNormal([0.0, 4.7], [0.1, 1.8]) - dn2 = MvNormal([-3.4, 1.2], [3.2, 0.2]) + dn1 = MvNormal([0.0, 4.7], Diagonal([0.1, 1.8])) + dn2 = MvNormal([-3.4, 1.2], Diagonal([3.2, 0.2])) - zmdn1 = MvNormal([1.2, 0.3]) - zmdn2 = MvNormal([-0.8, 1.0]) + zmdn1 = MvNormal(Diagonal([1.2, 0.3])) + zmdn2 = MvNormal(Diagonal([-0.8, 1.0])) dist_list = (in1, in2, zmin1, zmin2, dn1, dn2, zmdn1, zmdn2) @@ -161,7 +167,7 @@ end end # erroring - in3 = MvNormal([1, 2, 3], 0.2) + in3 = MvNormal([1, 2, 3], 0.2 * I) @test_throws ArgumentError convolve(in1, in3) end @@ -202,10 +208,10 @@ end @testset "mixed" begin - in1 = MvNormal([1.2, 0.3], 2) - zmin1 = MvNormal(2, 1.9) - dn1 = MvNormal([0.0, 4.7], [0.1, 1.8]) - zmdn1 = MvNormal([1.2, 0.3]) + in1 = MvNormal([1.2, 0.3], 2 * I) + zmin1 = MvNormal(Zeros(2), 1.9 * I) + dn1 = MvNormal([0.0, 4.7], Diagonal([0.1, 1.8])) + zmdn1 = MvNormal(Diagonal([1.2, 0.3])) m1 = Symmetric(rand(2, 2)) m1sq = m1^2 full = MvNormal(ones(2), m1sq.data) diff --git a/test/density_interface.jl b/test/density_interface.jl new file mode 100644 index 000000000..e4988986d --- /dev/null +++ b/test/density_interface.jl @@ -0,0 +1,25 @@ +@testset "DensityInterface" begin + using DensityInterface + + d_uv_continous = Normal(-1.5, 2.3) + d_uv_discrete = Poisson(4.7) + d_mv = MvNormal([2.3 0.4; 0.4 1.2]) + d_av = Distributions.MatrixReshaped(MvNormal(rand(10)), 2, 5) + + @testset "Distribution" begin + for d in (d_uv_continous, d_uv_discrete, d_mv, d_av) + x = rand(d) + ref_logd_at_x = logpdf(d, x) + DensityInterface.test_density_interface(d, x, ref_logd_at_x) + + # Stricter than required by test_density_interface: + @test logfuncdensity(logdensityof(d)) === d + end + + for di_func in (logdensityof, densityof) + @test_throws ArgumentError di_func(d_uv_continous, [rand(d_uv_continous) for i in 1:3]) + @test_throws ArgumentError di_func(d_mv, hcat([rand(d_mv) for i in 1:3]...)) + @test_throws ArgumentError di_func(d_av, [rand(d_av) for i in 1:3]) + end + end +end diff --git a/test/dirac.jl b/test/dirac.jl index 7e2053435..d3b0c7c6b 100644 --- a/test/dirac.jl +++ b/test/dirac.jl @@ -20,9 +20,30 @@ using Test @test iszero(logpdf(d, val)) @test logpdf(d, nextfloat(float(val))) == -Inf + @test iszero(cdf(d, -Inf)) @test iszero(cdf(d, prevfloat(float(val)))) @test isone(cdf(d, val)) @test isone(cdf(d, nextfloat(float(val)))) + @test isone(cdf(d, Inf)) + @test logcdf(d, -Inf) == -Inf + @test logcdf(d, prevfloat(float(val))) == -Inf + @test iszero(logcdf(d, val)) + @test iszero(logcdf(d, nextfloat(float(val)))) + @test iszero(logcdf(d, Inf)) + @test isone(ccdf(d, -Inf)) + @test isone(ccdf(d, prevfloat(float(val)))) + @test iszero(ccdf(d, val)) + @test iszero(ccdf(d, nextfloat(float(val)))) + @test iszero(ccdf(d, 
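# Usage sketch, not part of the diff: the DensityInterface support exercised above lets any
# `Distribution` be consumed by packages that only know `logdensityof`:
using Distributions, DensityInterface
d = Normal(-1.5, 2.3)
x = rand(d)
logdensityof(d, x) == logpdf(d, x)
logfuncdensity(logdensityof(d)) === d   # the round trip recovers the distribution itself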
Inf)) + @test iszero(logccdf(d, -Inf)) + @test iszero(logccdf(d, prevfloat(float(val)))) + @test logccdf(d, val) == -Inf + @test logccdf(d, nextfloat(float(val))) == -Inf + @test logccdf(d, Inf) == -Inf + + for f in (cdf, ccdf, logcdf, logccdf) + @test isnan(f(d, NaN)) + end @test quantile(d, 0) == val @test quantile(d, 0.5) == val diff --git a/test/dirichletmultinomial.jl b/test/dirichletmultinomial.jl index 31bd78a42..68a5dff15 100644 --- a/test/dirichletmultinomial.jl +++ b/test/dirichletmultinomial.jl @@ -51,7 +51,6 @@ d = DirichletMultinomial(10, 5) @test !insupport(d, 3.0 * ones(5)) for x in (2 * ones(5), [1, 2, 3, 4, 0], [3.0, 0.0, 3.0, 0.0, 4.0], [0, 0, 0, 0, 10]) - local x @test pdf(d, x) ≈ factorial(d.n) * gamma(d.α0) / gamma(d.n + d.α0) * prod(gamma.(d.α + x) ./ factorial.(x) ./ gamma.(d.α)) @test logpdf(d, x) ≈ diff --git a/test/discretenonparametric.jl b/test/discretenonparametric.jl index 828863603..52acbfa25 100644 --- a/test/discretenonparametric.jl +++ b/test/discretenonparametric.jl @@ -39,19 +39,45 @@ test_params(d) @test pdf(d, 100.) == 0. @test pdf(d, 120.) == .1 +@test iszero(cdf(d, -Inf)) @test cdf(d, -100.) == 0. @test cdf(d, -100) == 0. @test cdf(d, 0.) ≈ .2 @test cdf(d, 100.) ≈ .9 @test cdf(d, 100) ≈ .9 @test cdf(d, 150.) == 1. +@test isone(cdf(d, Inf)) +@test isnan(cdf(d, NaN)) +@test isone(ccdf(d, -Inf)) @test ccdf(d, -100.) == 1. @test ccdf(d, -100) == 1. @test ccdf(d, 0.) ≈ .8 @test ccdf(d, 100.) ≈ .1 @test ccdf(d, 100) ≈ .1 @test ccdf(d, 150.) == 0 +@test iszero(ccdf(d, Inf)) +@test isnan(ccdf(d, NaN)) + +@test logcdf(d, -Inf) == -Inf +@test logcdf(d, -100.) == -Inf +@test logcdf(d, -100) == -Inf +@test logcdf(d, 0.) ≈ log(0.2) +@test logcdf(d, 100.) ≈ log(0.9) +@test logcdf(d, 100) ≈ log(0.9) +@test iszero(logcdf(d, 150.)) +@test iszero(logcdf(d, Inf)) +@test isnan(logcdf(d, NaN)) + +@test iszero(logccdf(d, -Inf)) +@test iszero(logccdf(d, -100.)) +@test iszero(logccdf(d, -100)) +@test logccdf(d, 0.) ≈ log(0.8) +@test logccdf(d, 100.) ≈ log(0.1) +@test logccdf(d, 100) ≈ log(0.1) +@test logccdf(d, 150.) 
== -Inf +@test logccdf(d, Inf) == -Inf +@test isnan(logccdf(d, NaN)) @test quantile(d, 0) == -60 @test quantile(d, .1) == -60 @@ -137,4 +163,11 @@ d = DiscreteNonParametric([1, 2], [0, 1]) d = DiscreteNonParametric([2, 1], [1, 0]) @test iszero(count(isone(rand(d)) for _ in 1:100)) + + @testset "type stability" begin + d = DiscreteNonParametric([0, 1], [0.5, 0.5]) + @inferred(var(d)) + @inferred(kurtosis(d)) + @inferred(skewness(d)) + end end diff --git a/test/functionals.jl b/test/functionals.jl index 1fb384db4..16cbadee3 100644 --- a/test/functionals.jl +++ b/test/functionals.jl @@ -1,6 +1,112 @@ -using Test -using Distributions: Categorical, kldivergence, expectation, Normal -@test kldivergence(Categorical([0.0, 0.1, 0.9]), Categorical([0.1, 0.1, 0.8])) ≥ 0 -@test kldivergence(Categorical([0.0, 0.1, 0.9]), Categorical([0.1, 0.1, 0.8])) ≈ - kldivergence([0.0, 0.1, 0.9], [0.1, 0.1, 0.8]) -@test expectation(Normal(0.0, 1.0), identity, 1e-10) ≤ 1e-9 +# Struct to test AbstractMvNormal methods +struct CholeskyMvNormal{M,T} <: Distributions.AbstractMvNormal + m::M + L::T +end + +# Constructor for diagonal covariance matrices used in the tests below +function CholeskyMvNormal(m::Vector, Σ::Diagonal) + L = Diagonal(map(sqrt, Σ.diag)) + return CholeskyMvNormal{typeof(m),typeof(L)}(m, L) +end + +Distributions.length(p::CholeskyMvNormal) = length(p.m) +Distributions.mean(p::CholeskyMvNormal) = p.m +Distributions.cov(p::CholeskyMvNormal) = p.L * p.L' +Distributions.logdetcov(p::CholeskyMvNormal) = 2 * logdet(p.L) +function Distributions.sqmahal(p::CholeskyMvNormal, x::AbstractVector) + return sum(abs2, p.L \ (mean(p) - x)) +end +function Distributions._rand!(rng::AbstractRNG, p::CholeskyMvNormal, x::Vector) + return x .= p.m .+ p.L * randn!(rng, x) +end + +@testset "Expectations" begin + # univariate distributions + for d in (Normal(), Poisson(2.0), Binomial(10, 0.4)) + m = Distributions.expectation(identity, d) + @test m ≈ mean(d) atol=1e-3 + @test Distributions.expectation(x -> (x - mean(d))^2, d) ≈ var(d) atol=1e-3 + + @test @test_deprecated(Distributions.expectation(d, identity, 1e-10)) == m + @test @test_deprecated(Distributions.expectation(d, identity)) == m + end + + # multivariate distribution + d = MvNormal([1.5, -0.5], I) + @test Distributions.expectation(identity, d; nsamples=10_000) ≈ mean(d) atol=5e-2 + @test @test_deprecated(Distributions.expectation(d, identity; nsamples=10_000)) ≈ mean(d) atol=5e-2 +end + +@testset "KL divergences" begin + function test_kl(p, q) + @test kldivergence(p, q) >= 0 + @test kldivergence(p, p) ≈ 0 atol=1e-1 + @test kldivergence(q, q) ≈ 0 atol=1e-1 + if p isa UnivariateDistribution + @test kldivergence(p, q) ≈ invoke(kldivergence, Tuple{UnivariateDistribution,UnivariateDistribution}, p, q) atol=1e-1 + elseif p isa MultivariateDistribution + @test kldivergence(p, q) ≈ invoke(kldivergence, Tuple{MultivariateDistribution,MultivariateDistribution}, p, q; nsamples=10000) atol=1e-1 + end + end + + @testset "univariate" begin + @testset "Beta" begin + p = Beta(2, 10) + q = Beta(3, 5) + test_kl(p, q) + end + @testset "Categorical" begin + @test kldivergence(Categorical([0.0, 0.1, 0.9]), Categorical([0.1, 0.1, 0.8])) ≥ 0 + @test kldivergence(Categorical([0.0, 0.1, 0.9]), Categorical([0.1, 0.1, 0.8])) ≈ + kldivergence([0.0, 0.1, 0.9], [0.1, 0.1, 0.8]) + end + @testset "Exponential" begin + p = Exponential(2.0) + q = Exponential(3.0) + test_kl(p, q) + end + @testset "Gamma" begin + p = Gamma(2.0, 1.0) + q = Gamma(3.0, 2.0) + test_kl(p, q) + end + @testset
"InverseGamma" begin + p = InverseGamma(2.0, 1.0) + q = InverseGamma(3.0, 2.0) + test_kl(p, q) + end + @testset "Normal" begin + p = Normal(0, 1) + q = Normal(0.5, 0.5) + test_kl(p, q) + end + @testset "Poisson" begin + p = Poisson(4.0) + q = Poisson(3.0) + test_kl(p, q) + + # special case (test function also checks `kldivergence(p0, p0)`) + p0 = Poisson(0.0) + test_kl(p0, p) + end + end + + @testset "multivariate" begin + @testset "AbstractMvNormal" begin + p_mvnormal = MvNormal([0.2, -0.8], Diagonal([0.5, 0.75])) + q_mvnormal = MvNormal([1.5, 0.5], Diagonal([1.0, 0.2])) + test_kl(p_mvnormal, q_mvnormal) + + p_cholesky = CholeskyMvNormal([0.2, -0.8], Diagonal([0.5, 0.75])) + q_cholesky = CholeskyMvNormal([1.5, 0.5], Diagonal([1.0, 0.2])) + test_kl(p_cholesky, q_cholesky) + + # check consistency and mixed computations + v = kldivergence(p_mvnormal, q_mvnormal) + @test kldivergence(p_mvnormal, q_cholesky) ≈ v + @test kldivergence(p_cholesky, q_mvnormal) ≈ v + @test kldivergence(p_cholesky, q_cholesky) ≈ v + end + end +end diff --git a/test/gradlogpdf.jl b/test/gradlogpdf.jl index da4ab7e42..5f54340ff 100644 --- a/test/gradlogpdf.jl +++ b/test/gradlogpdf.jl @@ -4,18 +4,20 @@ using Test # Test for gradlogpdf on univariate distributions -@test isapprox(gradlogpdf(Beta(1.5, 3.0), 0.7) , -5.9523809523809526 , atol=1.0e-8) -@test isapprox(gradlogpdf(Chi(5.0), 5.5) , -4.7727272727272725 , atol=1.0e-8) -@test isapprox(gradlogpdf(Chisq(7.0), 12.0) , -0.29166666666666663, atol=1.0e-8) -@test isapprox(gradlogpdf(Exponential(2.0), 7.0) , -0.5 , atol=1.0e-8) -@test isapprox(gradlogpdf(Gamma(9.0, 0.5), 11.0) , -1.2727272727272727 , atol=1.0e-8) -@test isapprox(gradlogpdf(Gumbel(3.5, 1.0), 4.0) , -1.6065306597126334 , atol=1.0e-8) -@test isapprox(gradlogpdf(Laplace(7.0), 34.0) , -1.0 , atol=1.0e-8) -@test isapprox(gradlogpdf(Logistic(-6.0), 1.0) , -0.9981778976111987 , atol=1.0e-8) -@test isapprox(gradlogpdf(LogNormal(5.5), 2.0) , 1.9034264097200273 , atol=1.0e-8) -@test isapprox(gradlogpdf(Normal(-4.5, 2.0), 1.6), -1.525 , atol=1.0e-8) -@test isapprox(gradlogpdf(TDist(8.0), 9.1) , -0.9018830525272548 , atol=1.0e-8) -@test isapprox(gradlogpdf(Weibull(2.0), 3.5) , -6.714285714285714 , atol=1.0e-8) +@test isapprox(gradlogpdf(Beta(1.5, 3.0), 0.7) , -5.9523809523809526 , atol=1.0e-8) +@test isapprox(gradlogpdf(Chi(5.0), 5.5) , -4.7727272727272725 , atol=1.0e-8) +@test isapprox(gradlogpdf(Chisq(7.0), 12.0) , -0.29166666666666663, atol=1.0e-8) +@test isapprox(gradlogpdf(Exponential(2.0), 7.0) , -0.5 , atol=1.0e-8) +@test isapprox(gradlogpdf(Gamma(9.0, 0.5), 11.0) , -1.2727272727272727 , atol=1.0e-8) +@test isapprox(gradlogpdf(Gumbel(3.5, 1.0), 4.0) , -1.6065306597126334 , atol=1.0e-8) +@test isapprox(gradlogpdf(Laplace(7.0), 34.0) , -1.0 , atol=1.0e-8) +@test isapprox(gradlogpdf(Logistic(-6.0), 1.0) , -0.9981778976111987 , atol=1.0e-8) +@test isapprox(gradlogpdf(LogNormal(5.5), 2.0) , 1.9034264097200273 , atol=1.0e-8) +@test isapprox(gradlogpdf(Normal(-4.5, 2.0), 1.6) , -1.525 , atol=1.0e-8) +@test isapprox(gradlogpdf(TDist(8.0), 9.1) , -0.9018830525272548 , atol=1.0e-8) +@test isapprox(gradlogpdf(Weibull(2.0), 3.5) , -6.714285714285714 , atol=1.0e-8) +@test isapprox(gradlogpdf(Uniform(-1.0, 1.0), 0.3), 0.0 , atol=1.0e-8) + # Test for gradlogpdf on multivariate distributions diff --git a/test/lkjcholesky.jl b/test/lkjcholesky.jl new file mode 100644 index 000000000..32840f14a --- /dev/null +++ b/test/lkjcholesky.jl @@ -0,0 +1,219 @@ +using Distributions +using Random +using LinearAlgebra +using Test +using 
FiniteDifferences + +@testset "LKJCholesky" begin + function test_draw(d::LKJCholesky, x; check_uplo=true) + @test insupport(d, x) + check_uplo && @test x.uplo == d.uplo + end + function test_draws(d::LKJCholesky, xs; check_uplo=true, nkstests=1) + @test all(x -> insupport(d, x), xs) + check_uplo && @test all(x -> x.uplo == d.uplo, xs) + + p = d.d + dmat = LKJ(p, d.η) + marginal = Distributions._marginal(dmat) + ndraws = length(xs) + zs = Array{eltype(d)}(undef, p, p, ndraws) + for k in 1:ndraws + zs[:, :, k] = Matrix(xs[k]) + end + + @testset "LKJCholesky marginal moments" begin + @test mean(zs; dims=3)[:, :, 1] ≈ I atol=0.1 + @test var(zs; dims=3)[:, :, 1] ≈ var(marginal) * (ones(p, p) - I) atol=0.1 + @testset for n in 2:5 + for i in 1:p, j in 1:(i-1) + @test moment(zs[i, j, :], n) ≈ moment(rand(marginal, ndraws), n) atol=0.1 + end + end + end + + @testset "LKJCholesky marginal KS test" begin + α = 0.01 + L = sum(1:(p - 1)) + for i in 1:p, j in 1:(i-1) + @test pvalue_kolmogorovsmirnoff(zs[i, j, :], marginal) >= α / L / nkstests + end + end + end + + # Compute logdetjac of ϕ: L → L L' where only strict lower triangle of L and L L' are unique + function cholesky_inverse_logdetjac(L) + size(L, 1) == 1 && return 0.0 + J = jacobian(central_fdm(5, 1), cholesky_vec_to_corr_vec, stricttril_to_vec(L))[1] + return logabsdet(J)[1] + end + stricttril_to_vec(L) = [L[i, j] for i in axes(L, 1) for j in 1:(i - 1)] + function vec_to_stricttril(l) + n = length(l) + p = Int((1 + sqrt(8n + 1)) / 2) + L = similar(l, p, p) + fill!(L, 0) + k = 1 + for i in 1:p, j in 1:(i - 1) + L[i, j] = l[k] + k += 1 + end + return L + end + function cholesky_vec_to_corr_vec(l) + L = vec_to_stricttril(l) + for i in axes(L, 1) + w = view(L, i, 1:(i-1)) + wnorm = norm(w) + if wnorm > 1 + w ./= wnorm + wnorm = 1 + end + L[i, i] = sqrt(1 - wnorm^2) + end + return stricttril_to_vec(L * L') + end + + @testset "Constructors" begin + @testset for p in (4, 5), η in (2, 3.5) + d = LKJCholesky(p, η) + @test d.d == p + @test d.η == η + @test d.uplo == 'L' + @test d.logc0 == LKJ(p, η).logc0 + end + + @test_throws ArgumentError LKJCholesky(0, 2) + @test_throws ArgumentError LKJCholesky(4, 0.0) + @test_throws ArgumentError LKJCholesky(4, -1) + + for uplo in (:U, 'U') + d = LKJCholesky(4, 2, uplo) + @test d.uplo === 'U' + end + for uplo in (:L, 'L') + d = LKJCholesky(4, 2, uplo) + @test d.uplo === 'L' + end + @test_throws ArgumentError LKJCholesky(4, 2, :F) + @test_throws ArgumentError LKJCholesky(4, 2, 'F') + end + + @testset "REPL display" begin + d = LKJCholesky(5, 1) + @test sprint(show, d) == "$(typeof(d))(\nd: 5\nη: 1.0\nuplo: L\n)\n" + end + + @testset "Conversion" begin + d = LKJCholesky(5, 3.5) + df0_1 = convert(LKJCholesky{Float32}, d) + @test df0_1 isa LKJCholesky{Float32} + @test df0_1.d == d.d + @test df0_1.η ≈ d.η + @test df0_1.uplo == d.uplo + @test df0_1.logc0 ≈ d.logc0 + + df0_2 = convert(LKJCholesky{BigFloat}, d.d, d.η, d.uplo, d.logc0) + @test df0_2 isa LKJCholesky{BigFloat} + @test df0_2.d == d.d + @test df0_2.η ≈ d.η + @test df0_2.uplo == d.uplo + @test df0_2.logc0 ≈ d.logc0 + end + + @testset "properties" begin + @testset for p in (4, 5), η in (2, 3.5), uplo in ('L', 'U') + d = LKJCholesky(p, η, uplo) + @test d.d == p + @test size(d) == (p, p) + @test Distributions.params(d) == (d.d, d.η, d.uplo) + @test partype(d) <: Float64 + + m = mode(d) + @test m isa Cholesky{eltype(d)} + @test Matrix(m) ≈ I + end + @test_broken partype(LKJCholesky(2, 4f0)) <: Float32 + + @testset "insupport" begin + @test insupport(LKJCholesky(40, 
2, 'U'), cholesky(rand(LKJ(40, 2)))) + @test insupport(LKJCholesky(40, 2), cholesky(rand(LKJ(40, 2)))) + @test !insupport(LKJCholesky(40, 2), cholesky(rand(LKJ(41, 2)))) + z = rand(LKJ(40, 1)) + z .+= exp(Symmetric(randn(size(z)))) .* 1e-8 + x = cholesky(z) + @test !insupport(LKJCholesky(4, 2), x) + end + end + + @testset "Evaluation" begin + @testset for p in (1, 4, 10), η in (0.5, 1, 3) + d = LKJ(p, η) + dchol = LKJCholesky(p, η) + z = rand(d) + x = cholesky(z) + x_L = typeof(x)(Matrix(x.L), 'L', x.info) + logdetJ = sum(i -> (p - i) * log(x.UL[i, i]), 1:p) + logdetJ_approx = cholesky_inverse_logdetjac(x.L) + @test logdetJ ≈ logdetJ_approx + + @test logpdf(dchol, x) ≈ logpdf(d, z) + logdetJ + @test logpdf(dchol, x_L) ≈ logpdf(dchol, x) + + @test pdf(dchol, x) ≈ exp(logpdf(dchol, x)) + @test pdf(dchol, x_L) ≈ pdf(dchol, x) + + @test loglikelihood(dchol, x) ≈ logpdf(dchol, x) + xs = cholesky.(rand(d, 10)) + @test loglikelihood(dchol, xs) ≈ sum(logpdf(dchol, x) for x in xs) + end + end + + @testset "Sampling" begin + rng = MersenneTwister(66) + nkstests = 4 # use for appropriate Bonferroni correction for KS test + + @testset "rand" begin + @testset for p in (2, 4, 10), η in (0.5, 1, 3), uplo in ('L', 'U') + d = LKJCholesky(p, η, uplo) + test_draw(d, rand(rng, d)) + test_draws(d, rand(rng, d, 10^4); nkstests=nkstests) + end + @test_broken rand(rng, LKJCholesky(5, Inf)) ≈ I + end + + @testset "rand!" begin + @testset for p in (2, 4, 10), η in (0.5, 1, 3), uplo in ('L', 'U') + d = LKJCholesky(p, η, uplo) + x = Cholesky(Matrix{Float64}(undef, p, p), uplo, 0) + rand!(rng, d, x) + test_draw(d, x) + x = Cholesky(Matrix{Float64}(undef, p, p), uplo, 0) + rand!(d, x) + test_draw(d, x) + + # test that uplo of Cholesky object is respected + x2 = Cholesky(Matrix{Float64}(undef, p, p), uplo == 'L' ? 'U' : 'L', 0) + rand!(rng, d, x2) + test_draw(d, x2; check_uplo = false) + + # allocating + xs = Vector{typeof(x)}(undef, 10^4) + rand!(rng, d, xs) + test_draws(d, xs; nkstests=nkstests) + + F2 = cholesky(exp(Symmetric(randn(rng, p, p)))) + xs2 = [deepcopy(F2) for _ in 1:10^4] + xs2[1] = cholesky(exp(Symmetric(randn(rng, p + 1, p + 1)))) + rand!(rng, d, xs2) + test_draws(d, xs2; nkstests=nkstests) + + # non-allocating + F3 = cholesky(exp(Symmetric(randn(rng, p, p)))) + xs3 = [deepcopy(F3) for _ in 1:10^4] + rand!(rng, d, xs3) + test_draws(d, xs3; check_uplo = uplo == 'U', nkstests=nkstests) + end + end + end +end \ No newline at end of file diff --git a/test/lognormal.jl b/test/lognormal.jl index 8ee4956e9..0806356b7 100644 --- a/test/lognormal.jl +++ b/test/lognormal.jl @@ -11,12 +11,15 @@ isnan_type(::Type{T}, v) where {T} = isnan(v) && v isa T @test logpdf(LogNormal(), Inf) === -Inf @test iszero(logcdf(LogNormal(0, 0), 1)) @test iszero(logcdf(LogNormal(), Inf)) + @test logdiffcdf(LogNormal(), Float32(exp(3)), Float32(exp(3))) === -Inf @test logdiffcdf(LogNormal(), Float32(exp(5)), Float32(exp(3))) ≈ -6.607938594596893 rtol=1e-12 @test logdiffcdf(LogNormal(), Float32(exp(5)), Float64(exp(3))) ≈ -6.60793859457367 rtol=1e-12 @test logdiffcdf(LogNormal(), Float64(exp(5)), Float64(exp(3))) ≈ -6.607938594596893 rtol=1e-12 let d = LogNormal(Float64(0), Float64(1)), x = Float64(exp(-60)), y = Float64(exp(-60.001)) float_res = logdiffcdf(d, x, y) - big_float_res = log(cdf(d, BigFloat(x, 100)) - cdf(d, BigFloat(y, 100))) + big_x = VERSION < v"1.1" ? BigFloat(x, 100) : BigFloat(x; precision=100) + big_y = VERSION < v"1.1" ? 
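# Illustrative sketch, not part of the diff: `BigFloat(x, 100)` set the precision
# positionally; Julia 1.1 introduced the keyword form and deprecated the positional one,
# hence the version check at this step of the test:
x = 1e-3
big_x = VERSION < v"1.1" ? BigFloat(x, 100) : BigFloat(x; precision=100)
precision(big_x) == 100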
BigFloat(y, 100) : BigFloat(y; precision=100) + big_float_res = log(cdf(d, big_x) - cdf(d, big_y)) @test float_res ≈ big_float_res end diff --git a/test/loguniform.jl b/test/loguniform.jl new file mode 100644 index 000000000..11975780a --- /dev/null +++ b/test/loguniform.jl @@ -0,0 +1,85 @@ +module TestLogUniform +using Test +using Distributions +import Random + +@testset "LogUniform" begin + rng = Random.MersenneTwister(0) + + @test pdf(LogUniform(1f0, 2f0), 1) isa Float32 + @test pdf(LogUniform(1, 2), 1f0) isa Float32 + @test pdf(LogUniform(1, 2), 1) isa Float64 + @test quantile(LogUniform(1, 2), 1) isa Float64 + @test quantile(LogUniform(1, 2), 1f0) isa Float32 + @testset "$f" for f in [pdf, cdf, quantile, logpdf, logcdf] + @test @inferred(f(LogUniform(1,2), 1)) isa Float64 + @test @inferred(f(LogUniform(1,2), 1.0)) isa Float64 + @test @inferred(f(LogUniform(1.0,2), 1.0)) isa Float64 + @test @inferred(f(LogUniform(1.0f0,2), 1)) isa Float32 + @test @inferred(f(LogUniform(1.0f0,2), 1f0)) isa Float32 + @test @inferred(f(LogUniform(1,2), 1f0)) isa Float32 + end + + d = LogUniform(1,10) + @test eltype(d) === Float64 + @test 1 <= rand(rng, d) <= 10 + @test rand(rng, d) isa eltype(d) + @test @inferred(quantile(d, 0)) ≈ 1 + @test quantile(d, 0.5) ≈ sqrt(10) # geomean + @test quantile(d, 1) ≈ 10 + @test mode(d) ≈ 1 + @test !insupport(d, 0) + @test @inferred(minimum(d)) === 1 + @test @inferred(maximum(d)) === 10 + @test partype(d) === Int + @test truncated(d, 2, 14) === LogUniform(2,10) + + # numbers obtained by calling scipy.stats.loguniform + @test @inferred(std(d) ) ≈ 2.49399867607628 + @test @inferred(mean(d) ) ≈ 3.908650337129266 + @test @inferred(pdf(d, 1.0001)) ≈ 0.43425105679757203 + @test @inferred(pdf(d, 5 )) ≈ 0.08685889638065035 + @test @inferred(pdf(d, 9.9999)) ≈ 0.04342988248915007 + @test @inferred(cdf(d, 1.0001)) ≈ 4.342727686266485e-05 + @test @inferred(cdf(d, 5 )) ≈ 0.6989700043360187 + @test @inferred(cdf(d, 9.9999)) ≈ 0.999995657033466 + @test @inferred(median(d) ) ≈ 3.1622776601683795 + @test @inferred(logpdf(d, 5) ) ≈ -2.443470357682056 + + for _ in 1:10 + lo = rand(rng) + hi = lo + 10*rand(rng) + dist = LogUniform(lo,hi) + q = rand(rng) + @test cdf(dist, quantile(dist, q)) ≈ q + + u = Uniform(log(lo), log(hi)) + @test exp(quantile(u, q)) ≈ quantile(dist, q) + @test exp(median(u)) ≈ median(dist) + x = rand(rng, dist) + @test cdf(u, log(x)) ≈ cdf(dist, x) + + @test @inferred(entropy(dist)) ≈ Distributions.expectation(x->-logpdf(dist,x), dist) + end + + @test kldivergence(LogUniform(1,2), LogUniform(1,2)) ≈ 0 atol=100eps(Float64) + @test isfinite(kldivergence(LogUniform(1,2), LogUniform(1,10))) + @test kldivergence(LogUniform(1.1,10), LogUniform(1,2)) === Inf + @test kldivergence(LogUniform(0.1,10), LogUniform(1,2)) === Inf + @test kldivergence(LogUniform(0.1,1), LogUniform(1,2)) === Inf + @test @inferred(kldivergence(LogUniform(0.1f0,1), LogUniform(1,2))) === Inf32 + + for _ in 1:10 + aq = 10*rand(rng) + ap = aq + 10*rand(rng) + bp = ap + 10*rand(rng) + bq = bp + 10*rand(rng) + p = LogUniform(ap, bp) + q = LogUniform(aq, bq) + @test @inferred(kldivergence(p, q)) ≈ + kldivergence(Uniform(log(ap), log(bp)), Uniform(log(aq), log(bq))) + end +end + + +end#module diff --git a/test/matrixvariates.jl b/test/matrixvariates.jl index 1498a315b..23acb5db7 100644 --- a/test/matrixvariates.jl +++ b/test/matrixvariates.jl @@ -7,6 +7,7 @@ using Test import JSON import Distributions: _univariate, _multivariate, _rand_params +@testset "matrixvariates" begin #= 1. baseline tests 2. 
compare 1 x 1 matrix-variate with univariate @@ -184,23 +185,8 @@ function test_against_univariate(D::MatrixDistribution, d::UnivariateDistributio nothing end -# Equivalent to `ExactOneSampleKSTest` in HypothesisTests.jl -# We implement it here to avoid a circular dependency on HypothesisTests -# that causes test failures when preparing a breaking release of Distributions -function pvalue_kolmogorovsmirnoff(x::AbstractVector, d::UnivariateDistribution) - # compute maximum absolute deviation from the empirical cdf - n = length(x) - cdfs = sort!(map(Base.Fix1(cdf, d), x)) - dmax = maximum(zip(cdfs, (0:(n-1))/n, (1:n)/n)) do (cdf, lower, upper) - return max(cdf - lower, upper - cdf) - end - - # compute asymptotic p-value (see `KSDist`) - return ccdf(KSDist(n), dmax) -end - function test_draws_against_univariate_cdf(D::MatrixDistribution, d::UnivariateDistribution) - α = 0.05 + α = 0.025 M = 100000 matvardraws = [rand(D)[1] for m in 1:M] @test pvalue_kolmogorovsmirnoff(matvardraws, d) >= α @@ -341,6 +327,11 @@ function test_special(dist::Type{Wishart}) ν, Σ = _rand_params(Wishart, Float64, n, n) d = Wishart(ν, Σ) H = rand(d, M) + @testset "deprecations" begin + for warn in (true, false) + @test @test_deprecated(Wishart(n - 1, Σ, warn)) == Wishart(n - 1, Σ) + end + end @testset "meanlogdet" begin @test isapprox(Distributions.meanlogdet(d), mean(logdet.(H)), atol = 0.1) end @@ -364,15 +355,17 @@ function test_special(dist::Type{Wishart}) end end @testset "Check Singular Branch" begin - X = H[1] - rank1 = Wishart(n - 2, Σ, false) - rank2 = Wishart(n - 1, Σ, false) + # Check that no warnings are shown: #1410 + rank1 = @test_logs Wishart(n - 2, Σ) + rank2 = @test_logs Wishart(n - 1, Σ) test_draw(rank1) test_draw(rank2) test_draws(rank1, rand(rank1, 10^6)) test_draws(rank2, rand(rank2, 10^6)) test_cov(rank1) test_cov(rank2) + + X = H[1] @test Distributions.singular_wishart_logkernel(d, X) ≈ Distributions.nonsingular_wishart_logkernel(d, X) @test Distributions.singular_wishart_logc0(n, ν, d.S, rank(d)) ≈ Distributions.nonsingular_wishart_logc0(n, ν, d.S) @test logpdf(d, X) ≈ Distributions.singular_wishart_logkernel(d, X) + Distributions.singular_wishart_logc0(n, ν, d.S, rank(d)) @@ -537,3 +530,4 @@ for distribution in matrixvariates test_matrixvariate(dist, n, p, M) end end +end diff --git a/test/mixture.jl b/test/mixture.jl index 3873df72a..8abefb876 100644 --- a/test/mixture.jl +++ b/test/mixture.jl @@ -69,8 +69,17 @@ function test_mixture(g::UnivariateMixture, n::Int, ns::Int, @test @inferred(componentwise_pdf(g, X)) ≈ P0 @test @inferred(componentwise_logpdf(g, X)) ≈ LP0 + # quantile + αs = float(partype(g))[0.0; 0.49; 0.5; 0.51; 1.0] + for α in αs + @test cdf(g, @inferred(quantile(g, α))) ≈ α + end + @test @inferred(median(g)) ≈ quantile(g, 1//2) + # sampling - if (T <: AbstractFloat) + # sampling does not work with `Float32` since `AliasTable` does not support `Float32` + # Ref: https://github.com/JuliaStats/StatsBase.jl/issues/158 + if T <: AbstractFloat && eltype(probs(g)) === Float64 if ismissing(rng) Xs = rand(g, ns) else @@ -172,6 +181,17 @@ end "rand(rng, ...)" => MersenneTwister(123)) @testset "Testing UnivariateMixture" begin + g_u = MixtureModel([Normal(), Normal()]) + @test isa(g_u, MixtureModel{Univariate, Continuous, <:Normal}) + @test ncomponents(g_u) == 2 + test_mixture(g_u, 1000, 10^6, rng) + test_params(g_u) + @test minimum(g_u) == -Inf + @test maximum(g_u) == Inf + @test extrema(g_u) == (-Inf, Inf) + @test @inferred(median(g_u)) === 0.0 + @test @inferred(quantile(g_u, 0.5f0)) 
=== 0.0 + g_u = MixtureModel(Normal{Float64}, [(0.0, 1.0), (2.0, 1.0), (-4.0, 1.5)], [0.2, 0.5, 0.3]) @test isa(g_u, MixtureModel{Univariate,Continuous,<:Normal}) @test ncomponents(g_u) == 3 @@ -181,6 +201,17 @@ end @test maximum(g_u) == Inf @test extrema(g_u) == (-Inf, Inf) + g_u = MixtureModel(Normal{Float32}, [(0f0, 1f0), (0f0, 2f0)], [0.4f0, 0.6f0]) + @test isa(g_u, MixtureModel{Univariate,Continuous,<:Normal}) + @test ncomponents(g_u) == 2 + test_mixture(g_u, 1000, 10^6, rng) + test_params(g_u) + @test minimum(g_u) == -Inf + @test maximum(g_u) == Inf + @test extrema(g_u) == (-Inf, Inf) + @test @inferred(median(g_u)) === 0f0 + @test @inferred(quantile(g_u, 0.5f0)) === 0f0 + g_u = MixtureModel([TriangularDist(-1,2,0),TriangularDist(-.5,3,1),TriangularDist(-2,0,-1)]) @test minimum(g_u) ≈ -2.0 @test maximum(g_u) ≈ 3.0 @@ -222,9 +253,9 @@ end @testset "Testing MultivariatevariateMixture" begin g_m = MixtureModel( - IsoNormal[ MvNormal([0.0, 0.0], 1.0), - MvNormal([0.2, 1.0], 1.0), - MvNormal([-0.5, -3.0], 1.6) ], + IsoNormal[ MvNormal([0.0, 0.0], I), + MvNormal([0.2, 1.0], I), + MvNormal([-0.5, -3.0], 1.6 * I) ], [0.2, 0.5, 0.3]) @test isa(g_m, MixtureModel{Multivariate, Continuous, IsoNormal}) @test length(components(g_m)) == 3 diff --git a/test/mvlognormal.jl b/test/mvlognormal.jl index 21860c1a6..a57caac69 100644 --- a/test/mvlognormal.jl +++ b/test/mvlognormal.jl @@ -96,8 +96,8 @@ end ####### Validate results for a single-dimension MvLogNormal by comparing with univariate LogNormal @testset "Comparing results from MvLogNormal with univariate LogNormal" begin - l1 = LogNormal(0.1,0.4) - l2 = MvLogNormal(0.1*ones(1),0.4) + l1 = LogNormal(0.1, 0.4) + l2 = MvLogNormal([0.1], 0.16 * I) @test [mean(l1)] ≈ mean(l2) @test [median(l1)] ≈ median(l2) @test [mode(l1)] ≈ mode(l2) @@ -120,10 +120,10 @@ end (MvLogNormal(mu,PDMats.PDMat(C)), mu, C), (MvLogNormal(mu_r,PDMats.PDMat(C)), mu_r, C), (MvLogNormal(PDMats.PDiagMat(sqrt.(va))), zeros(3), Matrix(Diagonal(va))), - (MvLogNormal(mu, sqrt(0.2)), mu, Matrix(0.2I, 3, 3)), - (MvLogNormal(3, sqrt(0.2)), zeros(3), Matrix(0.2I, 3, 3)), - (MvLogNormal(mu, Vector{Float64}(sqrt.(va))), mu, Matrix(Diagonal(va))), # Julia 0.4 loses type information so Vector{Float64} can be dropped when we don't support 0.4 - (MvLogNormal(Vector{Float64}(sqrt.(va))), zeros(3), Matrix(Diagonal(va))), # Julia 0.4 loses type information so Vector{Float64} can be dropped when we don't support 0.4 + (@test_deprecated(MvLogNormal(mu, sqrt(0.2))), mu, Matrix(0.2I, 3, 3)), + (@test_deprecated(MvLogNormal(3, sqrt(0.2))), zeros(3), Matrix(0.2I, 3, 3)), + (@test_deprecated(MvLogNormal(mu, Vector{Float64}(sqrt.(va)))), mu, Matrix(Diagonal(va))), # Julia 0.4 loses type information so Vector{Float64} can be dropped when we don't support 0.4 + (@test_deprecated(MvLogNormal(Vector{Float64}(sqrt.(va)))), zeros(3), Matrix(Diagonal(va))), # Julia 0.4 loses type information so Vector{Float64} can be dropped when we don't support 0.4 (MvLogNormal(mu, C), mu, C), (MvLogNormal(C), zeros(3), C) ] m, s = params(g) diff --git a/test/mvnormal.jl b/test/mvnormal.jl index 1758ed208..d5f99ab90 100644 --- a/test/mvnormal.jl +++ b/test/mvnormal.jl @@ -8,88 +8,7 @@ end using Distributions using LinearAlgebra, Random, Test using SparseArrays - -import Distributions: distrname - - - -####### Core testing procedure - -function test_mvnormal(g::AbstractMvNormal, n_tsamples::Int=10^6, - rng::Union{AbstractRNG, Missing} = missing) - d = length(g) - μ = mean(g) - Σ = cov(g) - @test length(μ) == d - @test size(Σ) == 
(d, d) - @test var(g) ≈ diag(Σ) - @test entropy(g) ≈ 0.5 * logdet(2π * ℯ * Σ) - ldcov = logdetcov(g) - @test ldcov ≈ logdet(Σ) - vs = diag(Σ) - @test g == typeof(g)(params(g)...) - @test g == deepcopy(g) - - # test sampling for AbstractMatrix (here, a SubArray): - if ismissing(rng) - subX = view(rand(d, 2d), :, 1:d) - @test isa(rand!(g, subX), SubArray) - else - subX = view(rand(rng, d, 2d), :, 1:d) - @test isa(rand!(rng, g, subX), SubArray) - end - - # sampling - if ismissing(rng) - @test isa(rand(g), Vector{Float64}) - X = rand(g, n_tsamples) - else - @test isa(rand(rng, g), Vector{Float64}) - X = rand(rng, g, n_tsamples) - end - emp_mu = vec(mean(X, dims=2)) - Z = X .- emp_mu - emp_cov = (Z * Z') * inv(n_tsamples) - for i = 1:d - @test isapprox(emp_mu[i], μ[i], atol=sqrt(vs[i] / n_tsamples) * 8.0) - end - for i = 1:d, j = 1:d - @test isapprox(emp_cov[i,j], Σ[i,j], atol=sqrt(vs[i] * vs[j]) * 10.0 / sqrt(n_tsamples)) - end - - X = rand(MersenneTwister(14), g, n_tsamples) - Y = rand(MersenneTwister(14), g, n_tsamples) - @test X == Y - emp_mu = vec(mean(X, dims=2)) - Z = X .- emp_mu - emp_cov = (Z * Z') * inv(n_tsamples) - for i = 1:d - @test isapprox(emp_mu[i] , μ[i] , atol=sqrt(vs[i] / n_tsamples) * 8.0) - end - for i = 1:d, j = 1:d - @test isapprox(emp_cov[i,j], Σ[i,j], atol=sqrt(vs[i] * vs[j]) * 10.0 / sqrt(n_tsamples)) - end - - - # evaluation of sqmahal & logpdf - U = X .- μ - sqm = vec(sum(U .* (Σ \ U), dims=1)) - for i = 1:min(100, n_tsamples) - @test sqmahal(g, X[:,i]) ≈ sqm[i] - end - @test sqmahal(g, X) ≈ sqm - - lp = -0.5 .* sqm .- 0.5 * (d * log(2.0 * pi) + ldcov) - for i = 1:min(100, n_tsamples) - @test logpdf(g, X[:,i]) ≈ lp[i] - end - @test logpdf(g, X) ≈ lp - - # log likelihood - @test loglikelihood(g, X) ≈ sum([Distributions._logpdf(g, X[:,i]) for i in 1:size(X, 2)]) - @test loglikelihood(g, X[:, 1]) ≈ logpdf(g, X[:, 1]) - @test loglikelihood(g, [X[:, i] for i in axes(X, 2)]) ≈ loglikelihood(g, X) -end +using FillArrays ###### General Testing @@ -104,23 +23,23 @@ end J = [4. -2. -1.; -2. 5. -1.; -1. -1. 6.] 
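# Migration sketch, not part of the diff: the `@test_deprecated` wrappers below cover the
# old constructors that took standard deviations; the replacements take an explicit
# covariance (note σ versus σ²):
using Distributions, LinearAlgebra
μ = [1.0, 2.0, 3.0]
MvNormal(μ, 2.0)                          # deprecated: isotropic with σ = 2
MvNormal(μ, 4.0 * I)                      # replacement: covariance 4I = σ²I
MvNormal(μ, [0.5, 1.0, 1.5])              # deprecated: diagonal σ
MvNormal(μ, Diagonal([0.25, 1.0, 2.25]))  # replacement: diagonal variances σ²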
     for (g, μ, Σ) in [
-        (MvNormal(mu, sqrt(2.0)), mu, Matrix(2.0I, 3, 3)),
-        (MvNormal(mu_r, sqrt(2.0)), mu_r, Matrix(2.0I, 3, 3)),
-        (MvNormal(3, sqrt(2.0)), zeros(3), Matrix(2.0I, 3, 3)),
-        (MvNormal(mu, sqrt.(va)), mu, Matrix(Diagonal(va))),
-        (MvNormal(mu_r, sqrt.(va)), mu_r, Matrix(Diagonal(va))),
-        (MvNormal(sqrt.(va)), zeros(3), Matrix(Diagonal(va))),
+        (@test_deprecated(MvNormal(mu, sqrt(2.0))), mu, Matrix(2.0I, 3, 3)),
+        (@test_deprecated(MvNormal(mu_r, sqrt(2.0))), mu_r, Matrix(2.0I, 3, 3)),
+        (@test_deprecated(MvNormal(3, sqrt(2.0))), zeros(3), Matrix(2.0I, 3, 3)),
+        (@test_deprecated(MvNormal(mu, sqrt.(va))), mu, Matrix(Diagonal(va))),
+        (@test_deprecated(MvNormal(mu_r, sqrt.(va))), mu_r, Matrix(Diagonal(va))),
+        (@test_deprecated(MvNormal(sqrt.(va))), zeros(3), Matrix(Diagonal(va))),
         (MvNormal(mu, C), mu, C),
         (MvNormal(mu_r, C), mu_r, C),
         (MvNormal(C), zeros(3), C),
         (MvNormal(Symmetric(C)), zeros(3), Matrix(Symmetric(C))),
         (MvNormal(Diagonal(dv)), zeros(3), Matrix(Diagonal(dv))),
-        (MvNormalCanon(h, 2.0), h ./ 2.0, Matrix(0.5I, 3, 3)),
-        (MvNormalCanon(mu_r, 2.0), mu_r ./ 2.0, Matrix(0.5I, 3, 3)),
-        (MvNormalCanon(3, 2.0), zeros(3), Matrix(0.5I, 3, 3)),
-        (MvNormalCanon(h, dv), h ./ dv, Matrix(Diagonal(inv.(dv)))),
-        (MvNormalCanon(mu_r, dv), mu_r ./ dv, Matrix(Diagonal(inv.(dv)))),
-        (MvNormalCanon(dv), zeros(3), Matrix(Diagonal(inv.(dv)))),
+        (@test_deprecated(MvNormalCanon(h, 2.0)), h ./ 2.0, Matrix(0.5I, 3, 3)),
+        (@test_deprecated(MvNormalCanon(mu_r, 2.0)), mu_r ./ 2.0, Matrix(0.5I, 3, 3)),
+        (@test_deprecated(MvNormalCanon(3, 2.0)), zeros(3), Matrix(0.5I, 3, 3)),
+        (@test_deprecated(MvNormalCanon(h, dv)), h ./ dv, Matrix(Diagonal(inv.(dv)))),
+        (@test_deprecated(MvNormalCanon(mu_r, dv)), mu_r ./ dv, Matrix(Diagonal(inv.(dv)))),
+        (@test_deprecated(MvNormalCanon(dv)), zeros(3), Matrix(Diagonal(inv.(dv)))),
         (MvNormalCanon(h, J), J \ h, inv(J)),
         (MvNormalCanon(J), zeros(3), inv(J)),
         (MvNormal(mu, Symmetric(C)), mu, Matrix(Symmetric(C))),
@@ -131,7 +50,7 @@ end
         @test mean(g) ≈ μ
         @test cov(g) ≈ Σ
         @test invcov(g) ≈ inv(Σ)
-        test_mvnormal(g, 10^4)
+        Distributions.TestUtils.test_mvnormal(g, 10^4)
 
         # conversion between mean form and canonical form
         if isa(g, MvNormal)
@@ -159,11 +78,11 @@ end
     h = J \ mu
     @test typeof(MvNormal(mu, PDMat(Array{Float32}(C)))) == typeof(MvNormal(mu, PDMat(C)))
     @test typeof(MvNormal(mu, Array{Float32}(C))) == typeof(MvNormal(mu, PDMat(C)))
-    @test typeof(MvNormal(mu, 2.0f0)) == typeof(MvNormal(mu, 2.0))
+    @test typeof(@test_deprecated(MvNormal(mu, 2.0f0))) == typeof(@test_deprecated(MvNormal(mu, 2.0)))
 
     @test typeof(MvNormalCanon(h, PDMat(Array{Float32}(J)))) == typeof(MvNormalCanon(h, PDMat(J)))
     @test typeof(MvNormalCanon(h, Array{Float32}(J))) == typeof(MvNormalCanon(h, PDMat(J)))
-    @test typeof(MvNormalCanon(h, 2.0f0)) == typeof(MvNormalCanon(h, 2.0))
+    @test typeof(@test_deprecated(MvNormalCanon(h, 2.0f0))) == typeof(@test_deprecated(MvNormalCanon(h, 2.0)))
 
     @test typeof(MvNormalCanon(mu, Array{Float16}(h), PDMat(Array{Float32}(J)))) == typeof(MvNormalCanon(mu, h, PDMat(J)))
@@ -175,9 +94,21 @@ end
     @test typeof(convert(MvNormalCanon{Float64}, d)) == typeof(MvNormalCanon(mu, h, PDMat(J)))
     @test typeof(convert(MvNormalCanon{Float64}, d.μ, d.h, d.J)) == typeof(MvNormalCanon(mu, h, PDMat(J)))
 
-    @test MvNormal(mu, I) === MvNormal(mu, 1)
-    @test MvNormal(mu, 9 * I) === MvNormal(mu, 3)
-    @test MvNormal(mu, 0.25f0 * I) === MvNormal(mu, 0.5)
+    @test MvNormal(mu, I) === @test_deprecated(MvNormal(mu, 1))
+    @test MvNormal(mu, 9 * I) === @test_deprecated(MvNormal(mu, 3))
+    @test MvNormal(mu, 0.25f0 * I) === @test_deprecated(MvNormal(mu, 0.5))
+
+    @test MvNormal(mu, I) === MvNormal(mu, Diagonal(Ones(length(mu))))
+    @test MvNormal(mu, 9 * I) === MvNormal(mu, Diagonal(Fill(9, length(mu))))
+    @test MvNormal(mu, 0.25f0 * I) === MvNormal(mu, Diagonal(Fill(0.25f0, length(mu))))
+
+    @test MvNormalCanon(h, I) == MvNormalCanon(h, Diagonal(Ones(length(h))))
+    @test MvNormalCanon(h, 9 * I) == MvNormalCanon(h, Diagonal(Fill(9, length(h))))
+    @test MvNormalCanon(h, 0.25f0 * I) == MvNormalCanon(h, Diagonal(Fill(0.25f0, length(h))))
+
+    @test typeof(MvNormalCanon(h, I)) === typeof(MvNormalCanon(h, Diagonal(Ones(length(h)))))
+    @test typeof(MvNormalCanon(h, 9 * I)) === typeof(MvNormalCanon(h, Diagonal(Fill(9, length(h)))))
+    @test typeof(MvNormalCanon(h, 0.25f0 * I)) === typeof(MvNormalCanon(h, Diagonal(Fill(0.25f0, length(h)))))
 end
 
 @testset "MvNormal 32-bit logpdf" begin
@@ -349,7 +280,7 @@ end
 
 @testset "MvNormal: Sampling with integer-valued parameters (#1004)" begin
-    d = MvNormal([0, 0], [1, 1])
+    d = MvNormal([0, 0], Diagonal([1, 1]))
     @test rand(d) isa Vector{Float64}
     @test rand(d, 10) isa Matrix{Float64}
     @test rand(d, (3, 2)) isa Matrix{Vector{Float64}}
diff --git a/test/mvtdist.jl b/test/mvtdist.jl
index 4e1b20324..5f1cc71d2 100644
--- a/test/mvtdist.jl
+++ b/test/mvtdist.jl
@@ -4,6 +4,7 @@ using Test
 import Distributions: GenericMvTDist
 import PDMats: PDMat
 
+@testset "mvtdist" begin
 # Set location vector mu and scale matrix Sigma as in
 # Hofert M. On Sampling from the Multivariate t Distribution. The R Journal
 mu = [1., 2]
@@ -84,3 +85,4 @@ end
     x = rand(X_implicit)
     @test logpdf(X_implicit, x) ≈ logpdf(X_expicit, x)
 end
+end
diff --git a/test/normal.jl b/test/normal.jl
index 4101abc64..70acf51b5 100644
--- a/test/normal.jl
+++ b/test/normal.jl
@@ -14,7 +14,9 @@ isnan_type(::Type{T}, v) where {T} = isnan(v) && v isa T
     @test logdiffcdf(Normal(), Float64(5), Float64(3)) ≈ -6.607938594596893 rtol=1e-12
     let d = Normal(Float64(0), Float64(1)), x = Float64(-60), y = Float64(-60.001)
         float_res = logdiffcdf(d, x, y)
-        big_float_res = log(cdf(d, BigFloat(x, 100)) - cdf(d, BigFloat(y, 100)))
+        big_x = VERSION < v"1.1" ? BigFloat(x, 100) : BigFloat(x; precision=100)
+        big_y = VERSION < v"1.1" ? BigFloat(y, 100) : BigFloat(y; precision=100)
+        big_float_res = log(cdf(d, big_x) - cdf(d, big_y))
         @test float_res ≈ big_float_res
     end
     @test_throws ArgumentError logdiffcdf(Normal(), 1.0, 2.0)
@@ -148,6 +150,8 @@ end
     @test @inferred(quantile(Normal(1.0f0, 0.0f0), 0.5f0)) === 1.0f0
     @test isnan_type(Float32, @inferred(quantile(Normal(1.0f0, 0.0f0), NaN32)))
     @test @inferred(quantile(Normal(1//1, 0//1), 1//2)) === 1.0
+    @test @inferred(quantile(Normal(1f0, 0f0), 1//2)) === 1f0
+    @test @inferred(quantile(Normal(1f0, 0.0), 1//2)) === 1.0
     @test @inferred(cquantile(Normal(1.0, 0.0), 0.0f0)) === Inf
     @test @inferred(cquantile(Normal(1.0, 0.0f0), 1.0)) === -Inf
@@ -158,6 +162,8 @@ end
     @test @inferred(cquantile(Normal(1.0f0, 0.0f0), 0.5f0)) === 1.0f0
     @test isnan_type(Float32, @inferred(cquantile(Normal(1.0f0, 0.0f0), NaN32)))
     @test @inferred(cquantile(Normal(1//1, 0//1), 1//2)) === 1.0
+    @test @inferred(cquantile(Normal(1f0, 0f0), 1//2)) === 1f0
+    @test @inferred(cquantile(Normal(1f0, 0.0), 1//2)) === 1.0
 end
 
 @testset "Normal: Sampling with integer-valued parameters" begin
@@ -166,3 +172,12 @@ end
     @test rand(d, 10) isa Vector{Float64}
     @test rand(d, (3, 2)) isa Matrix{Float64}
 end
+
+@testset "NormalCanon and conversion" begin
+    @test canonform(Normal()) == NormalCanon()
+    @test meanform(NormalCanon()) == Normal()
+    @test meanform(canonform(Normal(0.25, 0.7))) ≈ Normal(0.25, 0.7)
+    @test convert(NormalCanon, convert(Normal, NormalCanon(0.3, 0.8))) ≈ NormalCanon(0.3, 0.8)
+    @test mean(canonform(Normal(0.25, 0.7))) ≈ 0.25
+    @test std(canonform(Normal(0.25, 0.7))) ≈ 0.7
+end
diff --git a/test/poissonbinomial.jl b/test/poissonbinomial.jl
index b1adb3eb3..8e53e6edd 100644
--- a/test/poissonbinomial.jl
+++ b/test/poissonbinomial.jl
@@ -1,6 +1,9 @@
 using Distributions
+using ChainRulesTestUtils
+using ForwardDiff
 using Test
 
+@testset "poissonbinomial" begin
 function naive_esf(x::AbstractVector{T}) where T <: Real
     n = length(x)
     S = zeros(T, n+1)
@@ -36,8 +39,6 @@ naive_sol = naive_pb(p)
 # Test the special case where PoissonBinomial distribution reduces
 # to Binomial distribution
 for (p, n) in [(0.8, 6), (0.5, 10), (0.04, 20)]
-    local p
-
     d = PoissonBinomial(fill(p, n))
     dref = Binomial(n, p)
     println("    testing PoissonBinomial p=$p, n=$n")
@@ -62,14 +63,28 @@ for (p, n) in [(0.8, 6), (0.5, 10), (0.04, 20)]
         @test @inferred(quantile(d, i)) ≈ quantile(dref, i)
     end
     for i=0:n
-        @test @inferred(cdf(d, i)) ≈ cdf(dref, i) atol=1e-15
-        @test @inferred(cdf(d, i//1)) ≈ cdf(dref, i) atol=1e-15
-        @test @inferred(pdf(d, i)) ≈ pdf(dref, i) atol=1e-15
-        @test @inferred(pdf(d, i//1)) ≈ pdf(dref, i) atol=1e-15
+        @test @inferred(pdf(d, i)) ≈ pdf(dref, i) atol=1e-14
+        @test @inferred(pdf(d, i//1)) ≈ pdf(dref, i) atol=1e-14
         @test @inferred(logpdf(d, i)) ≈ logpdf(dref, i)
         @test @inferred(logpdf(d, i//1)) ≈ logpdf(dref, i)
+        for f in (cdf, ccdf, logcdf, logccdf)
+            @test @inferred(f(d, i)) ≈ f(dref, i) rtol=1e-6
+            @test @inferred(f(d, i//1)) ≈ f(dref, i) rtol=1e-6
+            @test @inferred(f(d, i + 0.5)) ≈ f(dref, i) rtol=1e-6
+        end
     end
+    @test iszero(@inferred(cdf(d, -Inf)))
+    @test isone(@inferred(cdf(d, Inf)))
+    @test @inferred(logcdf(d, -Inf)) == -Inf
+    @test iszero(@inferred(logcdf(d, Inf)))
+    @test isone(@inferred(ccdf(d, -Inf)))
+    @test iszero(@inferred(ccdf(d, Inf)))
+    @test iszero(@inferred(logccdf(d, -Inf)))
+    @test @inferred(logccdf(d, Inf)) == -Inf
+    for f in (cdf, ccdf, logcdf, logccdf)
+        @test isnan(f(d, NaN))
+    end
 end
 
 # Test against a sum of three Binomial distributions
@@ -109,8 +124,8 @@ for (n₁, n₂, n₃, p₁, p₂, p₃) in [(10, 10, 10, 0.1, 0.5, 0.9),
             end
             m += pmf1[i+1] * mc
         end
-        @test @inferred(pdf(d, k)) ≈ m atol=1e-15
-        @test @inferred(pdf(d, k//1)) ≈ m atol=1e-15
+        @test @inferred(pdf(d, k)) ≈ m atol=1e-14
+        @test @inferred(pdf(d, k//1)) ≈ m atol=1e-14
         @test @inferred(logpdf(d, k)) ≈ log(m)
         @test @inferred(logpdf(d, k//1)) ≈ log(m)
     end
@@ -131,7 +146,16 @@ end
     @test x ≈ fftw_fft
 end
 
-# Test autodiff using ForwardDiff
-f = x -> logpdf(PoissonBinomial(x), 0)
-at = [0.5, 0.5]
-@test isapprox(ForwardDiff.gradient(f, at), fdm(f, at), atol=1e-6)
+@testset "automatic differentiation" begin
+    # Test autodiff using ForwardDiff
+    f = x -> logpdf(PoissonBinomial(x), 0)
+    at = [0.5, 0.5]
+    @test isapprox(ForwardDiff.gradient(f, at), fdm(f, at), atol=1e-6)
+
+    # Test ChainRules definition
+    for f in (Distributions.poissonbinomial_pdf, Distributions.poissonbinomial_pdf_fft)
+        test_frule(f, rand(50))
+        test_rrule(f, rand(50))
+    end
+end
+end
diff --git a/test/product.jl b/test/product.jl
index 3dc0e8de3..44c3b83c4 100644
--- a/test/product.jl
+++ b/test/product.jl
@@ -29,7 +29,7 @@ end
     N = 11
     # Construct independent distributions and `Product` distribution from these.
    ubound = rand(N)
-    ds = Uniform.(0.0, ubound)
+    ds = Uniform.(-ubound, ubound)
     x = rand.(ds)
     d_product = product_distribution(ds)
     @test d_product isa Product
@@ -43,6 +43,10 @@ end
     @test entropy(d_product) == sum(entropy.(ds))
     @test insupport(d_product, ubound) == true
     @test insupport(d_product, ubound .+ 1) == false
+    @test minimum(d_product) == -ubound
+    @test maximum(d_product) == ubound
+    @test extrema(d_product) == (-ubound, ubound)
+    @test isless(extrema(d_product)...)
 
     y = rand(d_product)
     @test y isa typeof(x)
diff --git a/test/ref/continuous/rician.R b/test/ref/continuous/rician.R
new file mode 100644
index 000000000..3214f360b
--- /dev/null
+++ b/test/ref/continuous/rician.R
@@ -0,0 +1,23 @@
+
+Rician <- R6Class("Rician",
+    inherit = ContinuousDistribution,
+    public = list(
+        names = c("nu", "sigma"),
+        nu = NA,
+        sigma = NA,
+        initialize = function(n=0, s=1) {
+            self$nu <- n
+            self$sigma <- s
+        },
+        supp = function() { c(0, Inf) },
+        properties = function() {
+            n <- self$nu
+            s <- self$sigma
+            list(scale = n^2 + 2 * s^2,
+                 shape = n^2 / (2 * s^2))
+        },
+        pdf = function(x, log=FALSE) { VGAM::drice(x, self$sigma, self$nu, log=log) },
+        cdf = function(x){ VGAM::price(x, self$sigma, self$nu) },
+        quan = function(v){ VGAM::qrice(v, self$sigma, self$nu) }
+    )
+)
diff --git a/test/ref/continuous_test.lst b/test/ref/continuous_test.lst
index eaef0e4e0..381a1c466 100644
--- a/test/ref/continuous_test.lst
+++ b/test/ref/continuous_test.lst
@@ -149,6 +149,10 @@ Rayleigh()
 Rayleigh(3.0)
 Rayleigh(8.0)
 
+Rician(1.0, 1.0)
+Rician(5.0, 1.0)
+Rician(10.0, 1.0)
+
 StudentizedRange(2.0, 2.0)
 StudentizedRange(5.0, 10.0)
 StudentizedRange(10.0, 5.0)
diff --git a/test/ref/continuous_test.ref.json b/test/ref/continuous_test.ref.json
index dd8807a09..bb7ce9a89 100644
--- a/test/ref/continuous_test.ref.json
+++ b/test/ref/continuous_test.ref.json
@@ -4050,6 +4050,90 @@
     { "q": 0.90, "x": 17.1677282103148 }
   ]
 },
+{
+  "expr": "Rician(1.0, 1.0)",
+  "dtype": "Rician",
+  "minimum": 0,
+  "maximum": "inf",
+  "properties": {
+    "scale": 3,
+    "shape": 0.5
+  },
+  "points": [
+    { "x": 0.58680768479231, "pdf": 0.32597689889069, "logpdf": -1.12092876242478, "cdf": 0.0999999999999981 },
+    { "x": 0.8500705146284, "pdf": 0.42713703077601, "logpdf": -0.850650402069471, "cdf": 0.2 },
+    { "x": 1.06959445774872, "pdf": 0.478590262388661, "logpdf": -0.736910449747733, "cdf": 0.300000000000001 },
+    { "x": 1.27356435184454, "pdf": 0.497262612586662, "logpdf": -0.698636996890676, "cdf": 0.399999999999999 },
+    { "x": 1.47547909178815, "pdf": 0.489049243270422, "logpdf": -0.715292092592867, "cdf": 0.500000000000012 },
+    { "x": 1.6862862997492, "pdf": 0.455970051392373, "logpdf": -0.785328148395677, "cdf": 0.599999999999999 },
+    { "x": 1.91973949265777, "pdf": 0.397730897577084, "logpdf": -0.921979639122226, "cdf": 0.700000000000003 },
+    { "x": 2.2010753309479, "pdf": 0.311617302264047, "logpdf": -1.16597943936394, "cdf": 0.800000000000009 },
+    { "x": 2.6019473978067, "pdf": 0.190248024171792, "logpdf": -1.65942666772506, "cdf": 0.900000000000001 }
+  ],
+  "quans": [
+    { "q": 0.10, "x": 0.58680768479231 },
+    { "q": 0.25, "x": 0.962923340192096 },
+    { "q": 0.50, "x": 1.47547909178815 },
+    { "q": 0.75, "x": 2.05189157517447 },
+    { "q": 0.90, "x": 2.6019473978067 }
+  ]
+},
+{
+  "expr": "Rician(5.0, 1.0)",
+  "dtype": "Rician",
+  "minimum": 0,
+  "maximum": "inf",
+  "properties": {
+    "scale": 27,
+    "shape": 12.5
+  },
+  "points": [
+    { "x": 3.83337182711774, "pdf": 0.178067275660391, "logpdf": -1.72559384694809, "cdf": 0.1 },
+    { "x": 4.26739311906375, "pdf": 0.283509558779568, "logpdf": -1.26050943934722, "cdf": 0.199999999999999 },
+    { "x": 4.5808282949603, "pdf": 0.351696201209667, "logpdf": -1.04498754078411, "cdf": 0.299999999999981 },
+    { "x": 4.84891183432576, "pdf": 0.390461020883228, "logpdf": -0.940427133165443, "cdf": 0.400000000000014 },
+    { "x": 5.0996760375676, "pdf": 0.402913231155499, "logpdf": -0.909034047523853, "cdf": 0.499999999999981 },
+    { "x": 5.35060611077152, "pdf": 0.389944217738904, "logpdf": -0.941751581527125, "cdf": 0.599999999999959 },
+    { "x": 5.61923787901142, "pdf": 0.350723907317018, "logpdf": -1.04775595388, "cdf": 0.699999999999992 },
+    { "x": 5.93381683616078, "pdf": 0.282226926437332, "logpdf": -1.26504382796596, "cdf": 0.799999999999998 },
+    { "x": 6.37038420267928, "pdf": 0.17678589781236, "logpdf": -1.73281589546462, "cdf": 0.899999999999982 }
+  ],
+  "quans": [
+    { "q": 0.10, "x": 3.83337182711774 },
+    { "q": 0.25, "x": 4.43248557648109 },
+    { "q": 0.50, "x": 5.0996760375676 },
+    { "q": 0.75, "x": 5.76805276699816 },
+    { "q": 0.90, "x": 6.37038420267928 }
+  ]
+},
+{
+  "expr": "Rician(10.0, 1.0)",
+  "dtype": "Rician",
+  "minimum": 0,
+  "maximum": "inf",
+  "properties": {
+    "scale": 102,
+    "shape": 50
+  },
+  "points": [
+    { "x": 8.77189934889994, "pdf": 0.17602367421233, "logpdf": -1.73713678041993, "cdf": 0.0999999999999889 },
+    { "x": 9.21055856310058, "pdf": 0.280747347661651, "logpdf": -1.27030013273982, "cdf": 0.200000000000031 },
+    { "x": 9.52691154521682, "pdf": 0.348625118414454, "logpdf": -1.05375809337315, "cdf": 0.299999999999978 },
+    { "x": 9.79725334900319, "pdf": 0.387340667844654, "logpdf": -0.948450694502043, "cdf": 0.40000000000009 },
+    { "x": 10.0499586252229, "pdf": 0.399938412039171, "logpdf": -0.91644471363081, "cdf": 0.500000000000001 },
+    { "x": 10.3026851248306, "pdf": 0.387275589795634, "logpdf": -0.948618721053336, "cdf": 0.599999999999961 },
+    { "x": 10.5730967493158, "pdf": 0.348503584620269, "logpdf": -1.05410676298003, "cdf": 0.700000000000032 },
+    { "x": 10.8895938007392, "pdf": 0.280589475189091, "logpdf": -1.27086262025283, "cdf": 0.799999999999942 },
+    { "x": 11.3285661035151, "pdf": 0.175871273888067, "logpdf": -1.73800294990951, "cdf": 0.900000000000024 }
+  ],
+  "quans": [
+    { "q": 0.10, "x": 8.77189934889994 },
+    { "q": 0.25, "x": 9.37722801591067 },
+    { "q": 0.50, "x": 10.0499586252229 },
+    { "q": 0.75, "x": 10.7228400133078 },
+    { "q": 0.90, "x": 11.3285661035151 }
+  ]
+},
 {
   "expr": "StudentizedRange(2.0, 2.0)",
   "dtype": "StudentizedRange",
diff --git a/test/ref/rdistributions.R b/test/ref/rdistributions.R
index 699f1f718..c8b3b1031 100644
--- a/test/ref/rdistributions.R
+++ b/test/ref/rdistributions.R
@@ -68,6 +68,7 @@ source("continuous/normal.R")
 source("continuous/normalinversegaussian.R")
 source("continuous/pareto.R")
 source("continuous/rayleigh.R")
+source("continuous/rician.R")
 source("continuous/studentizedrange.R")
 source("continuous/tdist.R")
 source("continuous/triangulardist.R")
diff --git a/test/rician.jl b/test/rician.jl
new file mode 100644
index 000000000..5674373b3
--- /dev/null
+++ b/test/rician.jl
@@ -0,0 +1,97 @@
+@testset "Rician" begin
+
+    d1 = Rician(0.0, 10.0)
+    @test d1 isa Rician{Float64}
+    @test params(d1) == (0.0, 10.0)
+    @test shape(d1) == 0.0
+    @test scale(d1) == 200.0
+    @test partype(d1) === Float64
+    @test eltype(d1) === Float64
+    @test rand(d1) isa Float64
+
+    d2 = Rayleigh(10.0)
+    @test mean(d1) ≈ mean(d2)
+    @test var(d1) ≈ var(d2)
+    @test mode(d1) ≈ mode(d2)
+    @test median(d1) ≈ median(d2)
+    @test quantile.(d1, [0.25, 0.45, 0.60, 0.80, 0.90]) ≈ quantile.(d2, [0.25, 0.45, 0.60, 0.80, 0.90])
+    @test pdf.(d1, 0.0:0.1:1.0) ≈ pdf.(d2, 0.0:0.1:1.0)
+    @test cdf.(d1, 0.0:0.1:1.0) ≈ cdf.(d2, 0.0:0.1:1.0)
+
+    d1 = Rician(10.0, 10.0)
+    @test median(d1) == quantile(d1, 0.5)
+    x = quantile.(d1, [0.25, 0.45, 0.60, 0.80, 0.90])
+    @test all(cdf.(d1, x) .≈ [0.25, 0.45, 0.60, 0.80, 0.90])
+
+    x = rand(Rician(5.0, 5.0), 100000)
+    d1 = fit(Rician, x)
+    @test d1 isa Rician{Float64}
+    @test params(d1)[1] ≈ 5.0 atol=0.2
+    @test params(d1)[2] ≈ 5.0 atol=0.2
+
+    d1 = Rician(10.0f0, 10.0f0)
+    @test d1 isa Rician{Float32}
+    @test params(d1) == (10.0f0, 10.0f0)
+    @test shape(d1) == 0.5f0
+    @test scale(d1) == 300.0f0
+    @test partype(d1) === Float32
+    @test eltype(d1) === Float64
+    @test rand(d1) isa Float64
+
+    d1 = Rician()
+    @test d1 isa Rician{Float64}
+    @test params(d1) == (0.0, 1.0)
+
+    @test pdf(d1, -Inf) == 0.0
+    @test pdf(d1, -1) == 0.0
+    @test pdf(d1, Inf) == 0.0
+    @test isnan(pdf(d1, NaN))
+
+    @test logpdf(d1, -Inf) == -Inf
+    @test logpdf(d1, -1) == -Inf
+    @test logpdf(d1, Inf) == -Inf
+    @test isnan(logpdf(d1, NaN))
+
+    @test cdf(d1, -Inf) == 0.0
+    @test cdf(d1, -1) == 0.0
+    @test cdf(d1, Inf) == 1.0
+    @test isnan(cdf(d1, NaN))
+
+    @test logcdf(d1, -Inf) == -Inf
+    @test logcdf(d1, -1) == -Inf
+    @test logcdf(d1, Inf) == 0.0
+    @test isnan(logcdf(d1, NaN))
+
+    @inferred pdf(d1, -Inf32)
+    @inferred pdf(d1, -1)
+    @inferred pdf(d1, 1.0)
+    @inferred pdf(d1, 1.0f0)
+    @inferred pdf(d1, 1)
+    @inferred pdf(d1, 1//2)
+    @inferred pdf(d1, Inf)
+
+    @inferred logpdf(d1, -Inf32)
+    @inferred logpdf(d1, -1)
+    @inferred logpdf(d1, 1.0)
+    @inferred logpdf(d1, 1.0f0)
+    @inferred logpdf(d1, 1)
+    @inferred logpdf(d1, 1//2)
+    @inferred logpdf(d1, Inf)
+
+    @inferred cdf(d1, -Inf32)
+    @inferred cdf(d1, -1)
+    @inferred cdf(d1, 1.0)
+    @inferred cdf(d1, 1.0f0)
+    @inferred cdf(d1, 1)
+    @inferred cdf(d1, 1//2)
+    @inferred cdf(d1, Inf)
+
+    @inferred logcdf(d1, -Inf32)
+    @inferred logcdf(d1, -1)
+    @inferred logcdf(d1, 1.0)
+    @inferred logcdf(d1, 1.0f0)
+    @inferred logcdf(d1, 1)
+    @inferred logcdf(d1, 1//2)
+    @inferred logcdf(d1, Inf)
+
+end
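The new test/rician.jl above leans on the identity that a Rician distribution with ν = 0 collapses to a Rayleigh distribution with the same σ, which is what its first block of comparisons pins down. A minimal sketch of that reduction (illustrative only, not part of the patch; plain Julia against the exported API):

    using Distributions
    d_rice = Rician(0.0, 10.0)  # ν = 0: no line-of-sight component
    d_ray = Rayleigh(10.0)
    pdf(d_rice, 5.0) ≈ pdf(d_ray, 5.0)  # true: the densities agree pointwise
    mean(d_rice) ≈ mean(d_ray)          # true: so do the moments

diff --git a/test/runtests.jl b/test/runtests.jl
index 7ed6e3520..84e6d68af 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,6 +11,7 @@ import JSON
 import ForwardDiff
 
 const tests = [
+    "loguniform",
     "arcsine",
     "dirac",
     "truncate",
@@ -39,6 +40,7 @@ const tests = [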
"edgeworth", "matrixreshaped", "matrixvariates", + "lkjcholesky", "vonmisesfisher", "conversion", "convolution", @@ -63,6 +65,9 @@ const tests = [ "chi", "gumbel", "pdfnorm", + "rician", + "functionals", + "density_interface", ] printstyled("Running tests:\n", color=:blue) diff --git a/test/samplers.jl b/test/samplers.jl index 0555b1d63..68e042ec1 100644 --- a/test/samplers.jl +++ b/test/samplers.jl @@ -108,7 +108,7 @@ import Distributions: Gamma(0.1, 1.0), Gamma(2.0, 1.0), MatrixNormal(3, 4), - MvNormal(3, 1.0), + MvNormal(zeros(3), I), Normal(1.5, 2.0), Poisson(0.5), ) diff --git a/test/testutils.jl b/test/testutils.jl index 35483aa0c..6a8e93bf0 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -35,6 +35,7 @@ function test_distr(distr::DiscreteUnivariateDistribution, n::Int; test_support(distr, vs) test_evaluation(distr, vs, testquan) test_range_evaluation(distr) + test_nonfinite(distr) test_stats(distr, vs) test_samples(distr, n) @@ -52,6 +53,7 @@ function test_distr(distr::ContinuousUnivariateDistribution, n::Int; test_support(distr, vs) test_evaluation(distr, vs, testquan) + test_nonfinite(distr) if isa(distr, StudentizedRange) n = 2000 # must use fewer values due to performance @@ -462,6 +464,27 @@ function test_evaluation(d::ContinuousUnivariateDistribution, vs::AbstractVector @test logccdf.(Ref(d), vs) ≈ lcc end +function test_nonfinite(distr::UnivariateDistribution) + # non-finite bounds + @test iszero(@inferred(cdf(distr, -Inf))) + @test isone(@inferred(cdf(distr, Inf))) + @test isone(@inferred(ccdf(distr, -Inf))) + @test iszero(@inferred(ccdf(distr, Inf))) + @test @inferred(logcdf(distr, -Inf)) == -Inf + @test iszero(@inferred(logcdf(distr, Inf))) + @test iszero(@inferred(logccdf(distr, -Inf))) + @test @inferred(logccdf(distr, Inf)) == -Inf + + # NaN + for f in (cdf, ccdf, logcdf, logccdf) + if distr isa NoncentralT + # broken in StatsFuns/R + @test_broken isnan(f(distr, NaN)) + else + @test isnan(f(distr, NaN)) + end + end +end #### Testing statistics methods @@ -556,7 +579,7 @@ function test_params(d::Truncated) d_unt = d.untruncated D = typeof(d_unt) pars = params(d_unt) - d_new = Truncated(D(pars...), d.lower, d.upper) + d_new = truncated(D(pars...), d.lower, d.upper) @test d_new == d @test d == deepcopy(d) end @@ -567,3 +590,18 @@ function fdm(f, at) FiniteDifferences.central_fdm(5, 1)(x -> f([at[1:i-1]; x; at[i+1:end]]), at[i]) end end + +# Equivalent to `ExactOneSampleKSTest` in HypothesisTests.jl +# We implement it here to avoid a circular dependency on HypothesisTests +# that causes test failures when preparing a breaking release of Distributions +function pvalue_kolmogorovsmirnoff(x::AbstractVector, d::UnivariateDistribution) + # compute maximum absolute deviation from the empirical cdf + n = length(x) + cdfs = sort!(map(Base.Fix1(cdf, d), x)) + dmax = maximum(zip(cdfs, (0:(n-1))/n, (1:n)/n)) do (cdf, lower, upper) + return max(cdf - lower, upper - cdf) + end + + # compute asymptotic p-value (see `KSDist`) + return ccdf(KSDist(n), dmax) +end diff --git a/test/truncate.jl b/test/truncate.jl index 363af62e5..d521d8ab1 100644 --- a/test/truncate.jl +++ b/test/truncate.jl @@ -4,6 +4,7 @@ module TestTruncate using Distributions using ForwardDiff: Dual, ForwardDiff +using StatsFuns import JSON using Test using ..Main: fdm @@ -19,7 +20,7 @@ function verify_and_test_drive(jsonfile, selected, n_tsamples::Int,lower::Int,up dname = string(dsym) - dsymt = Symbol("Truncated($(dct["dtype"]),$lower,$upper") + dsymt = Symbol("truncated($(dct["dtype"]),$lower,$upper") dnamet = 
string(dsym) # test whether it is included in the selected list @@ -36,8 +37,8 @@ function verify_and_test_drive(jsonfile, selected, n_tsamples::Int,lower::Int,up continue end - println(" testing Truncated($(ex),$lower,$upper)") - d = Truncated(eval(Meta.parse(ex)),lower,upper) + println(" testing truncated($(ex),$lower,$upper)") + d = truncated(eval(Meta.parse(ex)),lower,upper) if dtype != Uniform && dtype != TruncatedNormal # Uniform is truncated to Uniform @assert isa(dtype, Type) && dtype <: UnivariateDistribution @test isa(d, dtypet) @@ -71,16 +72,17 @@ function verify_and_test(d::UnivariateDistribution, dct::Dict, n_tsamples::Int) for pt in pts x = _parse_x(d, pt["x"]) lp = d.lower <= x <= d.upper ? Float64(pt["logpdf"]) - d.logtp : -Inf - cf = x <= d.lower ? 0.0 : x >= d.upper ? 1.0 : (Float64(pt["cdf"]) - d.lcdf)/d.tp + cf = x < d.lower ? 0.0 : x >= d.upper ? 1.0 : (Float64(pt["cdf"]) - d.lcdf)/d.tp if !isa(d, Distributions.Truncated{Distributions.StudentizedRange{Float64},Distributions.Continuous}) - @test isapprox(logpdf(d, x), lp, atol=sqrt(eps())) + @test logpdf(d, x) ≈ lp atol=sqrt(eps()) end - @test isapprox(cdf(d, x) , cf, atol=sqrt(eps())) + @test cdf(d, x) ≈ cf atol=sqrt(eps()) # NOTE: some distributions use pdf() in StatsFuns.jl which have no generic support yet if !(typeof(d) in [Distributions.Truncated{Distributions.NoncentralChisq{Float64},Distributions.Continuous, Float64}, Distributions.Truncated{Distributions.NoncentralF{Float64},Distributions.Continuous, Float64}, Distributions.Truncated{Distributions.NoncentralT{Float64},Distributions.Continuous, Float64}, - Distributions.Truncated{Distributions.StudentizedRange{Float64},Distributions.Continuous, Float64}]) + Distributions.Truncated{Distributions.StudentizedRange{Float64},Distributions.Continuous, Float64}, + Distributions.Truncated{Distributions.Rician{Float64},Distributions.Continuous, Float64}]) @test isapprox(logpdf(d, Dual(float(x))), lp, atol=sqrt(eps())) end # NOTE: this test is disabled as StatsFuns.jl doesn't have generic support for cdf() @@ -141,4 +143,21 @@ f = x -> logpdf(truncated(Normal(x[1], x[2]), x[3], x[4]), mean(x)) at = [0.0, 1.0, 0.0, 1.0] @test isapprox(ForwardDiff.gradient(f, at), fdm(f, at), atol=1e-6) + @testset "errors" begin + @test_throws ErrorException truncated(Normal(), 1, 0) + @test_throws ArgumentError truncated(Uniform(), 1, 2) + @test_throws ErrorException truncated(Exponential(), 3, 1) + end + + @testset "#1328" begin + dist = Poisson(2.0) + dist_zeroinflated = MixtureModel([Dirac(0.0), dist], [0.4, 0.6]) + dist_zerotruncated = truncated(dist, 1, Inf) + dist_zeromodified = MixtureModel([Dirac(0.0), dist_zerotruncated], [0.4, 0.6]) + + @test logsumexp(logpdf(dist, x) for x in 0:1000) ≈ 0 atol=1e-15 + @test logsumexp(logpdf(dist_zeroinflated, x) for x in 0:1000) ≈ 0 atol=1e-15 + @test logsumexp(logpdf(dist_zerotruncated, x) for x in 0:1000) ≈ 0 atol=1e-15 + @test logsumexp(logpdf(dist_zeromodified, x) for x in 0:1000) ≈ 0 atol=1e-15 + end end diff --git a/test/truncnormal.jl b/test/truncnormal.jl index 8f5c1be0e..a6f93b0b5 100644 --- a/test/truncnormal.jl +++ b/test/truncnormal.jl @@ -41,3 +41,13 @@ end @test abs(var(X) - var(trunc)) < 0.01 end end + +@testset "Truncated normal should be numerically stable at low probability regions" begin + original = Normal(-5.0, 0.2) + trunc = truncated(original, 0.0, 5.0) + for x in LinRange(0.0, 5.0, 100) + @test isfinite(logpdf(original, x)) + @test isfinite(logpdf(trunc, x)) + @test isfinite(pdf(trunc, x)) + end +end diff --git 
a/test/types.jl b/test/types.jl index 4beb0148b..4eefd5d96 100644 --- a/test/types.jl +++ b/test/types.jl @@ -26,14 +26,17 @@ using ForwardDiff: Dual @testset "Test Sample Type" begin for T in (Float64,Float32,Dual{Nothing,Float64,0}) @testset "Type $T" begin - for d in (MvNormal,MvLogNormal,MvNormalCanon,Dirichlet) - dist = d(map(T,ones(2))) - @test eltype(typeof(dist)) == T - @test eltype(rand(dist)) == eltype(dist) + dists = ( + MvNormal(Diagonal(ones(T, 2))), + MvLogNormal(Diagonal(ones(T, 2))), + MvNormalCanon(Diagonal(ones(T, 2))), + Dirichlet(ones(T, 2)), + Distributions.mvtdist(one(T), Matrix{T}(I, 2, 2)), + ) + for dist in dists + @test eltype(typeof(dist)) === T + @test eltype(rand(dist)) === eltype(dist) end - dist = Distributions.mvtdist(map(T,1.0),map(T,[1.0 0.0; 0.0 1.0])) - @test eltype(typeof(dist)) == T - @test eltype(rand(dist)) == eltype(dist) end end end diff --git a/test/univariate_bounds.jl b/test/univariate_bounds.jl index bc3769700..275eb112c 100644 --- a/test/univariate_bounds.jl +++ b/test/univariate_bounds.jl @@ -66,23 +66,37 @@ filter!(x -> isbounded(x()), dists) ub = nextfloat(ub) @test iszero(cdf(d, lb)) @test isone(cdf(d, ub)) + @test iszero(cdf(d, -Inf)) + @test isone(cdf(d, Inf)) + @test isnan(cdf(d, NaN)) lb_lcdf = logcdf(d,lb) @test isinf(lb_lcdf) & (lb_lcdf < 0) @test iszero(logcdf(d, ub)) + @test logcdf(d, -Inf) == -Inf + @test iszero(logcdf(d, Inf)) + @test isnan(logcdf(d, NaN)) @test isone(ccdf(d, lb)) @test iszero(ccdf(d, ub)) + @test isone(ccdf(d, -Inf)) + @test iszero(ccdf(d, Inf)) + @test isnan(ccdf(d, NaN)) ub_lccdf = logccdf(d,ub) @test isinf(ub_lccdf) & (ub_lccdf < 0) @test iszero(logccdf(d, lb)) + @test iszero(logccdf(d, -Inf)) + @test logccdf(d, Inf) == -Inf + @test isnan(logccdf(d, NaN)) @test iszero(pdf(d, lb)) @test iszero(pdf(d, ub)) lb_lpdf = logpdf(d, lb) - @test isinf(lb_lpdf) & (lb_lpdf < 0) + @test isinf(lb_lpdf) && lb_lpdf < 0 ub_lpdf = logpdf(d, ub) - @test isinf(ub_lpdf) & (ub_lpdf < 0) + @test isinf(ub_lpdf) && ub_lpdf < 0 + @test logpdf(d, -Inf) == -Inf + @test logpdf(d, Inf) == -Inf end diff --git a/test/univariates.jl b/test/univariates.jl index 8913b3866..7a6b85172 100644 --- a/test/univariates.jl +++ b/test/univariates.jl @@ -166,3 +166,36 @@ for c in ["discrete", verify_and_test_drive(jsonfile, ARGS, 10^6) println() end + +# #1358 +@testset "Poisson quantile" begin + d = Poisson(1) + @test quantile(d, 0.2) isa Int + @test cquantile(d, 0.4) isa Int + @test invlogcdf(d, log(0.2)) isa Int + @test invlogccdf(d, log(0.6)) isa Int +end + +@testset "Uniform type inference" begin + for T in (Int, Float32) + d = Uniform{T}(T(2), T(3)) + FT = float(T) + XFT = promote_type(FT, Float64) + + @test @inferred(pdf(d, 1.5)) === zero(FT) + @test @inferred(pdf(d, 2.5)) === one(FT) + @test @inferred(pdf(d, 3.5)) === zero(FT) + + @test @inferred(logpdf(d, 1.5)) === FT(-Inf) + @test @inferred(logpdf(d, 2.5)) === -zero(FT) # negative zero + @test @inferred(logpdf(d, 3.5)) === FT(-Inf) + + @test @inferred(cdf(d, 1.5)) === zero(XFT) + @test @inferred(cdf(d, 2.5)) === XFT(1//2) + @test @inferred(cdf(d, 3.5)) === one(XFT) + + @test @inferred(ccdf(d, 1.5)) === one(XFT) + @test @inferred(ccdf(d, 2.5)) === XFT(1//2) + @test @inferred(ccdf(d, 3.5)) === zero(XFT) + end +end \ No newline at end of file
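A closing note on the "Uniform type inference" testset above: it encodes a deliberate promotion contract. For Uniform{T}, pdf and logpdf are determined by the parameters alone and therefore return float(T), while cdf and ccdf fold the query point into the arithmetic and return promote_type(float(T), Float64). A REPL-style sketch of the asserted behaviour (illustrative only, not part of the patch):

    using Distributions
    d = Uniform{Float32}(2.0f0, 3.0f0)
    pdf(d, 2.5)  # 1.0f0 -- Float32, from the parameters alone
    cdf(d, 2.5)  # 0.5   -- Float64, promoted with the Float64 query point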