From 1d9739abeed447d00c1cd53ae432ff2eb09aa0f5 Mon Sep 17 00:00:00 2001 From: Moritz Schauer Date: Fri, 17 Jun 2022 17:31:37 +0200 Subject: [PATCH 01/93] Add sentence about distribution objects (#1565) * Add sentence about distribution objects * Apply suggestions from code review Co-authored-by: David Widmann Co-authored-by: David Widmann --- docs/src/starting.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/src/starting.md b/docs/src/starting.md index 37ccd66dbe..4ef4ff8569 100644 --- a/docs/src/starting.md +++ b/docs/src/starting.md @@ -22,7 +22,18 @@ Then, we create a standard-normal distribution `d` and obtain samples using `ran ```julia julia> d = Normal() Normal(μ=0.0, σ=1.0) +``` + +The object `d` represents a probability distribution, in our case the standard-normal distribution. +One can query its properties such as the mean: + +```julia +julia> mean(d) +0.0 +``` +We can also draw samples from `d` with `rand`. +```julia julia> x = rand(d, 100) 100-element Array{Float64,1}: 0.376264 From 7831193ecbb70967b9f1af71e1c8f0850b89f3a5 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 28 Jun 2022 11:15:47 +0200 Subject: [PATCH 02/93] Fix deprecations of `dim` (#1572) * Fix deprecations of `dim` * Fix deprecation * Use `size` instead of forwarding `rank` for distributions of full-rank matrices --- Project.toml | 2 +- src/Distributions.jl | 1 - src/deprecates.jl | 4 ++++ src/matrix/inversewishart.jl | 20 ++++++++++---------- src/matrix/lkj.jl | 9 ++++----- src/matrix/matrixbeta.jl | 6 ++---- src/matrix/matrixfdist.jl | 16 +++++++--------- src/matrix/matrixnormal.jl | 8 ++++---- src/matrix/matrixtdist.jl | 8 ++++---- src/matrix/wishart.jl | 18 +++++++++--------- src/multivariate/mvnormal.jl | 4 ++-- src/multivariate/mvnormalcanon.jl | 4 ++-- src/multivariate/mvtdist.jl | 4 ++-- test/matrixvariates.jl | 9 +++++---- 14 files changed, 56 insertions(+), 57 deletions(-) diff --git a/Project.toml b/Project.toml index 354ba28379..71a1c911da 100644 
--- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.62" +version = "0.25.63" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" diff --git a/src/Distributions.jl b/src/Distributions.jl index 2e4ef62e53..6312d26b53 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -191,7 +191,6 @@ export componentwise_logpdf, # component-wise logpdf for mixture models concentration, # the concentration parameter convolve, # convolve distributions of the same type - dim, # sample dimension of multivariate distribution dof, # get the degree of freedom entropy, # entropy of distribution in nats failprob, # failing probability diff --git a/src/deprecates.jl b/src/deprecates.jl index cf4031b331..e779d6dbb0 100644 --- a/src/deprecates.jl +++ b/src/deprecates.jl @@ -57,3 +57,7 @@ const MatrixReshaped{S<:ValueSupport,D<:MultivariateDistribution{S}} = ReshapedD @deprecate MatrixReshaped( d::MultivariateDistribution, n::Integer, p::Integer=n ) reshape(d, (n, p)) + +for D in (:InverseWishart, :LKJ, :MatrixBeta, :MatrixFDist, :Wishart) + @eval @deprecate dim(d::$D) size(d, 1) +end diff --git a/src/matrix/inversewishart.jl b/src/matrix/inversewishart.jl index 0d186a27e8..775f62316b 100644 --- a/src/matrix/inversewishart.jl +++ b/src/matrix/inversewishart.jl @@ -29,7 +29,7 @@ end # ----------------------------------------------------------------------------- function InverseWishart(df::T, Ψ::AbstractPDMat{T}) where T<:Real - p = dim(Ψ) + p = size(Ψ, 1) df > p - 1 || throw(ArgumentError("df should be greater than dim - 1.")) logc0 = invwishart_logc0(df, Ψ) R = Base.promote_eltype(T, logc0) @@ -74,31 +74,31 @@ end insupport(::Type{InverseWishart}, X::Matrix) = isposdef(X) insupport(d::InverseWishart, X::Matrix) = size(X) == size(d) && isposdef(X) -dim(d::InverseWishart) = dim(d.Ψ) -size(d::InverseWishart) = (p = dim(d); (p, p)) -rank(d::InverseWishart) = dim(d) 
+size(d::InverseWishart) = size(d.Ψ) +rank(d::InverseWishart) = rank(d.Ψ) + params(d::InverseWishart) = (d.df, d.Ψ) @inline partype(d::InverseWishart{T}) where {T<:Real} = T function mean(d::InverseWishart) df = d.df - p = dim(d) + p = size(d, 1) r = df - (p + 1) r > 0.0 || throw(ArgumentError("mean only defined for df > p + 1")) return Matrix(d.Ψ) * (1.0 / r) end -mode(d::InverseWishart) = d.Ψ * inv(d.df + dim(d) + 1.0) +mode(d::InverseWishart) = d.Ψ * inv(d.df + size(d, 1) + 1.0) # https://en.wikipedia.org/wiki/Inverse-Wishart_distribution#Moments function cov(d::InverseWishart, i::Integer, j::Integer, k::Integer, l::Integer) - p, ν, Ψ = (dim(d), d.df, Matrix(d.Ψ)) + p, ν, Ψ = (size(d, 1), d.df, Matrix(d.Ψ)) ν > p + 3 || throw(ArgumentError("cov only defined for df > dim + 3")) inv((ν - p)*(ν - p - 3)*(ν - p - 1)^2)*(2Ψ[i,j]*Ψ[k,l] + (ν-p-1)*(Ψ[i,k]*Ψ[j,l] + Ψ[i,l]*Ψ[k,j])) end function var(d::InverseWishart, i::Integer, j::Integer) - p, ν, Ψ = (dim(d), d.df, Matrix(d.Ψ)) + p, ν, Ψ = (size(d, 1), d.df, Matrix(d.Ψ)) ν > p + 3 || throw(ArgumentError("var only defined for df > dim + 3")) inv((ν - p)*(ν - p - 3)*(ν - p - 1)^2)*((ν - p + 1)*Ψ[i,j]^2 + (ν - p - 1)*Ψ[i,i]*Ψ[j,j]) end @@ -109,12 +109,12 @@ end function invwishart_logc0(df::Real, Ψ::AbstractPDMat) h_df = df / 2 - p = dim(Ψ) + p = size(Ψ, 1) -h_df * (p * typeof(df)(logtwo) - logdet(Ψ)) - logmvgamma(p, h_df) end function logkernel(d::InverseWishart, X::AbstractMatrix) - p = dim(d) + p = size(d, 1) df = d.df Xcf = cholesky(X) # we use the fact: tr(Ψ * inv(X)) = tr(inv(X) * Ψ) = tr(X \ Ψ) diff --git a/src/matrix/lkj.jl b/src/matrix/lkj.jl index 8534bf39d1..b76675eab5 100644 --- a/src/matrix/lkj.jl +++ b/src/matrix/lkj.jl @@ -66,15 +66,14 @@ end # Properties # ----------------------------------------------------------------------------- -dim(d::LKJ) = d.d -size(d::LKJ) = (dim(d), dim(d)) +size(d::LKJ) = (d.d, d.d) -rank(d::LKJ) = dim(d) +rank(d::LKJ) = d.d insupport(d::LKJ, R::AbstractMatrix) = isreal(R) && 
size(R) == size(d) && isone(Diagonal(R)) && isposdef(R) -mean(d::LKJ) = Matrix{partype(d)}(I, dim(d), dim(d)) +mean(d::LKJ) = Matrix{partype(d)}(I, d.d, d.d) function mode(d::LKJ; check_args::Bool=true) @check_args( @@ -86,7 +85,7 @@ function mode(d::LKJ; check_args::Bool=true) end function var(lkj::LKJ) - d = dim(lkj) + d = lkj.d d > 1 || return zeros(d, d) σ² = var(_marginal(lkj)) σ² * (ones(partype(lkj), d, d) - I) diff --git a/src/matrix/matrixbeta.jl b/src/matrix/matrixbeta.jl index 01fae77349..588c818909 100644 --- a/src/matrix/matrixbeta.jl +++ b/src/matrix/matrixbeta.jl @@ -72,15 +72,13 @@ end # Properties # ----------------------------------------------------------------------------- -dim(d::MatrixBeta) = dim(d.W1) - size(d::MatrixBeta) = size(d.W1) -rank(d::MatrixBeta) = dim(d) +rank(d::MatrixBeta) = size(d, 1) insupport(d::MatrixBeta, U::AbstractMatrix) = isreal(U) && size(U) == size(d) && isposdef(U) && isposdef(I - U) -params(d::MatrixBeta) = (dim(d), d.W1.df, d.W2.df) +params(d::MatrixBeta) = (size(d, 1), d.W1.df, d.W2.df) mean(d::MatrixBeta) = ((p, n1, n2) = params(d); Matrix((n1 / (n1 + n2)) * I, p, p)) diff --git a/src/matrix/matrixfdist.jl b/src/matrix/matrixfdist.jl index 86d0104c06..5a7e18efb7 100644 --- a/src/matrix/matrixfdist.jl +++ b/src/matrix/matrixfdist.jl @@ -41,7 +41,7 @@ end # ----------------------------------------------------------------------------- function MatrixFDist(n1::Real, n2::Real, B::AbstractPDMat) - p = dim(B) + p = size(B, 1) n1 > p - 1 || throw(ArgumentError("first degrees of freedom must be larger than $(p - 1)")) n2 > p - 1 || throw(ArgumentError("second degrees of freedom must be larger than $(p - 1)")) logc0 = matrixfdist_logc0(n1, n2, B) @@ -78,18 +78,16 @@ end # Properties # ----------------------------------------------------------------------------- -dim(d::MatrixFDist) = dim(d.W) - size(d::MatrixFDist) = size(d.W) -rank(d::MatrixFDist) = dim(d) +rank(d::MatrixFDist) = size(d, 1) insupport(d::MatrixFDist, 
Σ::AbstractMatrix) = isreal(Σ) && size(Σ) == size(d) && isposdef(Σ) params(d::MatrixFDist) = (d.W.df, d.n2, d.W.S) function mean(d::MatrixFDist) - p = dim(d) + p = size(d, 1) n1, n2, B = params(d) n2 > p + 1 || throw(ArgumentError("mean only defined for df2 > dim + 1")) return (n1 / (n2 - p - 1)) * Matrix(B) @@ -99,7 +97,7 @@ end # Konno (1988 JJSS) Corollary 2.4.i function cov(d::MatrixFDist, i::Integer, j::Integer, k::Integer, l::Integer) - p = dim(d) + p = size(d, 1) n1, n2, PDB = params(d) n2 > p + 3 || throw(ArgumentError("cov only defined for df2 > dim + 3")) n = n1 + n2 @@ -108,7 +106,7 @@ function cov(d::MatrixFDist, i::Integer, j::Integer, k::Integer, l::Integer) end function var(d::MatrixFDist, i::Integer, j::Integer) - p = dim(d) + p = size(d, 1) n1, n2, PDB = params(d) n2 > p + 3 || throw(ArgumentError("var only defined for df2 > dim + 3")) n = n1 + n2 @@ -122,14 +120,14 @@ end function matrixfdist_logc0(n1::Real, n2::Real, B::AbstractPDMat) # returns the natural log of the normalizing constant for the pdf - p = dim(B) + p = size(B, 1) term1 = -logmvbeta(p, n1 / 2, n2 / 2) term2 = (n2 / 2) * logdet(B) return term1 + term2 end function logkernel(d::MatrixFDist, Σ::AbstractMatrix) - p = dim(d) + p = size(d, 1) n1, n2, B = params(d) ((n1 - p - 1) / 2) * logdet(Σ) - ((n1 + n2) / 2) * logdet(pdadd(Σ, B)) end diff --git a/src/matrix/matrixnormal.jl b/src/matrix/matrixnormal.jl index d19e4757c0..9d480bd69d 100644 --- a/src/matrix/matrixnormal.jl +++ b/src/matrix/matrixnormal.jl @@ -30,8 +30,8 @@ end function MatrixNormal(M::AbstractMatrix{T}, U::AbstractPDMat{T}, V::AbstractPDMat{T}) where T <: Real n, p = size(M) - n == dim(U) || throw(ArgumentError("Number of rows of M must equal dim of U.")) - p == dim(V) || throw(ArgumentError("Number of columns of M must equal dim of V.")) + n == size(U, 1) || throw(ArgumentError("Number of rows of M must equal dim of U.")) + p == size(V, 1) || throw(ArgumentError("Number of columns of M must equal dim of V.")) logc0 = 
matrixnormal_logc0(U, V) R = Base.promote_eltype(T, logc0) prom_M = convert(AbstractArray{R}, M) @@ -105,8 +105,8 @@ params(d::MatrixNormal) = (d.M, d.U, d.V) # ----------------------------------------------------------------------------- function matrixnormal_logc0(U::AbstractPDMat, V::AbstractPDMat) - n = dim(U) - p = dim(V) + n = size(U, 1) + p = size(V, 1) -(n * p / 2) * (logtwo + logπ) - (n / 2) * logdet(V) - (p / 2) * logdet(U) end diff --git a/src/matrix/matrixtdist.jl b/src/matrix/matrixtdist.jl index a929aa1b1f..e37119603c 100644 --- a/src/matrix/matrixtdist.jl +++ b/src/matrix/matrixtdist.jl @@ -50,8 +50,8 @@ end function MatrixTDist(ν::T, M::AbstractMatrix{T}, Σ::AbstractPDMat{T}, Ω::AbstractPDMat{T}) where T <: Real n, p = size(M) 0 < ν < Inf || throw(ArgumentError("degrees of freedom must be positive and finite.")) - n == dim(Σ) || throw(ArgumentError("Number of rows of M must equal dim of Σ.")) - p == dim(Ω) || throw(ArgumentError("Number of columns of M must equal dim of Ω.")) + n == size(Σ, 1) || throw(ArgumentError("Number of rows of M must equal dim of Σ.")) + p == size(Ω, 1) || throw(ArgumentError("Number of columns of M must equal dim of Ω.")) logc0 = matrixtdist_logc0(Σ, Ω, ν) R = Base.promote_eltype(T, logc0) prom_M = convert(AbstractArray{R}, M) @@ -128,8 +128,8 @@ params(d::MatrixTDist) = (d.ν, d.M, d.Σ, d.Ω) function matrixtdist_logc0(Σ::AbstractPDMat, Ω::AbstractPDMat, ν::Real) # returns the natural log of the normalizing constant for the pdf - n = dim(Σ) - p = dim(Ω) + n = size(Σ, 1) + p = size(Ω, 1) term1 = logmvgamma(p, (ν + n + p - 1) / 2) term2 = - (n * p / 2) * logπ term3 = - logmvgamma(p, (ν + p - 1) / 2) diff --git a/src/matrix/wishart.jl b/src/matrix/wishart.jl index 5fef6ad183..e8da450060 100644 --- a/src/matrix/wishart.jl +++ b/src/matrix/wishart.jl @@ -44,7 +44,7 @@ end function Wishart(df::T, S::AbstractPDMat{T}) where T<:Real df > 0 || throw(ArgumentError("df must be positive. 
got $(df).")) - p = dim(S) + p = size(S, 1) singular = df <= p - 1 if singular isinteger(df) || throw( @@ -100,8 +100,8 @@ function insupport(d::Wishart, X::AbstractMatrix) end end -dim(d::Wishart) = dim(d.S) -size(d::Wishart) = (p = dim(d); (p, p)) +size(d::Wishart) = size(d.S) + rank(d::Wishart) = d.rank params(d::Wishart) = (d.df, d.S) @inline partype(d::Wishart{T}) where {T<:Real} = T @@ -109,14 +109,14 @@ params(d::Wishart) = (d.df, d.S) mean(d::Wishart) = d.df * Matrix(d.S) function mode(d::Wishart) - r = d.df - dim(d) - 1 + r = d.df - size(d, 1) - 1 r > 0 || throw(ArgumentError("mode is only defined when df > p + 1")) return Matrix(d.S) * r end function meanlogdet(d::Wishart) logdet_S = logdet(d.S) - p = dim(d) + p = size(d, 1) v = logdet_S + p * oftype(logdet_S, logtwo) df = oftype(logdet_S, d.df) for i in 0:(p - 1) @@ -127,7 +127,7 @@ end function entropy(d::Wishart) d.singular && throw(ArgumentError("entropy not defined for singular Wishart.")) - p = dim(d) + p = size(d, 1) df = d.df return -d.logc0 - ((df - p - 1) * meanlogdet(d) - df * p) / 2 end @@ -148,7 +148,7 @@ end # ----------------------------------------------------------------------------- function wishart_logc0(df::T, S::AbstractPDMat{T}, rnk::Integer) where {T<:Real} - p = dim(S) + p = size(S, 1) if df <= p - 1 return singular_wishart_logc0(p, df, S, rnk) else @@ -172,7 +172,7 @@ function singular_wishart_logc0(p::Integer, df::T, S::AbstractPDMat{T}, rnk::Int end function singular_wishart_logkernel(d::Wishart, X::AbstractMatrix) - p = dim(d) + p = size(d, 1) r = rank(d) L = eigvals(Hermitian(X), (p - r + 1):p) return ((d.df - (p + 1)) * sum(log, L) - tr(d.S \ X)) / 2 @@ -186,7 +186,7 @@ function nonsingular_wishart_logc0(p::Integer, df::T, S::AbstractPDMat{T}) where end function nonsingular_wishart_logkernel(d::Wishart, X::AbstractMatrix) - return ((d.df - (dim(d) + 1)) * logdet(cholesky(X)) - tr(d.S \ X)) / 2 + return ((d.df - (size(d, 1) + 1)) * logdet(cholesky(X)) - tr(d.S \ X)) / 2 end # 
----------------------------------------------------------------------------- diff --git a/src/multivariate/mvnormal.jl b/src/multivariate/mvnormal.jl index 36992d3bd3..6126c1d8f1 100644 --- a/src/multivariate/mvnormal.jl +++ b/src/multivariate/mvnormal.jl @@ -183,7 +183,7 @@ const ZeroMeanFullNormal{Axes} = MvNormal{Float64,PDMat{Float64,Matrix{Float64}} ### Construction function MvNormal(μ::AbstractVector{T}, Σ::AbstractPDMat{T}) where {T<:Real} - dim(Σ) == length(μ) || throw(DimensionMismatch("The dimensions of mu and Sigma are inconsistent.")) + size(Σ, 1) == length(μ) || throw(DimensionMismatch("The dimensions of mu and Sigma are inconsistent.")) MvNormal{T,typeof(Σ), typeof(μ)}(μ, Σ) end @@ -317,7 +317,7 @@ MvNormalKnownCov(d::Int, σ::Real) = MvNormalKnownCov(ScalMat(d, abs2(Float64(σ MvNormalKnownCov(σ::Vector{Float64}) = MvNormalKnownCov(PDiagMat(abs2.(σ))) MvNormalKnownCov(Σ::Matrix{Float64}) = MvNormalKnownCov(PDMat(Σ)) -length(g::MvNormalKnownCov) = dim(g.Σ) +length(g::MvNormalKnownCov) = size(g.Σ, 1) struct MvNormalKnownCovStats{Cov<:AbstractPDMat} invΣ::Cov # inverse covariance diff --git a/src/multivariate/mvnormalcanon.jl b/src/multivariate/mvnormalcanon.jl index 5b29515fd2..587b20ba5c 100644 --- a/src/multivariate/mvnormalcanon.jl +++ b/src/multivariate/mvnormalcanon.jl @@ -57,7 +57,7 @@ const ZeroMeanIsoNormalCanon{Axes} = MvNormalCanon{Float64,ScalMat{Float64},Zer ### Constructors function MvNormalCanon(μ::AbstractVector{T}, h::AbstractVector{T}, J::AbstractPDMat{T}) where {T<:Real} - length(μ) == length(h) == dim(J) || throw(DimensionMismatch("Inconsistent argument dimensions")) + length(μ) == length(h) == size(J, 1) || throw(DimensionMismatch("Inconsistent argument dimensions")) if typeof(μ) === typeof(h) return MvNormalCanon{T,typeof(J),typeof(μ)}(μ, h, J) else @@ -76,7 +76,7 @@ function MvNormalCanon(μ::AbstractVector{<:Real}, h::AbstractVector{<:Real}, J: end function MvNormalCanon(h::AbstractVector{<:Real}, J::AbstractPDMat) - length(h) == 
dim(J) || throw(DimensionMismatch("Inconsistent argument dimensions")) + length(h) == size(J, 1) || throw(DimensionMismatch("Inconsistent argument dimensions")) R = Base.promote_eltype(h, J) hh = convert(AbstractArray{R}, h) JJ = convert(AbstractArray{R}, J) diff --git a/src/multivariate/mvtdist.jl b/src/multivariate/mvtdist.jl index dcdc27b02f..e3b5f92daa 100644 --- a/src/multivariate/mvtdist.jl +++ b/src/multivariate/mvtdist.jl @@ -18,7 +18,7 @@ end function GenericMvTDist(df::T, μ::Mean, Σ::Cov) where {Cov<:AbstractPDMat, Mean<:AbstractVector, T<:Real} d = length(μ) - dim(Σ) == d || throw(DimensionMismatch("The dimensions of μ and Σ are inconsistent.")) + size(Σ, 1) == d || throw(DimensionMismatch("The dimensions of μ and Σ are inconsistent.")) R = Base.promote_eltype(T, μ, Σ) S = convert(AbstractArray{R}, Σ) m = convert(AbstractArray{R}, μ) @@ -27,7 +27,7 @@ end function GenericMvTDist(df::Real, Σ::AbstractPDMat) R = Base.promote_eltype(df, Σ) - GenericMvTDist(df, Zeros{R}(dim(Σ)), Σ) + GenericMvTDist(df, Zeros{R}(size(Σ, 1)), Σ) end GenericMvTDist{T,Cov,Mean}(df, μ, Σ) where {T,Cov,Mean} = diff --git a/test/matrixvariates.jl b/test/matrixvariates.jl index 4f2f07db84..adb67e62c1 100644 --- a/test/matrixvariates.jl +++ b/test/matrixvariates.jl @@ -145,10 +145,11 @@ test_cov(d::LKJ) = nothing # -------------------------------------------------- function test_dim(d::MatrixDistribution) - @test dim(d) == size(d, 1) - @test dim(d) == size(d, 2) - @test dim(d) == size(mean(d), 1) - @test dim(d) == size(mean(d), 2) + n = @test_deprecated(dim(d)) + @test n == size(d, 1) + @test n == size(d, 2) + @test n == size(mean(d), 1) + @test n == size(mean(d), 2) end test_dim(d::Union{MatrixNormal, MatrixTDist}) = nothing From 73f64dddd286cc184e8c5eef46c62163fae290ab Mon Sep 17 00:00:00 2001 From: Simone Carlo Surace <51025924+simsurace@users.noreply.github.com> Date: Fri, 1 Jul 2022 12:15:35 +0200 Subject: [PATCH 03/93] Allow sampling `NegativeBinomial` for `p==1` (#1575) * 
Allow sampling `NegativeBinomial` for `p==1` * Draw multiple samples Co-authored-by: David Widmann Co-authored-by: David Widmann --- src/univariate/discrete/negativebinomial.jl | 8 +++++++- test/negativebinomial.jl | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index e995f4f341..bd3b0c2a4c 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -118,7 +118,13 @@ invlogcdf(d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogcdf(d.r, d.p invlogccdf(d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogccdf(d.r, d.p, lq)) ## sampling -rand(rng::AbstractRNG, d::NegativeBinomial) = rand(rng, Poisson(rand(rng, Gamma(d.r, (1 - d.p)/d.p)))) +function rand(rng::AbstractRNG, d::NegativeBinomial) + if isone(d.p) + return 0 + else + return rand(rng, Poisson(rand(rng, Gamma(d.r, (1 - d.p)/d.p)))) + end +end function mgf(d::NegativeBinomial, t::Real) r, p = params(d) diff --git a/test/negativebinomial.jl b/test/negativebinomial.jl index 7b4b03dc5b..2c14d971ce 100644 --- a/test/negativebinomial.jl +++ b/test/negativebinomial.jl @@ -17,4 +17,5 @@ end @testset "Check the corner case p==1" begin @test logpdf(NegativeBinomial(0.5, 1.0), 0) === 0.0 @test logpdf(NegativeBinomial(0.5, 1.0), 1) === -Inf + @test all(iszero, rand(NegativeBinomial(rand(), 1.0), 10)) end From 6ab4c1f5bd1b5b6890bbb6afc9d3349dc90cad6a Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 1 Jul 2022 12:15:51 +0200 Subject: [PATCH 04/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 71a1c911da..0d50b387f2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.63" +version = "0.25.64" [deps] ChainRulesCore = 
"d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 254f27f1b32a72d6529e2f78a3745b68bd38b31e Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 2 Jul 2022 21:48:33 +0200 Subject: [PATCH 05/93] Include and update tests for `BetaBinomial` (#1578) --- test/betabinomial.jl | 6 +++--- test/runtests.jl | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/betabinomial.jl b/test/betabinomial.jl index 79d099b353..f389560634 100644 --- a/test/betabinomial.jl +++ b/test/betabinomial.jl @@ -2,12 +2,12 @@ using Distributions using Test @testset "Log of Beta-binomial distribution" begin - d = BetaBinomial(50, 0.2,0.6) + d = BetaBinomial(50, 0.2, 0.6) - for k in Base.OneTo(50) + for k in 1:50 p = pdf(d, k) lp = logpdf(d, k) - @test_approx_eq lp log(p) + @test lp ≈ log(p) @test insupport(d, k) end @test !insupport(d, 51) diff --git a/test/runtests.jl b/test/runtests.jl index a08d4620af..6f61732e81 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -37,6 +37,7 @@ const tests = [ "fit", "multinomial", "binomial", + "betabinomial", "poissonbinomial", "dirichlet", "dirichletmultinomial", From e60a581c888003a9406f66d15239fb91f230f534 Mon Sep 17 00:00:00 2001 From: Jan Weidner Date: Mon, 4 Jul 2022 10:14:06 +0200 Subject: [PATCH 06/93] speed up Semicirle sampling (#1580) * speed up Semicirle sampling * fix rand for Semicircle * add semicircle rand tests * Update src/univariate/continuous/semicircle.jl Co-authored-by: David Widmann * improve semicirle rand tests Co-authored-by: David Widmann --- src/univariate/continuous/semicircle.jl | 10 ++++++++ test/semicircle.jl | 31 ++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/univariate/continuous/semicircle.jl b/src/univariate/continuous/semicircle.jl index 532d442ca8..0480000e35 100644 --- a/src/univariate/continuous/semicircle.jl +++ b/src/univariate/continuous/semicircle.jl @@ -72,4 +72,14 @@ function cdf(d::Semicircle, x::Real) end end +function rand(rng::AbstractRNG, 
d::Semicircle) + # Idea: + # sample polar coordinates r,θ + # of point uniformly distributed on radius d.r half disk + # project onto x axis + θ = rand(rng) # multiple of π + r = d.r * sqrt(rand(rng)) + return cospi(θ) * r +end + @quantile_newton Semicircle diff --git a/test/semicircle.jl b/test/semicircle.jl index 2456b5302d..8dbf2ff2c4 100644 --- a/test/semicircle.jl +++ b/test/semicircle.jl @@ -1,5 +1,6 @@ using Distributions -using Test +using Random: MersenneTwister +using Test d = Semicircle(2.0) @@ -37,3 +38,31 @@ d = Semicircle(2.0) @test quantile(d, .0) == -2.0 @test quantile(d, .5) == .0 @test quantile(d, 1.0) == +2.0 + +rng = MersenneTwister(0) +for r in rand(rng, Uniform(0,10), 5) + N = 10^4 + semi = Semicircle(r) + sample = rand(rng, semi, N) + mi, ma = extrema(sample) + @test -r <= mi < ma <= r + + # test order statistic of sample min is sane + d_min = Beta(1, N) + lo = quantile(d_min, 0.01) + hi = quantile(d_min, 0.99) + @test lo < cdf(semi, mi) < hi + + # test order statistic of sample max is sane + d_max = Beta(N, 1) + lo = quantile(d_max, 0.01) + hi = quantile(d_max, 0.99) + @test lo < cdf(semi, ma) < hi + + # central limit theorem + dmean = Normal(mean(semi), std(semi)/√(N)) + @test quantile(dmean, 0.01) < mean(sample) < quantile(dmean, 0.99) + + pvalue = pvalue_kolmogorovsmirnoff(sample, semi) + @test pvalue > 1e-2 +end From 84bdc8751499978a5f1d3a9bff10eded716fe0c7 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 4 Jul 2022 10:14:30 +0200 Subject: [PATCH 07/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0d50b387f2..ddee76fb2e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.64" +version = "0.25.65" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 38dafb738253d594426d12a26c33927208e0b2ec Mon Sep 17 00:00:00 
2001 From: Jan Weidner Date: Tue, 5 Jul 2022 01:38:11 +0200 Subject: [PATCH 08/93] Update ksdist.jl (#1581) While the sqrt(n) factor is often part of the test statistic this is not what is implemented here. To witness, see ```julia https://github.com/JuliaStats/Distributions.jl/blob/254f27f1b32a72d6529e2f78a3745b68bd38b31e/test/testutils.jl#L597-L607 ``` where there is no sqrt(n) factor. --- src/univariate/continuous/ksdist.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/univariate/continuous/ksdist.jl b/src/univariate/continuous/ksdist.jl index 849a795e5a..674bcec95a 100644 --- a/src/univariate/continuous/ksdist.jl +++ b/src/univariate/continuous/ksdist.jl @@ -4,7 +4,7 @@ Distribution of the (two-sided) Kolmogorov-Smirnoff statistic ```math -D_n = \\sup_x | \\hat{F}_n(x) -F(x)| \\sqrt(n) +D_n = \\sup_x | \\hat{F}_n(x) -F(x)| ``` ``D_n`` converges a.s. to the Kolmogorov distribution. From c430f395d93ec08e8dbd9fc9c685bf8be11c71ae Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 7 Jul 2022 13:10:22 +0200 Subject: [PATCH 09/93] Generalize `Product` (#1391) * Move `src/multivariate/product.jl` * Generalize `Product` to `ProductDistribution` * Add implementations for more general product distributions * Unify and generalize `rand!`, `logpdf` and `pdf` * Revert unrelated changes and fix tests * Propagate `@inbounds` * Remove unneeded implementation * Fix typos * Fix some dispatches * More fixes * Support tuple of distributions and mix of discrete + continuous * Fix additional test errors * Fix method ambiguity * Fix `VonMisesFisherSampler` * Fix mixture sampler * Simplify multinomial sampler * Fix `loglikelihood` for univariate distributions * Add ReshapedDistribution * Fix typo * Revert some changes * Update product.jl * Remove duplicate `eachvariate`/`EachVariate` * Reintroduce `Product` * Improve type inference * Add explanations of `ValueSupport` * Fix typo * Remove another breaking change --- docs/src/types.md | 15 +- 
src/Distributions.jl | 3 +- src/common.jl | 38 +++++- src/multivariate/product.jl | 36 ++--- src/multivariates.jl | 2 +- src/product.jl | 246 +++++++++++++++++++++++++++++++++ test/product.jl | 266 +++++++++++++++++++++++++++++++++++- 7 files changed, 563 insertions(+), 43 deletions(-) create mode 100644 src/product.jl diff --git a/docs/src/types.md b/docs/src/types.md index 9812cd2f68..daa1ce9202 100644 --- a/docs/src/types.md +++ b/docs/src/types.md @@ -33,16 +33,21 @@ The `VariateForm` sub-types defined in `Distributions.jl` are: ### ValueSupport -```@doc +```@docs Distributions.ValueSupport ``` The `ValueSupport` sub-types defined in `Distributions.jl` are: -**Type** | **Element type** | **Descriptions** ---- | --- | --- -`Discrete` | `Int` | Samples take discrete values -`Continuous` | `Float64` | Samples take continuous real values +```@docs +Distributions.Discrete +Distributions.Continuous +``` + +**Type** | **Default element type** | **Description** | **Examples** +--- | --- | --- | --- +`Discrete` | `Int` | Samples take countably many values | $\{0,1,2,3\}$, $\mathbb{N}$ +`Continuous` | `Float64` | Samples take uncountably many values | $[0, 1]$, $\mathbb{R}$ Multiple samples are often organized into an array, depending on the variate form. 
diff --git a/src/Distributions.jl b/src/Distributions.jl index 6312d26b53..a27cb5ec17 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -145,7 +145,7 @@ export Pareto, PGeneralizedGaussian, SkewedExponentialPower, - Product, + Product, # deprecated Poisson, PoissonBinomial, QQPair, @@ -293,6 +293,7 @@ include("cholesky/lkjcholesky.jl") include("samplers.jl") # others +include("product.jl") include("reshaped.jl") include("truncate.jl") include("censored.jl") diff --git a/src/common.jl b/src/common.jl index 3602372a5c..aecad0e1a9 100644 --- a/src/common.jl +++ b/src/common.jl @@ -23,13 +23,42 @@ const Matrixvariate = ArrayLikeVariate{2} abstract type CholeskyVariate <: VariateForm end """ -`S <: ValueSupport` specifies the support of sample elements, -either discrete or continuous. + ValueSupport + +Abstract type that specifies the support of elements of samples. + +It is either [`Discrete`](@ref) or [`Continuous`](@ref). """ abstract type ValueSupport end + +""" + Discrete <: ValueSupport + +This type represents the support of a discrete random variable. + +It is countable. For instance, it can be a finite set or a countably infinite set such as +the natural numbers. + +See also: [`Continuous`](@ref), [`ValueSupport`](@ref) +""" struct Discrete <: ValueSupport end + +""" + Continuous <: ValueSupport + +This type represents the support of a continuous random variable. + +It is uncountably infinite. For instance, it can be an interval on the real line. + +See also: [`Discrete`](@ref), [`ValueSupport`](@ref) +""" struct Continuous <: ValueSupport end +# promotions (e.g., in product distribution): +# combination of discrete support (countable) and continuous support (uncountable) yields +# continuous support (uncountable) +Base.promote_rule(::Type{Continuous}, ::Type{Discrete}) = Continuous + ## Sampleable """ @@ -42,7 +71,6 @@ Any `Sampleable` implements the `Base.rand` method. 
""" abstract type Sampleable{F<:VariateForm,S<:ValueSupport} end - variate_form(::Type{<:Sampleable{VF}}) where {VF} = VF value_support(::Type{<:Sampleable{<:VariateForm,VS}}) where {VS} = VS @@ -142,10 +170,6 @@ const ContinuousMultivariateDistribution = Distribution{Multivariate, Continuou const DiscreteMatrixDistribution = Distribution{Matrixvariate, Discrete} const ContinuousMatrixDistribution = Distribution{Matrixvariate, Continuous} -variate_form(::Type{<:Distribution{VF}}) where {VF} = VF - -value_support(::Type{<:Distribution{VF,VS}}) where {VF,VS} = VS - # allow broadcasting over distribution objects # to be decided: how to handle multivariate/matrixvariate distributions? Broadcast.broadcastable(d::UnivariateDistribution) = Ref(d) diff --git a/src/multivariate/product.jl b/src/multivariate/product.jl index 4e304d76b1..d2bcd7a956 100644 --- a/src/multivariate/product.jl +++ b/src/multivariate/product.jl @@ -1,4 +1,5 @@ -import Statistics: mean, var, cov +# Deprecated product distribution +# TODO: Remove in next breaking release """ Product <: MultivariateDistribution @@ -20,6 +21,10 @@ struct Product{ V<:AbstractVector{T} where T<:UnivariateDistribution{S} where S<:ValueSupport + Base.depwarn( + "`Product(v)` is deprecated, please use `product_distribution(v)`", + :Product, + ) return new{S, T, V}(v) end end @@ -43,26 +48,9 @@ insupport(d::Product, x::AbstractVector) = all(insupport.(d.v, x)) minimum(d::Product) = map(minimum, d.v) maximum(d::Product) = map(maximum, d.v) -""" - product_distribution(dists::AbstractVector{<:UnivariateDistribution}) - -Creates a multivariate product distribution `P` from a vector of univariate distributions. -Fallback is the `Product constructor`, but specialized methods can be defined -for distributions with a special multivariate product. 
-""" -function product_distribution(dists::AbstractVector{<:UnivariateDistribution}) - return Product(dists) -end - -""" - product_distribution(dists::AbstractVector{<:Normal}) - -Computes the multivariate Normal distribution obtained by stacking the univariate -normal distributions. The result is a multivariate Gaussian with a diagonal -covariance matrix. -""" -function product_distribution(dists::AbstractVector{<:Normal}) - µ = mean.(dists) - σ2 = var.(dists) - return MvNormal(µ, Diagonal(σ2)) -end +# TODO: remove deprecation when `Product` is removed +# it will return a `ProductDistribution` then which is already the default for +# higher-dimensional arrays and distributions +Base.@deprecate product_distribution( + dists::AbstractVector{<:UnivariateDistribution} +) Product(dists) diff --git a/src/multivariates.jl b/src/multivariates.jl index 1a087f1ba5..477c78ba5a 100644 --- a/src/multivariates.jl +++ b/src/multivariates.jl @@ -116,7 +116,7 @@ for fname in ["dirichlet.jl", "mvnormalcanon.jl", "mvlognormal.jl", "mvtdist.jl", - "product.jl", + "product.jl", # deprecated "vonmisesfisher.jl"] include(joinpath("multivariate", fname)) end diff --git a/src/product.jl b/src/product.jl new file mode 100644 index 0000000000..049a5888d9 --- /dev/null +++ b/src/product.jl @@ -0,0 +1,246 @@ +""" + ProductDistribution <: Distribution{<:ValueSupport,<:ArrayLikeVariate} + +A distribution of `M + N`-dimensional arrays, constructed from an `N`-dimensional array of +independent `M`-dimensional distributions by stacking them. + +Users should use [`product_distribution`](@ref) to construct a product distribution of +independent distributions instead of constructing a `ProductDistribution` directly. 
+""" +struct ProductDistribution{N,M,D,S<:ValueSupport,T} <: Distribution{ArrayLikeVariate{N},S} + dists::D + size::Dims{N} + + function ProductDistribution{N,M,D}(dists::D) where {N,M,D} + isempty(dists) && error("product distribution must consist of at least one distribution") + return new{N,M,D,_product_valuesupport(dists),_product_eltype(dists)}( + dists, + _product_size(dists), + ) + end +end + +function ProductDistribution(dists::AbstractArray{<:Distribution{ArrayLikeVariate{M}},N}) where {M,N} + return ProductDistribution{M + N,M,typeof(dists)}(dists) +end + +function ProductDistribution(dists::Tuple{Vararg{<:Distribution{ArrayLikeVariate{M}},N}}) where {M,N} + return ProductDistribution{M + 1,M,typeof(dists)}(dists) +end + +# default definitions (type stable e.g. for arrays with concrete `eltype`) +_product_valuesupport(dists) = mapreduce(value_support ∘ typeof, promote_type, dists) +_product_eltype(dists) = mapreduce(eltype, promote_type, dists) + +# type-stable and faster implementations for tuples +function _product_valuesupport(dists::Tuple{Vararg{<:Distribution}}) + return __product_promote_type(value_support, typeof(dists)) +end +function _product_eltype(dists::Tuple{Vararg{<:Distribution}}) + return __product_promote_type(eltype, typeof(dists)) +end + +__product_promote_type(f::F, ::Type{Tuple{D}}) where {F,D<:Distribution} = f(D) +function __product_promote_type(f::F, ::Type{T}) where {F,T} + return promote_type( + f(Base.tuple_type_head(T)), + __product_promote_type(f, Base.tuple_type_tail(T)), + ) +end + +function _product_size(dists::AbstractArray{<:Distribution{<:ArrayLikeVariate{M}},N}) where {M,N} + size_d = size(first(dists)) + all(size(d) == size_d for d in dists) || error("all distributions must be of the same size") + size_dists = size(dists) + return ntuple(i -> i <= M ? 
size_d[i] : size_dists[i-M], Val(M + N)) +end +function _product_size(dists::Tuple{Vararg{<:Distribution{<:ArrayLikeVariate{M}},N}}) where {M,N} + size_d = size(first(dists)) + all(size(d) == size_d for d in dists) || error("all distributions must be of the same size") + return ntuple(i -> i <= M ? size_d[i] : N, Val(M + 1)) +end + +## aliases +const VectorOfUnivariateDistribution{D,S<:ValueSupport,T} = ProductDistribution{1,0,D,S,T} +const MatrixOfUnivariateDistribution{D,S<:ValueSupport,T} = ProductDistribution{2,0,D,S,T} +const ArrayOfUnivariateDistribution{N,D,S<:ValueSupport,T} = ProductDistribution{N,0,D,S,T} + +const FillArrayOfUnivariateDistribution{N,D<:Fill{<:Any,N},S<:ValueSupport,T} = ProductDistribution{N,0,D,S,T} + +## General definitions +function Base.eltype(::Type{<:ProductDistribution{<:Any,<:Any,<:Any,<:ValueSupport,T}}) where {T} + return T +end + +size(d::ProductDistribution) = d.size + +mean(d::ProductDistribution) = reshape(mapreduce(vec ∘ mean, vcat, d.dists), size(d)) +var(d::ProductDistribution) = reshape(mapreduce(vec ∘ var, vcat, d.dists), size(d)) +cov(d::ProductDistribution) = Diagonal(vec(var(d))) + +## For product distributions of univariate distributions +mean(d::ArrayOfUnivariateDistribution) = map(mean, d.dists) +mean(d::VectorOfUnivariateDistribution{<:Tuple}) = collect(map(mean, d.dists)) +var(d::ArrayOfUnivariateDistribution) = map(var, d.dists) +var(d::VectorOfUnivariateDistribution{<:Tuple}) = collect(map(var, d.dists)) + +function insupport(d::ArrayOfUnivariateDistribution{N}, x::AbstractArray{<:Real,N}) where {N} + size(d) == size(x) && all(insupport(vi, xi) for (vi, xi) in zip(d.dists, x)) +end + +minimum(d::ArrayOfUnivariateDistribution) = map(minimum, d.dists) +minimum(d::VectorOfUnivariateDistribution{<:Tuple}) = collect(map(minimum, d.dists)) +maximum(d::ArrayOfUnivariateDistribution) = map(maximum, d.dists) +maximum(d::VectorOfUnivariateDistribution{<:Tuple}) = collect(map(maximum, d.dists)) + +function 
entropy(d::ArrayOfUnivariateDistribution) + # we use pairwise summation (https://github.com/JuliaLang/julia/pull/31020) + return sum(Broadcast.instantiate(Broadcast.broadcasted(entropy, d.dists))) +end +# fix type instability with tuples +entropy(d::VectorOfUnivariateDistribution{<:Tuple}) = sum(entropy, d.dists) + +## Vector of univariate distributions +length(d::VectorOfUnivariateDistribution) = length(d.dists) + +## For matrix distributions +cov(d::ProductDistribution{2}, ::Val{false}) = reshape(cov(d), size(d)..., size(d)...) + +# `_rand!` for arrays of univariate distributions +function _rand!( + rng::AbstractRNG, + d::ArrayOfUnivariateDistribution{N}, + x::AbstractArray{<:Real,N}, +) where {N} + @inbounds for (i, di) in zip(eachindex(x), d.dists) + x[i] = rand(rng, di) + end + return x +end + +# `_logpdf` for arrays of univariate distributions +# we have to fix a method ambiguity +function _logpdf(d::ArrayOfUnivariateDistribution, x::AbstractArray{<:Real,N}) where {N} + return __logpdf(d, x) +end +_logpdf(d::MatrixOfUnivariateDistribution, x::AbstractMatrix{<:Real}) = __logpdf(d, x) +function __logpdf(d::ArrayOfUnivariateDistribution, x::AbstractArray{<:Real,N}) where {N} + # we use pairwise summation (https://github.com/JuliaLang/julia/pull/31020) + # without allocations to compute `sum(logpdf.(d.dists, x))` + broadcasted = Broadcast.broadcasted(logpdf, d.dists, x) + return sum(Broadcast.instantiate(broadcasted)) +end + +# more efficient implementation of `_rand!` for `Fill` array of univariate distributions +function _rand!( + rng::AbstractRNG, + d::FillArrayOfUnivariateDistribution{N}, + x::AbstractArray{<:Real,N}, +) where {N} + return @inbounds rand!(rng, sampler(first(d.dists)), x) +end + +# more efficient implementation of `_logpdf` for `Fill` array of univariate distributions +# we have to fix a method ambiguity +function _logpdf( + d::FillArrayOfUnivariateDistribution{N}, x::AbstractArray{<:Real,N} +) where {N} + return __logpdf(d, x) +end 
+_logpdf(d::FillArrayOfUnivariateDistribution{2}, x::AbstractMatrix{<:Real}) = __logpdf(d, x) +function __logpdf( + d::FillArrayOfUnivariateDistribution{N}, x::AbstractArray{<:Real,N} +) where {N} + return @inbounds loglikelihood(first(d.dists), x) +end + +# `_rand! for arrays of distributions +function _rand!( + rng::AbstractRNG, + d::ProductDistribution{N,M}, + A::AbstractArray{<:Real,N}, +) where {N,M} + @inbounds for (di, Ai) in zip(d.dists, eachvariate(A, ArrayLikeVariate{M})) + rand!(rng, di, Ai) + end + return A +end + +# `_logpdf` for arrays of distributions +# we have to fix a method ambiguity +_logpdf(d::ProductDistribution{N}, x::AbstractArray{<:Real,N}) where {N} = __logpdf(d, x) +_logpdf(d::ProductDistribution{2}, x::AbstractMatrix{<:Real}) = __logpdf(d, x) +function __logpdf( + d::ProductDistribution{N,M}, + x::AbstractArray{<:Real,N}, +) where {N,M} + # we use pairwise summation (https://github.com/JuliaLang/julia/pull/31020) + # to compute `sum(logpdf.(d.dists, eachvariate))` + @inbounds broadcasted = Broadcast.broadcasted( + logpdf, d.dists, eachvariate(x, ArrayLikeVariate{M}), + ) + return sum(Broadcast.instantiate(broadcasted)) +end + +# more efficient implementation of `_rand!` for `Fill` arrays of distributions +function _rand!( + rng::AbstractRNG, + d::ProductDistribution{N,M,<:Fill}, + A::AbstractArray{<:Real,N}, +) where {N,M} + @inbounds rand!(rng, sampler(first(d.dists)), A) + return A +end + +# more efficient implementation of `_logpdf` for `Fill` arrays of distributions +# we have to fix a method ambiguity +function _logpdf( + d::ProductDistribution{N,M,<:Fill}, + x::AbstractArray{<:Real,N}, +) where {N,M} + return __logpdf(d, x) +end +function _logpdf( + d::ProductDistribution{2,M,<:Fill}, + x::AbstractMatrix{<:Real}, +) where {M} + return __logpdf(d, x) +end +function __logpdf( + d::ProductDistribution{N,M,<:Fill}, + x::AbstractArray{<:Real,N}, +) where {N,M} + return @inbounds loglikelihood(first(d.dists), x) +end + +""" + 
product_distribution(dists::AbstractArray{<:Distribution{<:ArrayLikeVariate{M}},N}) + +Create a distribution of `M + N`-dimensional arrays as a product distribution of +independent `M`-dimensional distributions by stacking them. + +The function falls back to constructing a [`ProductDistribution`](@ref) distribution but +specialized methods can be defined. +""" +function product_distribution(dists::AbstractArray{<:Distribution{<:ArrayLikeVariate}}) + return ProductDistribution(dists) +end + +function product_distribution( + dist::Distribution{ArrayLikeVariate{N}}, dists::Distribution{ArrayLikeVariate{N}}..., +) where {N} + return ProductDistribution((dist, dists...)) +end + +""" + product_distribution(dists::AbstractVector{<:Normal}) + +Create a multivariate normal distribution by stacking the univariate normal distributions. + +The resulting distribution of type [`MvNormal`](@ref) has a diagonal covariance matrix. +""" +function product_distribution(dists::AbstractVector{<:Normal}) + µ = map(mean, dists) + σ2 = map(var, dists) + return MvNormal(µ, Diagonal(σ2)) +end diff --git a/test/product.jl b/test/product.jl index d7bf0ae787..7d19898db5 100644 --- a/test/product.jl +++ b/test/product.jl @@ -1,6 +1,14 @@ -using Distributions, Test, Random, LinearAlgebra, FillArrays +using Distributions +using FillArrays + +using Test +using Random +using LinearAlgebra + using Distributions: Product +# TODO: remove when `Product` is removed +@testset "Deprecated `Product` distribution" begin @testset "Testing normal product distributions" begin Random.seed!(123456) N = 11 @@ -8,8 +16,8 @@ using Distributions: Product μ = randn(N) ds = Normal.(μ, 1.0) x = rand.(ds) - d_product = product_distribution(ds) - @test d_product isa MvNormal + d_product = @test_deprecated(Product(ds)) + @test d_product isa Product # Check that methods for `Product` are consistent. 
@test length(d_product) == length(ds) @test eltype(d_product) === eltype(ds[1]) @@ -31,7 +39,7 @@ end ubound = rand(N) ds = Uniform.(-ubound, ubound) x = rand.(ds) - d_product = product_distribution(ds) + d_product = @test_deprecated(product_distribution(ds)) @test d_product isa Product # Check that methods for `Product` are consistent. @test length(d_product) == length(ds) @@ -62,7 +70,7 @@ end support = fill(a, N) ds = DiscreteNonParametric.(support, Ref([0.5, 0.5])) x = rand.(ds) - d_product = product_distribution(ds) + d_product = @test_deprecated(product_distribution(ds)) @test d_product isa Product # Check that methods for `Product` are consistent. @test length(d_product) == length(ds) @@ -89,4 +97,252 @@ end @test mean(d) === Fill(0.0, N) @test cov(d) === Diagonal(Fill(var(Laplace(0.0, 2.3)), N)) end +end + +@testset "Testing normal product distributions" begin + Random.seed!(123456) + N = 11 + + # Construct independent distributions and `ProductDistribution` from these. + μ = randn(N) + + ds1 = Normal.(μ, 1.0) + d_product1 = @inferred(product_distribution(ds1)) + @test d_product1 isa Distributions.DiagNormal + + ds2 = Fill(Normal(first(μ), 1.0), N) + d_product2 = @inferred(product_distribution(ds2)) + @test d_product2 isa MvNormal{Float64,Distributions.ScalMat{Float64},<:Fill{Float64,1}} + + # Check that methods for `ProductDistribution` are consistent. 
+ for (ds, d_product) in ((ds1, d_product1), (ds2, d_product2)) + @test length(d_product) == length(ds) + @test eltype(d_product) === eltype(ds[1]) + @test mean(d_product) == mean.(ds) + @test var(d_product) == var.(ds) + @test cov(d_product) == Diagonal(var.(ds)) + @test entropy(d_product) ≈ sum(entropy.(ds)) + + x = rand(d_product) + @test x isa typeof(rand.(collect(ds))) + @test length(x) == N + @test logpdf(d_product, x) ≈ sum(logpdf.(ds, x)) + end +end + +@testset "Testing generic VectorOfUnivariateDistribution" begin + Random.seed!(123456) + N = 11 + + # Construct independent distributions and `ProductDistribution` from these. + ubound = rand(N) + + ds1 = Uniform.(0.0, ubound) + # Replace with + # d_product1 = @inferred(product_distribution(ds1)) + # when `Product` is removed + d_product1 = @inferred(Distributions.ProductDistribution(ds1)) + @test d_product1 isa Distributions.VectorOfUnivariateDistribution{<:Vector,Continuous,Float64} + + d_product2 = @inferred(product_distribution(ntuple(i -> Uniform(0.0, ubound[i]), 11)...)) + @test d_product2 isa Distributions.VectorOfUnivariateDistribution{<:Tuple,Continuous,Float64} + + ds3 = Fill(Uniform(0.0, first(ubound)), N) + # Replace with + # d_product3 = @inferred(product_distribution(ds3)) + # when `Product` is removed + d_product3 = @inferred(Distributions.ProductDistribution(ds3)) + @test d_product3 isa Distributions.VectorOfUnivariateDistribution{<:Fill,Continuous,Float64} + + # Check that methods for `VectorOfUnivariateDistribution` are consistent. 
+ for (ds, d_product) in ((ds1, d_product1), (ds1, d_product2), (ds3, d_product3)) + @test length(d_product) == length(ds) + @test eltype(d_product) === eltype(ds[1]) + @test @inferred(mean(d_product)) == mean.(ds) + @test @inferred(var(d_product)) == var.(ds) + @test @inferred(cov(d_product)) == Diagonal(var.(ds)) + @test @inferred(entropy(d_product)) == sum(entropy.(ds)) + @test insupport(d_product, zeros(N)) + @test insupport(d_product, maximum.(ds)) + @test !insupport(d_product, maximum.(ds) .+ 1) + @test !insupport(d_product, zeros(N + 1)) + + @test minimum(d_product) == map(minimum, ds) + @test maximum(d_product) == map(maximum, ds) + @test extrema(d_product) == (map(minimum, ds), map(maximum, ds)) + + x = @inferred(rand(d_product)) + @test x isa typeof(rand.(collect(ds))) + @test length(x) == length(d_product) + @test insupport(d_product, x) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) + # ensure that samples are different, in particular if `Fill` is used + @test length(unique(x)) == N + end +end + +@testset "Testing discrete non-parametric VectorOfUnivariateDistribution" begin + Random.seed!(123456) + N = 11 + + for a in ([0, 1], [-0.5, 0.5]) + # Construct independent distributions and `ProductDistribution` from these. 
+ ds1 = DiscreteNonParametric.(fill(a, N), Ref([0.5, 0.5])) + # Replace with + # d_product1 = @inferred(product_distribution(ds1)) + # when `Product` is removed + d_product1 = @inferred(Distributions.ProductDistribution(ds1)) + @test d_product1 isa Distributions.VectorOfUnivariateDistribution{<:Vector{<:DiscreteNonParametric},Discrete,eltype(a)} + + d_product2 = @inferred(product_distribution(ntuple(_ -> DiscreteNonParametric(a, [0.5, 0.5]), 11)...)) + @test d_product2 isa Distributions.VectorOfUnivariateDistribution{<:NTuple{N,<:DiscreteNonParametric},Discrete,eltype(a)} + + ds3 = Fill(DiscreteNonParametric(a, [0.5, 0.5]), N) + # Replace with + # d_product3 = @inferred(product_distribution(ds3)) + # when `Product` is removed + d_product3 = @inferred(Distributions.ProductDistribution(ds3)) + @test d_product3 isa Distributions.VectorOfUnivariateDistribution{<:Fill{<:DiscreteNonParametric,1},Discrete,eltype(a)} + + # Check that methods for `VectorOfUnivariateDistribution` are consistent. + for (ds, d_product) in ((ds1, d_product1), (ds1, d_product3), (ds3, d_product2)) + @test length(d_product) == length(ds) + @test eltype(d_product) === eltype(ds[1]) + @test @inferred(mean(d_product)) == mean.(ds) + @test @inferred(var(d_product)) == var.(ds) + @test @inferred(cov(d_product)) == Diagonal(var.(ds)) + @test @inferred(entropy(d_product)) == sum(entropy.(ds)) + @test insupport(d_product, fill(a[2], N)) + @test !insupport(d_product, fill(a[2] + 1, N)) + @test !insupport(d_product, fill(a[2], N + 1)) + + @test minimum(d_product) == map(minimum, ds) + @test maximum(d_product) == map(maximum, ds) + @test extrema(d_product) == (map(minimum, ds), map(maximum, ds)) + + x = @inferred(rand(d_product)) + @test x isa typeof(rand.(collect(ds))) + @test length(x) == length(d_product) + @test insupport(d_product, x) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) + # ensure that samples are different, in particular if `Fill` is used + @test length(unique(x)) == 2 + end 
+ end +end +@testset "Testing tuple of continuous and discrete distribution" begin + Random.seed!(123456) + N = 11 + + ds = (Bernoulli(0.3), Uniform(0.0, 0.7), Categorical([0.4, 0.2, 0.4])) + d_product = @inferred(product_distribution(ds...)) + @test d_product isa Distributions.VectorOfUnivariateDistribution{<:Tuple,Continuous,Float64} + + ds_vec = vcat(ds...) + + @test length(d_product) == 3 + @test eltype(d_product) === Float64 + @test @inferred(mean(d_product)) == mean.(ds_vec) + @test @inferred(var(d_product)) == var.(ds_vec) + @test @inferred(cov(d_product)) == Diagonal(var.(ds_vec)) + @test @inferred(entropy(d_product)) == sum(entropy.(ds_vec)) + @test insupport(d_product, [0, 0.2, 3]) + @test !insupport(d_product, [-0.5, 0.2, 3]) + @test !insupport(d_product, [0, -0.5, 3]) + @test !insupport(d_product, [0, 0.2, -0.5]) + + @test @inferred(minimum(d_product)) == map(minimum, ds_vec) + @test @inferred(maximum(d_product)) == map(maximum, ds_vec) + @test @inferred(extrema(d_product)) == (map(minimum, ds_vec), map(maximum, ds_vec)) + + x = @inferred(rand(d_product)) + @test x isa Vector{Float64} + @test length(x) == length(d_product) + @test insupport(d_product, x) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) +end + +@testset "Testing generic MatrixOfUnivariateDistribution" begin + Random.seed!(123456) + M, N = 11, 16 + + # Construct independent distributions and `ProductDistribution` from these. + ubound = rand(M, N) + + ds1 = Uniform.(0.0, ubound) + d_product1 = @inferred(product_distribution(ds1)) + @test d_product1 isa Distributions.MatrixOfUnivariateDistribution{<:Matrix{<:Uniform},Continuous,Float64} + + ds2 = Fill(Uniform(0.0, first(ubound)), M, N) + d_product2 = @inferred(product_distribution(ds2)) + @test d_product2 isa Distributions.MatrixOfUnivariateDistribution{<:Fill{<:Uniform,2},Continuous,Float64} + + # Check that methods for `MatrixOfUnivariateDistribution` are consistent. 
+ for (ds, d_product) in ((ds1, d_product1), (ds2, d_product2)) + @test size(d_product) == size(ds) + @test eltype(d_product) === eltype(ds[1]) + @test @inferred(mean(d_product)) == mean.(ds) + @test @inferred(var(d_product)) == var.(ds) + @test @inferred(cov(d_product)) == Diagonal(vec(var.(ds))) + @test @inferred(cov(d_product, Val(false))) == reshape(Diagonal(vec(var.(ds))), M, N, M, N) + + @test minimum(d_product) == map(minimum, ds) + @test maximum(d_product) == map(maximum, ds) + @test extrema(d_product) == (map(minimum, ds), map(maximum, ds)) + + x = @inferred(rand(d_product)) + @test size(x) == size(d_product) + @test x isa typeof(rand.(collect(ds))) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) + # ensure that samples are different, in particular if `Fill` is used + @test length(unique(x)) == length(d_product) + end +end + +@testset "Testing generic array of multivariate distribution" begin + Random.seed!(123456) + M = 3 + + for N in ((11,), (11, 3)) + # Construct independent distributions and `ProductDistribution` from these. + alphas = [normalize!(rand(M), 1) for _ in Iterators.product(map(x -> 1:x, N)...)] + + ds1 = Dirichlet.(alphas) + d_product1 = @inferred(product_distribution(ds1)) + @test d_product1 isa Distributions.ProductDistribution{length(N) + 1,1,<:Array{<:Dirichlet{Float64},length(N)},Continuous,Float64} + + ds2 = Fill(Dirichlet(first(alphas)), N...) + d_product2 = @inferred(product_distribution(ds2)) + @test d_product2 isa Distributions.ProductDistribution{length(N) + 1,1,<:Fill{<:Dirichlet{Float64},length(N)},Continuous,Float64} + + # Check that methods for `VectorOfMultivariateDistribution` are consistent. + for (ds, d_product) in ((ds1, d_product1), (ds2, d_product2)) + @test size(d_product) == (length(ds[1]), size(ds)...) 
+ @test eltype(d_product) === eltype(ds[1]) + @test @inferred(mean(d_product)) == reshape(mapreduce(mean, (x, y) -> cat(x, y; dims=ndims(ds) + 1), ds), size(d_product)) + @test @inferred(var(d_product)) == reshape(mapreduce(var, (x, y) -> cat(x, y; dims=ndims(ds) + 1), ds), size(d_product)) + @test @inferred(cov(d_product)) == Diagonal(mapreduce(var, vcat, ds)) + + if d_product isa MatrixDistribution + @test @inferred(cov(d_product, Val(false))) == reshape( + Diagonal(mapreduce(var, vcat, ds)), M, length(ds), M, length(ds) + ) + end + + x = @inferred(rand(d_product)) + @test size(x) == size(d_product) + @test x isa typeof(mapreduce(rand, (x, y) -> cat(x, y; dims=ndims(ds) + 1), ds)) + + # inference broken for non-Fill arrays + y = reshape(x, Val(2)) + if ds isa Fill + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf(d, y[:, i]) for (i, d) in enumerate(ds)) + else + @test logpdf(d_product, x) ≈ sum(logpdf(d, y[:, i]) for (i, d) in enumerate(ds)) + end + # ensure that samples are different, in particular if `Fill` is used + @test length(unique(x)) == length(d_product) + end + end +end \ No newline at end of file From 03269b47fb7a92de6a4a51c361719ca55474376b Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 8 Jul 2022 17:52:14 +0200 Subject: [PATCH 10/93] Add `rrule` for `logpdf` of `NegativeBinomial` (completes #1568) (#1579) * Add `rrule` for `logpdf` of `NegativeBinomial` * Remove unnecessary module prefix Co-authored-by: David Widmann * Use explicit division Co-authored-by: David Widmann * Refator and correct pullback * Add tests for rrule * Use `forward_fdm` for testing `rrule` * Fix tests * Update test/negativebinomial.jl * Fix tests (without `p = 1 - eps()`) * Use FD for all tests, use random parameters * Avoid type instability Co-authored-by: David Widmann * Split and rearrange ForwardDiff and rrule tests * Bump version * Fix typo * Clean tests (revert unrelated changes) and fix them Co-authored-by: Simone Carlo Surace 
<51025924+simsurace@users.noreply.github.com> Co-authored-by: Simone Carlo Surace --- src/univariate/discrete/negativebinomial.jl | 35 +++++++++++++++++++++ test/negativebinomial.jl | 26 ++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index bd3b0c2a4c..0208cdca00 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -135,3 +135,38 @@ function cf(d::NegativeBinomial, t::Real) r, p = params(d) return (((1 - p) * cis(t)) / (1 - p * cis(t)))^r end + +# ChainRules definitions + +function ChainRulesCore.rrule(::typeof(logpdf), d::NegativeBinomial, k::Real) + # Compute log probability + r, p = params(d) + edgecase = isone(p) && iszero(k) + insupp = insupport(d, k) + + # Primal computation + Ω = r * log(p) + k * log1p(-p) + if edgecase + Ω = zero(Ω) + elseif !insupp + Ω = oftype(Ω, -Inf) + else + Ω = Ω - log(k + r) - logbeta(r, k + 1) + end + + # Define pullback + function logpdf_NegativeBinomial_pullback(Δ) + Δr = Δ * (log(p) - inv(k + r) - digamma(r) + digamma(r + k + 1)) + Δp = Δ * (r / p - k / (1 - p)) + if edgecase + Δp = oftype(Δp, Δ * r) + elseif !insupp + Δr = oftype(Δr, NaN) + Δp = oftype(Δp, NaN) + end + Δd = ChainRulesCore.Tangent{typeof(d)}(; r=Δr, p=Δp) + return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.NoTangent() + end + + return Ω, logpdf_NegativeBinomial_pullback +end diff --git a/test/negativebinomial.jl b/test/negativebinomial.jl index 2c14d971ce..2d2ff0f1a0 100644 --- a/test/negativebinomial.jl +++ b/test/negativebinomial.jl @@ -1,7 +1,9 @@ using Distributions using Test, ForwardDiff +using ChainRulesTestUtils +using FiniteDifferences -# Currently, most of the tests for NegativeBinomail are in the "ref" folder. +# Currently, most of the tests for NegativeBinomial are in the "ref" folder. 
# Eventually, we might want to consolidate the tests here mydiffp(r, p, k) = r/p - k/(1 - p) @@ -19,3 +21,25 @@ end @test logpdf(NegativeBinomial(0.5, 1.0), 1) === -Inf @test all(iszero, rand(NegativeBinomial(rand(), 1.0), 10)) end + +@testset "rrule: logpdf of NegativeBinomial" begin + r = randexp() + + # Test with values in and outside of support + p = rand() + dist = NegativeBinomial(r, p) + fdm = central_fdm(5, 1; max_range=min(r, p, 1-p)/2) # avoids numerical issues with finite differencing + for k in (0, 10, 42, -1, -5, -13) + # Test both integers and floating point numbers. + # For floating point numbers we have to tell FiniteDifferences explicitly that the + # argument is non-differentiable. Otherwise it will compute `NaN` as derivative. + test_rrule(logpdf, dist, k; fdm=fdm, nans=true) + test_rrule(logpdf, dist, float(k) ⊢ ChainRulesTestUtils.NoTangent(); fdm=fdm, nans=true) + end + + # Test edge case `p = 1` and `k = 0` + dist = NegativeBinomial(r, 1) + fdm = backward_fdm(5, 1; max_range = r/10) + test_rrule(logpdf, dist, 0; fdm=fdm) + test_rrule(logpdf, dist, 0.0 ⊢ ChainRulesTestUtils.NoTangent(); fdm=fdm) +end From c664d78f8a1b5efd438824996a026aa5779a3154 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 10 Jul 2022 00:21:34 +0200 Subject: [PATCH 11/93] CompatHelper: bump compat for GR to 0.65 for package docs, (keep existing compat) (#1587) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 5d71a527ca..e12762f473 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64" +GR = "0.61, 0.62, 0.63, 0.64, 0.65" From 0c9367ca7a7549d46c12d05b0ee5ec8e5000bc13 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" 
<41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 11 Jul 2022 22:40:17 +0200 Subject: [PATCH 12/93] CompatHelper: bump compat for GR to 0.66 for package docs, (keep existing compat) (#1588) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index e12762f473..07d4f098f5 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65" +GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66" From 7c3af32615a7360dc6169f0a4f449c7782cc197c Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 23 Jul 2022 20:59:33 +0200 Subject: [PATCH 13/93] Fix deprecation of `Product` (#1590) * Fix deprecation of `Product` * Fix typo * Fix other deprecations * Fix deprecation warning in test --- Project.toml | 2 +- src/multivariate/product.jl | 27 ++++++++++++++------------- src/product.jl | 8 ++++---- test/product.jl | 8 ++++---- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/Project.toml b/Project.toml index ddee76fb2e..7579e7b8a0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.65" +version = "0.25.66" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" diff --git a/src/multivariate/product.jl b/src/multivariate/product.jl index d2bcd7a956..411fd4ab04 100644 --- a/src/multivariate/product.jl +++ b/src/multivariate/product.jl @@ -17,18 +17,19 @@ struct Product{ V<:AbstractVector{T}, } <: MultivariateDistribution{S} v::V - function Product(v::V) where - V<:AbstractVector{T} where - T<:UnivariateDistribution{S} where - S<:ValueSupport - Base.depwarn( - "`Product(v)` is deprecated, please use `product_distribution(v)`", - :Product, - ) - return new{S, T, V}(v) + function Product{S,T,V}(v::V) where 
{S<:ValueSupport,T<:UnivariateDistribution{S},V<:AbstractVector{T}} + return new{S,T,V}(v) end end +function Product(v::V) where {S<:ValueSupport,T<:UnivariateDistribution{S},V<:AbstractVector{T}} + Base.depwarn( + "`Product(v)` is deprecated, please use `product_distribution(v)`", + :Product, + ) + return Product{S, T, V}(v) +end + length(d::Product) = length(d.v) function Base.eltype(::Type{<:Product{S,T}}) where {S<:ValueSupport, T<:UnivariateDistribution{S}} @@ -48,9 +49,9 @@ insupport(d::Product, x::AbstractVector) = all(insupport.(d.v, x)) minimum(d::Product) = map(minimum, d.v) maximum(d::Product) = map(maximum, d.v) -# TODO: remove deprecation when `Product` is removed +# will be removed when `Product` is removed # it will return a `ProductDistribution` then which is already the default for # higher-dimensional arrays and distributions -Base.@deprecate product_distribution( - dists::AbstractVector{<:UnivariateDistribution} -) Product(dists) +function product_distribution(dists::V) where {S<:ValueSupport,T<:UnivariateDistribution{S},V<:AbstractVector{T}} + return Product{S,T,V}(dists) +end diff --git a/src/product.jl b/src/product.jl index 049a5888d9..7a4904ae7a 100644 --- a/src/product.jl +++ b/src/product.jl @@ -24,7 +24,7 @@ function ProductDistribution(dists::AbstractArray{<:Distribution{ArrayLikeVariat return ProductDistribution{M + N,M,typeof(dists)}(dists) end -function ProductDistribution(dists::Tuple{Vararg{<:Distribution{ArrayLikeVariate{M}},N}}) where {M,N} +function ProductDistribution(dists::NTuple{N,Distribution{ArrayLikeVariate{M}}}) where {M,N} return ProductDistribution{M + 1,M,typeof(dists)}(dists) end @@ -33,10 +33,10 @@ _product_valuesupport(dists) = mapreduce(value_support ∘ typeof, promote_type, _product_eltype(dists) = mapreduce(eltype, promote_type, dists) # type-stable and faster implementations for tuples -function _product_valuesupport(dists::Tuple{Vararg{<:Distribution}}) +function 
_product_valuesupport(dists::NTuple{<:Any,Distribution}) return __product_promote_type(value_support, typeof(dists)) end -function _product_eltype(dists::Tuple{Vararg{<:Distribution}}) +function _product_eltype(dists::NTuple{<:Any,Distribution}) return __product_promote_type(eltype, typeof(dists)) end @@ -54,7 +54,7 @@ function _product_size(dists::AbstractArray{<:Distribution{<:ArrayLikeVariate{M} size_dists = size(dists) return ntuple(i -> i <= M ? size_d[i] : size_dists[i-M], Val(M + N)) end -function _product_size(dists::Tuple{Vararg{<:Distribution{<:ArrayLikeVariate{M}},N}}) where {M,N} +function _product_size(dists::NTuple{N,Distribution{<:ArrayLikeVariate{M}}}) where {M,N} size_d = size(first(dists)) all(size(d) == size_d for d in dists) || error("all distributions must be of the same size") return ntuple(i -> i <= M ? size_d[i] : N, Val(M + 1)) diff --git a/test/product.jl b/test/product.jl index 7d19898db5..16d2bb80f3 100644 --- a/test/product.jl +++ b/test/product.jl @@ -39,7 +39,7 @@ end ubound = rand(N) ds = Uniform.(-ubound, ubound) x = rand.(ds) - d_product = @test_deprecated(product_distribution(ds)) + d_product = product_distribution(ds) @test d_product isa Product # Check that methods for `Product` are consistent. @test length(d_product) == length(ds) @@ -70,7 +70,7 @@ end support = fill(a, N) ds = DiscreteNonParametric.(support, Ref([0.5, 0.5])) x = rand.(ds) - d_product = @test_deprecated(product_distribution(ds)) + d_product = product_distribution(ds) @test d_product isa Product # Check that methods for `Product` are consistent. 
@test length(d_product) == length(ds) @@ -92,7 +92,7 @@ end @testset "Testing iid product distributions" begin Random.seed!(123456) N = 11 - d = Product(Fill(Laplace(0.0, 2.3), N)) + d = @test_deprecated(Product(Fill(Laplace(0.0, 2.3), N))) @test N == length(unique(rand(d))); @test mean(d) === Fill(0.0, N) @test cov(d) === Diagonal(Fill(var(Laplace(0.0, 2.3)), N)) @@ -345,4 +345,4 @@ end @test length(unique(x)) == length(d_product) end end -end \ No newline at end of file +end From b9578723f742635ce5267df1de5bb877b40a2ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Sun, 31 Jul 2022 10:50:42 +0200 Subject: [PATCH 14/93] Differentiation Dirichlet (#1534) * constructor frule * frule tested * rrule tests * logpdf test * signature for conflict * TestUtils out of Project * ChainRules itself not needed (yet?) * remove checkarg * Update src/multivariate/dirichlet.jl Co-authored-by: David Widmann * Update test/dirichlet.jl Co-authored-by: David Widmann * Update test/dirichlet.jl Co-authored-by: David Widmann * Update test/dirichlet.jl Co-authored-by: David Widmann * Update src/multivariate/dirichlet.jl Co-authored-by: David Widmann * conflict * eltype instability * single loop * fix tests * forward finite diff * switch to broadcast * fix broadcast * switch off-support value to NaN * Update src/multivariate/dirichlet.jl Co-authored-by: David Widmann * Update src/multivariate/dirichlet.jl Co-authored-by: David Widmann * do not assume inplace * fixed temp * Simplify implementation and tests in #1534 (#1555) * Simplify implementation and tests * Precompute `digamma(alpha0)` * Relax type signature Co-authored-by: David Widmann --- src/multivariate/dirichlet.jl | 61 ++++++++++++++++++++++++++++++++++- test/dirichlet.jl | 33 ++++++++++++++++++- 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/src/multivariate/dirichlet.jl b/src/multivariate/dirichlet.jl index 8a8865e779..d77d4f5d0d 100644 --- a/src/multivariate/dirichlet.jl +++ 
b/src/multivariate/dirichlet.jl @@ -72,7 +72,7 @@ Base.show(io::IO, d::Dirichlet) = show(io, d, (:alpha,)) length(d::Dirichlet) = length(d.alpha) mean(d::Dirichlet) = d.alpha .* inv(d.alpha0) params(d::Dirichlet) = (d.alpha,) -@inline partype(d::Dirichlet{T}) where {T<:Real} = T +@inline partype(::Dirichlet{T}) where {T<:Real} = T function var(d::Dirichlet) α0 = d.alpha0 @@ -375,3 +375,62 @@ function fit_mle(::Type{<:Dirichlet}, P::AbstractMatrix{Float64}, elogp = mean_logp(suffstats(Dirichlet, P, w)) fit_dirichlet!(elogp, α; maxiter=maxiter, tol=tol, debug=debug) end + +## Differentiation +function ChainRulesCore.frule((_, Δalpha)::Tuple{Any,Any}, ::Type{DT}, alpha::AbstractVector{T}; check_args::Bool = true) where {T <: Real, DT <: Union{Dirichlet{T}, Dirichlet}} + d = DT(alpha; check_args=check_args) + ∂alpha0 = sum(Δalpha) + digamma_alpha0 = SpecialFunctions.digamma(d.alpha0) + ∂lmnB = sum(Broadcast.instantiate(Broadcast.broadcasted(Δalpha, alpha) do Δalphai, alphai + Δalphai * (SpecialFunctions.digamma(alphai) - digamma_alpha0) + end)) + Δd = ChainRulesCore.Tangent{typeof(d)}(; alpha=Δalpha, alpha0=∂alpha0, lmnB=∂lmnB) + return d, Δd +end + +function ChainRulesCore.rrule(::Type{DT}, alpha::AbstractVector{T}; check_args::Bool = true) where {T <: Real, DT <: Union{Dirichlet{T}, Dirichlet}} + d = DT(alpha; check_args=check_args) + digamma_alpha0 = SpecialFunctions.digamma(d.alpha0) + function Dirichlet_pullback(_Δd) + Δd = ChainRulesCore.unthunk(_Δd) + Δalpha = Δd.alpha .+ Δd.alpha0 .+ Δd.lmnB .* (SpecialFunctions.digamma.(alpha) .- digamma_alpha0) + return ChainRulesCore.NoTangent(), Δalpha + end + return d, Dirichlet_pullback +end + +function ChainRulesCore.frule((_, Δd, Δx)::Tuple{Any,Any,Any}, ::typeof(_logpdf), d::Dirichlet, x::AbstractVector{<:Real}) + Ω = _logpdf(d, x) + ∂alpha = sum(Broadcast.instantiate(Broadcast.broadcasted(Δd.alpha, Δx, d.alpha, x) do Δalphai, Δxi, alphai, xi + xlogy(Δalphai, xi) + (alphai - 1) * Δxi / xi + end)) + ∂lmnB = -Δd.lmnB + 
ΔΩ = ∂alpha + ∂lmnB + if !isfinite(Ω) + ΔΩ = oftype(ΔΩ, NaN) + end + return Ω, ΔΩ +end + +function ChainRulesCore.rrule(::typeof(_logpdf), d::T, x::AbstractVector{<:Real}) where {T<:Dirichlet} + Ω = _logpdf(d, x) + isfinite_Ω = isfinite(Ω) + alpha = d.alpha + function _logpdf_Dirichlet_pullback(_ΔΩ) + ΔΩ = ChainRulesCore.unthunk(_ΔΩ) + ∂alpha = _logpdf_Dirichlet_∂alphai.(x, ΔΩ, isfinite_Ω) + ∂lmnB = isfinite_Ω ? -float(ΔΩ) : oftype(float(ΔΩ), NaN) + Δd = ChainRulesCore.Tangent{T}(; alpha=∂alpha, lmnB=∂lmnB) + Δx = _logpdf_Dirichlet_Δxi.(ΔΩ, alpha, x, isfinite_Ω) + return ChainRulesCore.NoTangent(), Δd, Δx + end + return Ω, _logpdf_Dirichlet_pullback +end +function _logpdf_Dirichlet_∂alphai(xi, ΔΩi, isfinite::Bool) + ∂alphai = xlogy.(ΔΩi, xi) + return isfinite ? ∂alphai : oftype(∂alphai, NaN) +end +function _logpdf_Dirichlet_Δxi(ΔΩi, alphai, xi, isfinite::Bool) + Δxi = ΔΩi * (alphai - 1) / xi + return isfinite ? Δxi : oftype(Δxi, NaN) +end diff --git a/test/dirichlet.jl b/test/dirichlet.jl index 1b3a18b521..78de162dca 100644 --- a/test/dirichlet.jl +++ b/test/dirichlet.jl @@ -2,7 +2,9 @@ using Distributions using Test, Random, LinearAlgebra - +using ChainRulesCore +using ChainRulesTestUtils +using FiniteDifferences Random.seed!(34567) @@ -127,3 +129,32 @@ end @test entropy(Dirichlet(N, 1)) ≈ -loggamma(N) @test entropy(Dirichlet(ones(N))) ≈ -loggamma(N) end + +@testset "Dirichlet: ChainRules (length=$n)" for n in (2, 10) + alpha = rand(n) + d = Dirichlet(alpha) + + @testset "constructor $T" for T in (Dirichlet, Dirichlet{Float64}) + # Avoid issues with finite differencing if values in `alpha` become negative or zero + # by using forward differencing + test_frule(T, alpha; fdm=forward_fdm(5, 1)) + test_rrule(T, alpha; fdm=forward_fdm(5, 1)) + end + + @testset "_logpdf" begin + # `x1` is in the support, `x2` isn't + x1 = rand(n) + x1 ./= sum(x1) + x2 = x1 .+ 1 + + # Use special finite differencing method that tries to avoid moving outside of the + # support by limiting 
the range of the points around the input that are evaluated + fdm = central_fdm(5, 1; max_range=1e-9) + + for x in (x1, x2) + # We have to adjust the tolerance since the finite differencing method is rough + test_frule(Distributions._logpdf, d, x; fdm=fdm, rtol=1e-5, nans=true) + test_rrule(Distributions._logpdf, d, x; fdm=fdm, rtol=1e-5, nans=true) + end + end +end From 506ec162912a2edff1b78fc06f274a815f3acd76 Mon Sep 17 00:00:00 2001 From: Simone Carlo Surace <51025924+simsurace@users.noreply.github.com> Date: Tue, 9 Aug 2022 00:04:05 +0200 Subject: [PATCH 15/93] Refactor tests (second attempt) (#1584) * Wrap tests in `@testset` to count number of tests * Move tests to subfolders * Document missing and additional files vs. /src * Simplify testsets Co-authored-by: David Widmann * Move `multivariate/product.jl` to `product.jl` Co-authored-by: David Widmann --- test/{ => cholesky}/lkjcholesky.jl | 0 test/{ => multivariate}/dirichlet.jl | 0 .../dirichletmultinomial.jl | 0 test/{ => multivariate}/multinomial.jl | 0 test/{ => multivariate}/mvlognormal.jl | 0 test/{ => multivariate}/mvnormal.jl | 0 test/{ => multivariate}/mvtdist.jl | 0 test/{ => multivariate}/vonmisesfisher.jl | 0 test/runtests.jl | 178 +++++++++++++----- .../discrete_uniform.jl} | 0 .../exponential.jl} | 0 test/{truncnormal.jl => truncated/normal.jl} | 0 .../uniform.jl} | 0 test/{ => univariate}/continuous.jl | 0 test/{ => univariate/continuous}/arcsine.jl | 0 test/{ => univariate/continuous}/cauchy.jl | 0 test/{ => univariate/continuous}/chernoff.jl | 0 test/{ => univariate/continuous}/chi.jl | 0 test/{ => univariate/continuous}/gumbel.jl | 0 .../{ => univariate/continuous}/kolmogorov.jl | 0 test/{ => univariate/continuous}/laplace.jl | 0 .../continuous}/logitnormal.jl | 0 test/{ => univariate/continuous}/lognormal.jl | 0 .../{ => univariate/continuous}/loguniform.jl | 0 .../continuous}/noncentralt.jl | 0 test/{ => univariate/continuous}/normal.jl | 0 .../continuous}/pgeneralizedgaussian.jl | 0 
test/{ => univariate/continuous}/rician.jl | 0 .../{ => univariate/continuous}/semicircle.jl | 0 .../continuous}/skewedexponentialpower.jl | 0 .../{ => univariate/continuous}/skewnormal.jl | 0 test/{ => univariate/continuous}/uniform.jl | 0 test/{ => univariate/discrete}/bernoulli.jl | 0 .../{ => univariate/discrete}/betabinomial.jl | 0 test/{ => univariate/discrete}/binomial.jl | 0 test/{ => univariate/discrete}/categorical.jl | 0 test/{ => univariate/discrete}/dirac.jl | 0 .../discrete}/discretenonparametric.jl | 0 .../discrete}/discreteuniform.jl | 0 .../discrete}/negativebinomial.jl | 0 .../discrete}/poissonbinomial.jl | 0 test/{ => univariate/discrete}/soliton.jl | 0 test/{ => univariate}/locationscale.jl | 0 43 files changed, 126 insertions(+), 52 deletions(-) rename test/{ => cholesky}/lkjcholesky.jl (100%) rename test/{ => multivariate}/dirichlet.jl (100%) rename test/{ => multivariate}/dirichletmultinomial.jl (100%) rename test/{ => multivariate}/multinomial.jl (100%) rename test/{ => multivariate}/mvlognormal.jl (100%) rename test/{ => multivariate}/mvnormal.jl (100%) rename test/{ => multivariate}/mvtdist.jl (100%) rename test/{ => multivariate}/vonmisesfisher.jl (100%) rename test/{truncated_discrete_uniform.jl => truncated/discrete_uniform.jl} (100%) rename test/{truncated_exponential.jl => truncated/exponential.jl} (100%) rename test/{truncnormal.jl => truncated/normal.jl} (100%) rename test/{truncated_uniform.jl => truncated/uniform.jl} (100%) rename test/{ => univariate}/continuous.jl (100%) rename test/{ => univariate/continuous}/arcsine.jl (100%) rename test/{ => univariate/continuous}/cauchy.jl (100%) rename test/{ => univariate/continuous}/chernoff.jl (100%) rename test/{ => univariate/continuous}/chi.jl (100%) rename test/{ => univariate/continuous}/gumbel.jl (100%) rename test/{ => univariate/continuous}/kolmogorov.jl (100%) rename test/{ => univariate/continuous}/laplace.jl (100%) rename test/{ => univariate/continuous}/logitnormal.jl (100%) 
rename test/{ => univariate/continuous}/lognormal.jl (100%) rename test/{ => univariate/continuous}/loguniform.jl (100%) rename test/{ => univariate/continuous}/noncentralt.jl (100%) rename test/{ => univariate/continuous}/normal.jl (100%) rename test/{ => univariate/continuous}/pgeneralizedgaussian.jl (100%) rename test/{ => univariate/continuous}/rician.jl (100%) rename test/{ => univariate/continuous}/semicircle.jl (100%) rename test/{ => univariate/continuous}/skewedexponentialpower.jl (100%) rename test/{ => univariate/continuous}/skewnormal.jl (100%) rename test/{ => univariate/continuous}/uniform.jl (100%) rename test/{ => univariate/discrete}/bernoulli.jl (100%) rename test/{ => univariate/discrete}/betabinomial.jl (100%) rename test/{ => univariate/discrete}/binomial.jl (100%) rename test/{ => univariate/discrete}/categorical.jl (100%) rename test/{ => univariate/discrete}/dirac.jl (100%) rename test/{ => univariate/discrete}/discretenonparametric.jl (100%) rename test/{ => univariate/discrete}/discreteuniform.jl (100%) rename test/{ => univariate/discrete}/negativebinomial.jl (100%) rename test/{ => univariate/discrete}/poissonbinomial.jl (100%) rename test/{ => univariate/discrete}/soliton.jl (100%) rename test/{ => univariate}/locationscale.jl (100%) diff --git a/test/lkjcholesky.jl b/test/cholesky/lkjcholesky.jl similarity index 100% rename from test/lkjcholesky.jl rename to test/cholesky/lkjcholesky.jl diff --git a/test/dirichlet.jl b/test/multivariate/dirichlet.jl similarity index 100% rename from test/dirichlet.jl rename to test/multivariate/dirichlet.jl diff --git a/test/dirichletmultinomial.jl b/test/multivariate/dirichletmultinomial.jl similarity index 100% rename from test/dirichletmultinomial.jl rename to test/multivariate/dirichletmultinomial.jl diff --git a/test/multinomial.jl b/test/multivariate/multinomial.jl similarity index 100% rename from test/multinomial.jl rename to test/multivariate/multinomial.jl diff --git a/test/mvlognormal.jl 
b/test/multivariate/mvlognormal.jl similarity index 100% rename from test/mvlognormal.jl rename to test/multivariate/mvlognormal.jl diff --git a/test/mvnormal.jl b/test/multivariate/mvnormal.jl similarity index 100% rename from test/mvnormal.jl rename to test/multivariate/mvnormal.jl diff --git a/test/mvtdist.jl b/test/multivariate/mvtdist.jl similarity index 100% rename from test/mvtdist.jl rename to test/multivariate/mvtdist.jl diff --git a/test/vonmisesfisher.jl b/test/multivariate/vonmisesfisher.jl similarity index 100% rename from test/vonmisesfisher.jl rename to test/multivariate/vonmisesfisher.jl diff --git a/test/runtests.jl b/test/runtests.jl index 6f61732e81..ae113aac6c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,71 +11,145 @@ import JSON import ForwardDiff const tests = [ - "loguniform", - "arcsine", - "dirac", + "univariate/continuous/loguniform", + "univariate/continuous/arcsine", + "univariate/discrete/dirac", "truncate", - "truncnormal", - "truncated_exponential", - "truncated_uniform", - "truncated_discrete_uniform", + "truncated/normal", + "truncated/exponential", + "truncated/uniform", + "truncated/discrete_uniform", "censored", - "normal", - "laplace", - "cauchy", - "uniform", - "lognormal", - "mvnormal", - "mvlognormal", - "types", + "univariate/continuous/normal", + "univariate/continuous/laplace", + "univariate/continuous/cauchy", + "univariate/continuous/uniform", + "univariate/continuous/lognormal", + "multivariate/mvnormal", + "multivariate/mvlognormal", + "types", # extra file compared to /src "utils", "samplers", - "categorical", + "univariate/discrete/categorical", "univariates", - "continuous", - "edgecases", - "fit", - "multinomial", - "binomial", - "betabinomial", - "poissonbinomial", - "dirichlet", - "dirichletmultinomial", - "logitnormal", - "mvtdist", - "kolmogorov", + "univariate/continuous", # extra file compared to /src + "edgecases", # extra file compared to /src + "fit", # extra file compared to /src + 
"multivariate/multinomial", + "univariate/discrete/binomial", + "univariate/discrete/betabinomial", + "univariate/discrete/poissonbinomial", + "multivariate/dirichlet", + "multivariate/dirichletmultinomial", + "univariate/continuous/logitnormal", + "multivariate/mvtdist", + "univariate/continuous/kolmogorov", "edgeworth", - "matrixreshaped", + "matrixreshaped", # extra file compared to /src "matrixvariates", - "lkjcholesky", - "vonmisesfisher", + "cholesky/lkjcholesky", + "multivariate/vonmisesfisher", "conversion", "convolution", - "mixture", - "gradlogpdf", - "noncentralt", - "locationscale", - "quantile_newton", - "semicircle", + "mixture", # extra file compared to /src + "gradlogpdf", # extra file compared to /src + "univariate/continuous/noncentralt", + "univariate/locationscale", + "quantile_newton", # extra file compared to /src + "univariate/continuous/semicircle", "qq", - "pgeneralizedgaussian", + "univariate/continuous/pgeneralizedgaussian", "product", - "discretenonparametric", - "chernoff", - "univariate_bounds", - "negativebinomial", - "bernoulli", - "soliton", - "skewnormal", - "chi", - "gumbel", + "univariate/discrete/discretenonparametric", + "univariate/continuous/chernoff", + "univariate_bounds", # extra file compared to /src + "univariate/discrete/negativebinomial", + "univariate/discrete/bernoulli", + "univariate/discrete/soliton", + "univariate/continuous/skewnormal", + "univariate/continuous/chi", + "univariate/continuous/gumbel", "pdfnorm", - "rician", + "univariate/continuous/rician", "functionals", "density_interface", "reshaped", - "skewedexponentialpower", - "discreteuniform", + "univariate/continuous/skewedexponentialpower", + "univariate/discrete/discreteuniform", + + ### missing files compared to /src: + # "common", + # "estimators", + # "genericfit", + # "matrix/inversewishart", + # "matrix/lkj", + # "matrix/matrixbeta", + # "matrix/matrixfdist", + # "matrix/matrixnormal", + # "matrix/matrixtdist", + # "matrix/wishart", + # 
"mixtures/mixturemodel", + # "mixtures/unigmm", + # "multivariate/mvnormalcanon", + # "multivariate/product", + # "quantilealgs", + # "samplers/aliastable", + # "samplers/binomial", + # "samplers/categorical", + # "samplers/discretenonparametric", + # "samplers/exponential", + # "samplers/gamma", + # "samplers/multinomial", + # "samplers/obsoleted", + # "samplers/poisson", + # "samplers/poissonbinomial", + # "samplers/vonmises", + # "samplers/vonmisesfisher", + # "show", + # "truncated/loguniform", + # "univariate/continuous/beta", + # "univariate/continuous/beta", + # "univariate/continuous/betaprime", + # "univariate/continuous/biweight", + # "univariate/continuous/chisq", + # "univariate/continuous/cosine", + # "univariate/continuous/epanechnikov", + # "univariate/continuous/erlang", + # "univariate/continuous/exponential", + # "univariate/continuous/fdist", + # "univariate/continuous/frechet", + # "univariate/continuous/gamma", + # "univariate/continuous/generalizedextremevalue", + # "univariate/continuous/generalizedpareto", + # "univariate/continuous/inversegamma", + # "univariate/continuous/inversegaussian", + # "univariate/continuous/ksdist", + # "univariate/continuous/ksonesided", + # "univariate/continuous/levy", + # "univariate/continuous/logistic", + # "univariate/continuous/noncentralbeta", + # "univariate/continuous/noncentralchisq", + # "univariate/continuous/noncentralf", + # "univariate/continuous/normalcanon", + # "univariate/continuous/normalinversegaussian", + # "univariate/continuous/pareto", + # "univariate/continuous/rayleigh", + # "univariate/continuous/studentizedrange", + # "univariate/continuous/symtriangular", + # "univariate/continuous/tdist", + # "univariate/continuous/triangular", + # "univariate/continuous/triweight", + # "univariate/continuous/weibull", + # "univariate/continuous/noncentralf", + # "univariate/discrete/geometric", + # "univariate/discrete/hypergeometric", + # "univariate/discrete/noncentralhypergeometric", + # 
"univariate/discrete/poisson", + # "univariate/discrete/skellam", + + ### file is present but was not included in list + # "multivariate_stats", # extra file compared to /src + # "univariate/continuous/vonmises", ] printstyled("Running tests:\n", color=:blue) @@ -85,8 +159,8 @@ Random.seed!(345679) # to reduce redundancy, we might break this file down into seperate `$t * "_utils.jl"` files include("testutils.jl") -for t in tests - @testset "Test $t" begin +@testset "Distributions" begin + @testset "Test $t" for t in tests include("$t.jl") end end diff --git a/test/truncated_discrete_uniform.jl b/test/truncated/discrete_uniform.jl similarity index 100% rename from test/truncated_discrete_uniform.jl rename to test/truncated/discrete_uniform.jl diff --git a/test/truncated_exponential.jl b/test/truncated/exponential.jl similarity index 100% rename from test/truncated_exponential.jl rename to test/truncated/exponential.jl diff --git a/test/truncnormal.jl b/test/truncated/normal.jl similarity index 100% rename from test/truncnormal.jl rename to test/truncated/normal.jl diff --git a/test/truncated_uniform.jl b/test/truncated/uniform.jl similarity index 100% rename from test/truncated_uniform.jl rename to test/truncated/uniform.jl diff --git a/test/continuous.jl b/test/univariate/continuous.jl similarity index 100% rename from test/continuous.jl rename to test/univariate/continuous.jl diff --git a/test/arcsine.jl b/test/univariate/continuous/arcsine.jl similarity index 100% rename from test/arcsine.jl rename to test/univariate/continuous/arcsine.jl diff --git a/test/cauchy.jl b/test/univariate/continuous/cauchy.jl similarity index 100% rename from test/cauchy.jl rename to test/univariate/continuous/cauchy.jl diff --git a/test/chernoff.jl b/test/univariate/continuous/chernoff.jl similarity index 100% rename from test/chernoff.jl rename to test/univariate/continuous/chernoff.jl diff --git a/test/chi.jl b/test/univariate/continuous/chi.jl similarity index 100% rename from 
test/chi.jl rename to test/univariate/continuous/chi.jl diff --git a/test/gumbel.jl b/test/univariate/continuous/gumbel.jl similarity index 100% rename from test/gumbel.jl rename to test/univariate/continuous/gumbel.jl diff --git a/test/kolmogorov.jl b/test/univariate/continuous/kolmogorov.jl similarity index 100% rename from test/kolmogorov.jl rename to test/univariate/continuous/kolmogorov.jl diff --git a/test/laplace.jl b/test/univariate/continuous/laplace.jl similarity index 100% rename from test/laplace.jl rename to test/univariate/continuous/laplace.jl diff --git a/test/logitnormal.jl b/test/univariate/continuous/logitnormal.jl similarity index 100% rename from test/logitnormal.jl rename to test/univariate/continuous/logitnormal.jl diff --git a/test/lognormal.jl b/test/univariate/continuous/lognormal.jl similarity index 100% rename from test/lognormal.jl rename to test/univariate/continuous/lognormal.jl diff --git a/test/loguniform.jl b/test/univariate/continuous/loguniform.jl similarity index 100% rename from test/loguniform.jl rename to test/univariate/continuous/loguniform.jl diff --git a/test/noncentralt.jl b/test/univariate/continuous/noncentralt.jl similarity index 100% rename from test/noncentralt.jl rename to test/univariate/continuous/noncentralt.jl diff --git a/test/normal.jl b/test/univariate/continuous/normal.jl similarity index 100% rename from test/normal.jl rename to test/univariate/continuous/normal.jl diff --git a/test/pgeneralizedgaussian.jl b/test/univariate/continuous/pgeneralizedgaussian.jl similarity index 100% rename from test/pgeneralizedgaussian.jl rename to test/univariate/continuous/pgeneralizedgaussian.jl diff --git a/test/rician.jl b/test/univariate/continuous/rician.jl similarity index 100% rename from test/rician.jl rename to test/univariate/continuous/rician.jl diff --git a/test/semicircle.jl b/test/univariate/continuous/semicircle.jl similarity index 100% rename from test/semicircle.jl rename to 
test/univariate/continuous/semicircle.jl diff --git a/test/skewedexponentialpower.jl b/test/univariate/continuous/skewedexponentialpower.jl similarity index 100% rename from test/skewedexponentialpower.jl rename to test/univariate/continuous/skewedexponentialpower.jl diff --git a/test/skewnormal.jl b/test/univariate/continuous/skewnormal.jl similarity index 100% rename from test/skewnormal.jl rename to test/univariate/continuous/skewnormal.jl diff --git a/test/uniform.jl b/test/univariate/continuous/uniform.jl similarity index 100% rename from test/uniform.jl rename to test/univariate/continuous/uniform.jl diff --git a/test/bernoulli.jl b/test/univariate/discrete/bernoulli.jl similarity index 100% rename from test/bernoulli.jl rename to test/univariate/discrete/bernoulli.jl diff --git a/test/betabinomial.jl b/test/univariate/discrete/betabinomial.jl similarity index 100% rename from test/betabinomial.jl rename to test/univariate/discrete/betabinomial.jl diff --git a/test/binomial.jl b/test/univariate/discrete/binomial.jl similarity index 100% rename from test/binomial.jl rename to test/univariate/discrete/binomial.jl diff --git a/test/categorical.jl b/test/univariate/discrete/categorical.jl similarity index 100% rename from test/categorical.jl rename to test/univariate/discrete/categorical.jl diff --git a/test/dirac.jl b/test/univariate/discrete/dirac.jl similarity index 100% rename from test/dirac.jl rename to test/univariate/discrete/dirac.jl diff --git a/test/discretenonparametric.jl b/test/univariate/discrete/discretenonparametric.jl similarity index 100% rename from test/discretenonparametric.jl rename to test/univariate/discrete/discretenonparametric.jl diff --git a/test/discreteuniform.jl b/test/univariate/discrete/discreteuniform.jl similarity index 100% rename from test/discreteuniform.jl rename to test/univariate/discrete/discreteuniform.jl diff --git a/test/negativebinomial.jl b/test/univariate/discrete/negativebinomial.jl similarity index 100% rename 
from test/negativebinomial.jl rename to test/univariate/discrete/negativebinomial.jl diff --git a/test/poissonbinomial.jl b/test/univariate/discrete/poissonbinomial.jl similarity index 100% rename from test/poissonbinomial.jl rename to test/univariate/discrete/poissonbinomial.jl diff --git a/test/soliton.jl b/test/univariate/discrete/soliton.jl similarity index 100% rename from test/soliton.jl rename to test/univariate/discrete/soliton.jl diff --git a/test/locationscale.jl b/test/univariate/locationscale.jl similarity index 100% rename from test/locationscale.jl rename to test/univariate/locationscale.jl From aca39200d7e2288b571895a09d8e0ec34b8dcf83 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Sun, 14 Aug 2022 01:36:52 +0100 Subject: [PATCH 16/93] Fix mini typos in docs (#1600) * Fixes as per Grammarly and Google check * Fixes as per Google + Grammarly * Fixes as per Google + Grammarly * Fixed as per Google + Grammarly * `subtype` as its more popular * Fixes as per Grammarly and Google * Fixes as per Grammarly * Fixed as per Wikipedia term * Alignment to in-place form as per Wikipedia * Alignment to `logscale` form * Alignment to in-place form as per Wikipedia * Alignment to `log scale` as per Wikipedia * Update README.md Co-authored-by: David Widmann * Update docs/src/multivariate.md Co-authored-by: David Widmann * Update src/univariate/continuous/skewedexponentialpower.jl Co-authored-by: David Widmann Co-authored-by: David Widmann --- README.md | 10 ++++---- docs/src/extends.md | 24 +++++++++---------- docs/src/fit.md | 6 ++--- docs/src/mixture.md | 6 ++--- docs/src/multivariate.md | 8 +++---- docs/src/types.md | 6 ++--- docs/src/univariate.md | 4 ++-- src/Distributions.jl | 2 +- .../continuous/skewedexponentialpower.jl | 2 +- src/utils.jl | 2 +- 10 files changed, 35 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 7c9103b011..80a39ecb72 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ A Julia 
package for probability distributions and associated functions. Particul * Moments (e.g mean, variance, skewness, and kurtosis), entropy, and other properties * Probability density/mass functions (pdf) and their logarithm (logpdf) * Moment generating functions and characteristic functions -* Sampling from population or from a distribution +* Sampling from a population or from a distribution * Maximum likelihood estimation **Note:** The functionalities related to conjugate priors have been moved to the [ConjugatePriors package](https://github.com/JuliaStats/ConjugatePriors.jl). @@ -32,7 +32,7 @@ Also, for casual conversation and quick questions, there are the channels `#help ### Reporting issues -* If you need help or an explanation how to use *Distributions* ask in the forum (https://discourse.julialang.org) or, for informal questions, visit the chat (https://julialang.slack.com). +* If you need help or an explanation of how to use *Distributions* ask in the forum (https://discourse.julialang.org) or, for informal questions, visit the chat (https://julialang.slack.com). If you have a bug linked with *Distributions*, check that it has not been reported yet on the issues of the repository. @@ -42,8 +42,8 @@ which you can get with this command in the Julia REPL: julia> ]status Distributions ``` -Be exhaustive in your report, give the summary of the bug, -a Minimal Working Example (MWE), what happens and what you +Be exhaustive in your report, summarize the bug, and provide: +a Minimal Working Example (MWE), what happens, and what you expected to happen. ### Workflow with Git and GitHub @@ -61,7 +61,7 @@ the following are required for contributions to be accepted: 1. Docstrings must be added to all interface and non-trivial functions. 2. Tests validating the modified behavior in the `test` folder. If new test files are added, do not forget to add them in `test/runtests.jl`. Cover possible edge cases. Run the tests locally before submitting the PR. 3. 
At the end of the tests, `Test.detect_ambiguities(Distributions)` is run to check method ambiguities. Verify that your modified code did not yield method ambiguities. -4. Make according modifications to the `docs` folder, build the documentation locally and verify that your modifications display correctly and did not yield warnings. To build the documentation locally, you first need to instantiate the `docs/` project: +4. Make corresponding modifications to the `docs` folder, build the documentation locally and verify that your modifications display correctly and did not yield warnings. To build the documentation locally, you first need to instantiate the `docs/` project: julia --project=docs/ pkg> instantiate diff --git a/docs/src/extends.md b/docs/src/extends.md index abaf373dbd..d731754205 100644 --- a/docs/src/extends.md +++ b/docs/src/extends.md @@ -1,21 +1,21 @@ # Create New Samplers and Distributions -Whereas this package already provides a large collection of common distributions out of box, there are still occasions where you want to create new distributions (*e.g* your application requires a special kind of distributions, or you want to contribute to this package). +Whereas this package already provides a large collection of common distributions out of the box, there are still occasions where you want to create new distributions (*e.g.* your application requires a special kind of distribution, or you want to contribute to this package). Generally, you don't have to implement every API method listed in the documentation. This package provides a series of generic functions that turn a small number of internal methods into user-end API methods. What you need to do is to implement this small set of internal methods for your distributions. -By default, `Discrete` sampleables have support of type `Int` while `Continuous` sampleables have support of type `Float64`. 
If this assumption does not hold for your new distribution or sampler, or its `ValueSupport` is neither `Discrete` nor `Continuous`, you should implement the `eltype` method in addition to the other methods listed below. +By default, `Discrete` sampleables have the support of type `Int` while `Continuous` sampleables have the support of type `Float64`. If this assumption does not hold for your new distribution or sampler, or its `ValueSupport` is neither `Discrete` nor `Continuous`, you should implement the `eltype` method in addition to the other methods listed below. -**Note:** the methods need to be implemented are different for distributions of different variate forms. +**Note:** The methods that need to be implemented are different for distributions of different variate forms. ## Create a Sampler -Unlike a full fledged distributions, a sampler, in general, only provides limited functionalities, mainly to support sampling. +Unlike full-fledged distributions, a sampler, in general, only provides limited functionalities, mainly to support sampling. ### Univariate Sampler -To implement a univariate sampler, one can define a sub type (say `Spl`) of `Sampleable{Univariate,S}` (where `S` can be `Discrete` or `Continuous`), and provide a `rand` method, as +To implement a univariate sampler, one can define a subtype (say `Spl`) of `Sampleable{Univariate,S}` (where `S` can be `Discrete` or `Continuous`), and provide a `rand` method, as ```julia function rand(rng::AbstractRNG, s::Spl) @@ -27,7 +27,7 @@ The package already implements a vectorized version of `rand!` and `rand` that r ### Multivariate Sampler -To implement a multivariate sampler, one can define a sub type of `Sampleable{Multivariate,S}`, and provide both `length` and `_rand!` methods, as +To implement a multivariate sampler, one can define a subtype of `Sampleable{Multivariate,S}`, and provide both `length` and `_rand!` methods, as ```julia Base.length(s::Spl) = ... 
# return the length of each sample @@ -68,7 +68,7 @@ rand(rng::AbstractRNG, s::Sampleable{Multivariate,S}, n::Int) where {S<:ValueSup _rand!(rng, s, Matrix{eltype(S)}(length(s), n)) ``` -If there is a more efficient method to generate multiple vector samples in batch, one should provide the following method +If there is a more efficient method to generate multiple vector samples in a batch, one should provide the following method ```julia function _rand!(rng::AbstractRNG, s::Spl, A::DenseMatrix{T}) where T<:Real @@ -80,7 +80,7 @@ Remember that each *column* of A is a sample. ### Matrix-variate Sampler -To implement a multivariate sampler, one can define a sub type of `Sampleable{Multivariate,S}`, and provide both `size` and `_rand!` method, as +To implement a matrix-variate sampler, one can define a subtype of `Sampleable{Matrixvariate,S}`, and provide both `size` and `_rand!` methods, as ```julia Base.size(s::Spl) = ... # the size of each matrix sample @@ -104,7 +104,7 @@ sampler(d::Distribution) A univariate distribution type should be defined as a subtype of `DiscreteUnivarateDistribution` or `ContinuousUnivariateDistribution`. -Following methods need to be implemented for each univariate distribution type: +The following methods need to be implemented for each univariate distribution type: - [`rand(::AbstractRNG, d::UnivariateDistribution)`](@ref) - [`sampler(d::Distribution)`](@ref) @@ -134,7 +134,7 @@ You may refer to the source file `src/univariates.jl` to see details about how g A multivariate distribution type should be defined as a subtype of `DiscreteMultivarateDistribution` or `ContinuousMultivariateDistribution`. 
-Following methods need to be implemented for each multivariate distribution type: +The following methods need to be implemented for each multivariate distribution type: - [`length(d::MultivariateDistribution)`](@ref) - [`sampler(d::Distribution)`](@ref) @@ -142,7 +142,7 @@ Following methods need to be implemented for each multivariate distribution type - [`Distributions._rand!(::AbstractRNG, d::MultivariateDistribution, x::AbstractArray)`](@ref) - [`Distributions._logpdf(d::MultivariateDistribution, x::AbstractArray)`](@ref) -Note that if there exists faster methods for batch evaluation, one should override `_logpdf!` and `_pdf!`. +Note that if there exist faster methods for batch evaluation, one should override `_logpdf!` and `_pdf!`. Furthermore, the generic `loglikelihood` function repeatedly calls `_logpdf`. If there is a better way to compute the log-likelihood, one should override `loglikelihood`. @@ -158,7 +158,7 @@ It is also recommended that one also implements the following statistics functio A multivariate distribution type should be defined as a subtype of `DiscreteMatrixDistribution` or `ContinuousMatrixDistribution`. -Following methods need to be implemented for each matrix-variate distribution type: +The following methods need to be implemented for each matrix-variate distribution type: - [`size(d::MatrixDistribution)`](@ref) - [`rand(d::MatrixDistribution)`](@ref) diff --git a/docs/src/fit.md b/docs/src/fit.md index eb91aa39be..24ddbad5ce 100644 --- a/docs/src/fit.md +++ b/docs/src/fit.md @@ -10,7 +10,7 @@ This statement fits a distribution of type `D` to a given dataset `x`, where `x` !!! note - One can use as first argument simply the distribution name, like `Binomial`, + One can use as the first argument simply the distribution name, like `Binomial`, or a concrete distribution with a type parameter, like `Normal{Float64}` or `Exponential{Float32}`. 
However, in the latter case the type parameter of the distribution will be ignored: @@ -61,7 +61,7 @@ The `fit_mle` method has been implemented for the following distributions: - [`MvNormal`](@ref) - [`Dirichlet`](@ref) -For most of these distributions, the usage is as described above. For a few special distributions that require additional information for estimation, we have to use modified interface: +For most of these distributions, the usage is as described above. For a few special distributions that require additional information for estimation, we have to use a modified interface: ```julia fit_mle(Binomial, n, x) # n is the number of trials in each experiment @@ -76,7 +76,7 @@ fit_mle(Categorical, x, w) ## Sufficient Statistics -For many distributions, estimation can be based on (sum of) sufficient statistics computed from a dataset. To simplify implementation, for such distributions, we implement `suffstats` method instead of `fit_mle` directly: +For many distributions, the estimation can be based on (the sum of) sufficient statistics computed from a dataset. To simplify implementation, for such distributions, we implement the `suffstats` method instead of `fit_mle` directly: ```julia ss = suffstats(D, x) # ss captures the sufficient statistics of x diff --git a/docs/src/mixture.md b/docs/src/mixture.md index 54014fb1d8..e6b24b103c 100644 --- a/docs/src/mixture.md +++ b/docs/src/mixture.md @@ -1,6 +1,6 @@ # Mixture Models -A [mixture model](http://en.wikipedia.org/wiki/Mixture_model) is a probabilistic distribution that combines a set of *component* to represent the overall distribution. Generally, the probability density/mass function is given by a convex combination of the pdf/pmf of individual components, as +A [mixture model](http://en.wikipedia.org/wiki/Mixture_model) is a probabilistic distribution that combines a set of *components* to represent the overall distribution.
Generally, the probability density/mass function is given by a convex combination of the pdf/pmf of individual components, as ```math f_{mix}(x; \Theta, \pi) = \sum_{k=1}^K \pi_k f(x; \theta_k) @@ -27,7 +27,7 @@ const MultivariateMixture = AbstractMixtureModel{Multivariate} **Remarks:** -- We introduce `AbstractMixtureModel` as a base type, which allows one to define a mixture model with different internal implementation, while still being able to leverage the common methods defined for `AbstractMixtureModel`. +- We introduce `AbstractMixtureModel` as a base type, which allows one to define a mixture model with different internal implementations, while still being able to leverage the common methods defined for `AbstractMixtureModel`. ```@docs AbstractMixtureModel @@ -105,5 +105,5 @@ rand!(::AbstractMixtureModel, ::AbstractArray) ## Estimation -There are a number of methods for estimating of mixture models from data, and this problem remains an open research topic. +There are several methods for the estimation of mixture models from data, and this problem remains an open research topic. This package does not provide facilities for estimating mixture models. One can resort to other packages, *e.g.* [*GaussianMixtures.jl*](https://github.com/davidavdav/GaussianMixtures.jl), for this purpose. diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index be4f5427a6..2ae5ef59bd 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -11,7 +11,7 @@ const ContinuousMultivariateDistribution = Distribution{Multivariate, Continuous ## Common Interface -The methods listed as below are implemented for each multivariate distribution, which provides a consistent interface to work with multivariate distributions. +The methods listed below are implemented for each multivariate distribution, which provides a consistent interface to work with multivariate distributions. 
### Computation of statistics @@ -35,7 +35,7 @@ pdf(::MultivariateDistribution, ::AbstractArray) logpdf(::MultivariateDistribution, ::AbstractArray) loglikelihood(::MultivariateDistribution, ::AbstractVector{<:Real}) ``` -**Note:** For multivariate distributions, the pdf value is usually very small or large, and therefore direct evaluating the pdf may cause numerical problems. It is generally advisable to perform probability computation in log-scale. +**Note:** For multivariate distributions, the pdf value is usually very small or large, and therefore direct evaluation of the pdf may cause numerical problems. It is generally advisable to perform probability computation in log scale. ### Sampling @@ -45,7 +45,7 @@ rand(rng::AbstractRNG, ::MultivariateDistribution) rand!(rng::AbstractRNG, d::MultivariateDistribution, x::AbstractArray) ``` -**Note:** In addition to these common methods, each multivariate distribution has its own special methods, as introduced below. +**Note:** In addition to these common methods, each multivariate distribution has its own special methods, as introduced below.
## Distributions @@ -98,7 +98,7 @@ scale!{D<:Distributions.AbstractMvLogNormal}(::Type{D},s::Symbol,m::AbstractVect params{D<:Distributions.AbstractMvLogNormal}(::Type{D},m::AbstractVector,S::AbstractMatrix) ``` -## Internal Methods (for creating you own multivariate distribution) +## Internal Methods (for creating your own multivariate distribution) ```@docs Distributions._logpdf(d::MultivariateDistribution, x::AbstractArray) diff --git a/docs/src/types.md b/docs/src/types.md index daa1ce9202..f0ed23836a 100644 --- a/docs/src/types.md +++ b/docs/src/types.md @@ -23,7 +23,7 @@ Base.rand(::Distributions.Sampleable) Distributions.VariateForm ``` -The `VariateForm` sub-types defined in `Distributions.jl` are: +The `VariateForm` subtypes defined in `Distributions.jl` are: **Type** | **A single sample** | **Multiple samples** --- | --- |--- @@ -51,7 +51,7 @@ Distributions.Continuous Multiple samples are often organized into an array, depending on the variate form. -The basic functionalities that a sampleable object provides is to *retrieve information about the samples it generates* and to *draw samples*. Particularly, the following functions are provided for sampleable objects: +The basic functionalities that a sampleable object provides are to *retrieve information about the samples it generates* and to *draw samples*. Particularly, the following functions are provided for sampleable objects: ```@docs length(::Sampleable) @@ -64,7 +64,7 @@ rand!(::AbstractRNG, ::Sampleable, ::AbstractArray) ## Distributions -We use `Distribution`, a subtype of `Sampleable` as defined below, to capture probabilistic distributions. In addition to being sampleable, a *distribution* typically comes with an explicit way to combine its domain, probability density functions, among many other quantities. +We use `Distribution`, a subtype of `Sampleable` as defined below, to capture probabilistic distributions. 
In addition to being sampleable, a *distribution* typically comes with an explicit way to combine its domain, probability density function, and many other quantities. ```julia abstract type Distribution{F<:VariateForm,S<:ValueSupport} <: Sampleable{F,S} end diff --git a/docs/src/univariate.md b/docs/src/univariate.md index 56aed383dd..cc43f7cece 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -11,7 +11,7 @@ const ContinuousUnivariateDistribution = Distribution{Univariate, Continuous} ## Common Interface -A series of methods are implemented for each univariate distribution, which provide +A series of methods is implemented for each univariate distribution, which provides useful functionalities such as moment computation, pdf evaluation, and sampling (*i.e.* random number generation). @@ -488,7 +488,7 @@ Skellam ### Vectorized evaluation -Vectorized computation and inplace vectorized computation have been deprecated. +Vectorized computation and in-place vectorized computation have been deprecated. 
## Index diff --git a/src/Distributions.jl b/src/Distributions.jl index a27cb5ec17..e05b429ef7 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -220,7 +220,7 @@ export invscale, # Inverse scale parameter sqmahal, # squared Mahalanobis distance to Gaussian center - sqmahal!, # inplace evaluation of sqmahal + sqmahal!, # in-place evaluation of sqmahal location, # get the location parameter location!, # provide storage for the location parameter (used in multivariate distribution mvlognormal) mean, # mean of distribution diff --git a/src/univariate/continuous/skewedexponentialpower.jl b/src/univariate/continuous/skewedexponentialpower.jl index 427f775c41..1840f55c0d 100644 --- a/src/univariate/continuous/skewedexponentialpower.jl +++ b/src/univariate/continuous/skewedexponentialpower.jl @@ -74,7 +74,7 @@ function m_k(d::SkewedExponentialPower, k::Integer) loggamma(inv_p) + log(abs((-1)^k * α^(1 + k) + (1 - α)^(1 + k))) end -# needed for odd moments on log-scale +# needed for odd moments in log scale sgn(d::SkewedExponentialPower) = d.α > 1//2 ? -1 : 1 mean(d::SkewedExponentialPower) = d.α == 1//2 ? float(d.μ) : sgn(d)*exp(m_k(d, 1)) + d.μ diff --git a/src/utils.jl b/src/utils.jl index 4ba6aecff2..fa75875a4b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -102,7 +102,7 @@ isprobvec(p::AbstractVector{<:Real}) = # get a type wide enough to represent all a distributions's parameters # (if the distribution is parametric) # if the distribution is not parametric, we need this to be a float so that -# inplace pdf calculations, etc. allocate storage correctly +# in-place pdf calculations, etc. 
allocate storage correctly @inline partype(::Distribution) = Float64 # because X == X' keeps failing due to floating point nonsense From 5582c443d5f1647fa68f845239c91428c40bd3c9 Mon Sep 17 00:00:00 2001 From: Jan Weidner Date: Tue, 16 Aug 2022 09:58:50 +0200 Subject: [PATCH 17/93] fix mgf of NegativeBinomial (#1604) * fix mgf of NegativeBinomial * fix mgf of Geometric * Update src/univariate/discrete/geometric.jl Co-authored-by: David Widmann * create test/.../geometric.jl * fix geometric cf * fix NegativeBinomial cf * fix * Update src/univariate/discrete/geometric.jl Co-authored-by: David Widmann * Update src/univariate/discrete/geometric.jl Co-authored-by: David Widmann * Update test/univariate/discrete/geometric.jl Co-authored-by: David Widmann * Update test/univariate/discrete/negativebinomial.jl Co-authored-by: David Widmann * Update test/univariates.jl Co-authored-by: David Widmann * Update test/univariates.jl Co-authored-by: David Widmann * improve cf, mgf or Geometric, NegativeBinomial * fix * fix Co-authored-by: David Widmann --- src/univariate/discrete/geometric.jl | 13 ++++--------- src/univariate/discrete/negativebinomial.jl | 10 ++++------ test/runtests.jl | 1 + test/univariate/discrete/geometric.jl | 18 ++++++++++++++++++ test/univariate/discrete/negativebinomial.jl | 14 ++++++++++++++ 5 files changed, 41 insertions(+), 15 deletions(-) create mode 100644 test/univariate/discrete/geometric.jl diff --git a/src/univariate/discrete/geometric.jl b/src/univariate/discrete/geometric.jl index fe3f35feb6..4cdb614137 100644 --- a/src/univariate/discrete/geometric.jl +++ b/src/univariate/discrete/geometric.jl @@ -122,17 +122,12 @@ function invlogccdf(d::Geometric{T}, lp::Real) where T<:Real max(ceil(lp/log1p(-d.p)) - 1, zero(T)) end -function mgf(d::Geometric, t::Real) +function laplace_transform(d::Geometric, t) p = succprob(d) - p / (expm1(-t) + p) + p / (p - (1 - p) * expm1(-t)) end - -function cf(d::Geometric, t::Real) - p = succprob(d) - # replace with 
expm1 when complex version available - p / (exp(-t*im) - 1 + p) -end - +mgf(d::Geometric, t::Real) = laplace_transform(d, -t) +cf(d::Geometric, t::Real) = laplace_transform(d, -t*im) ### Sampling diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index 0208cdca00..128e3a0ab0 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -126,15 +126,13 @@ function rand(rng::AbstractRNG, d::NegativeBinomial) end end -function mgf(d::NegativeBinomial, t::Real) +function laplace_transform(d::NegativeBinomial, t) r, p = params(d) - return ((1 - p) * exp(t))^r / (1 - p * exp(t))^r + return laplace_transform(Geometric(p, check_args=false), t)^r end -function cf(d::NegativeBinomial, t::Real) - r, p = params(d) - return (((1 - p) * cis(t)) / (1 - p * cis(t)))^r -end +mgf(d::NegativeBinomial, t::Real) = laplace_transform(d, -t) +cf(d::NegativeBinomial, t::Real) = laplace_transform(d, -t*im) # ChainRules definitions diff --git a/test/runtests.jl b/test/runtests.jl index ae113aac6c..cddc68121c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -64,6 +64,7 @@ const tests = [ "univariate/continuous/chernoff", "univariate_bounds", # extra file compared to /src "univariate/discrete/negativebinomial", + "univariate/discrete/geometric", "univariate/discrete/bernoulli", "univariate/discrete/soliton", "univariate/continuous/skewnormal", diff --git a/test/univariate/discrete/geometric.jl b/test/univariate/discrete/geometric.jl new file mode 100644 index 0000000000..2274e523ee --- /dev/null +++ b/test/univariate/discrete/geometric.jl @@ -0,0 +1,18 @@ +using Distributions +using Test +using FiniteDifferences + +@testset "Geometric mgf and k vs k-1 parametrization #1604" begin + d = Geometric(0.2) + @test mgf(d, 0) == 1 + @test cf(d, 0) == 1 + + fdm1 = central_fdm(5, 1) + @test fdm1(Base.Fix1(mgf, d), 0) ≈ mean(d) + @test fdm1(Base.Fix1(cf, d), 0) ≈ mean(d) * im + + fdm2 = 
central_fdm(5, 2) + m2 = var(d) + mean(d)^2 + @test fdm2(Base.Fix1(mgf, d), 0) ≈ m2 + @test fdm2(Base.Fix1(cf, d), 0) ≈ -m2 +end diff --git a/test/univariate/discrete/negativebinomial.jl b/test/univariate/discrete/negativebinomial.jl index 2d2ff0f1a0..14f5402fce 100644 --- a/test/univariate/discrete/negativebinomial.jl +++ b/test/univariate/discrete/negativebinomial.jl @@ -7,6 +7,20 @@ using FiniteDifferences # Eventually, we might want to consolidate the tests here mydiffp(r, p, k) = r/p - k/(1 - p) +@testset "issue #1603" begin + d = NegativeBinomial(4, 0.2) + fdm = central_fdm(5, 1) + @test fdm(Base.Fix1(mgf, d), 0) ≈ mean(d) + d = NegativeBinomial(1, 0.2) + @test fdm(Base.Fix1(mgf, d), 0) ≈ mean(d) + @test fdm(Base.Fix1(cf, d), 0) ≈ mean(d) * im + + fdm2 = central_fdm(5, 2) + m2 = var(d) + mean(d)^2 + @test fdm2(Base.Fix1(mgf, d), 0) ≈ m2 + @test fdm2(Base.Fix1(cf, d), 0) ≈ -m2 +end + @testset "NegativeBinomial r=$r, p=$p, k=$k" for p in exp10.(-10:0) .- eps(), # avoid p==1 since it's not differentiable From 7b3890d6547385ca460c209b993481335a629995 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 16 Aug 2022 09:59:08 +0200 Subject: [PATCH 18/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7579e7b8a0..ee78ca36ba 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.66" +version = "0.25.67" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From b7916c62c4c916cffddb189dc67cbc39c5ea8670 Mon Sep 17 00:00:00 2001 From: Martin Trapp Date: Tue, 23 Aug 2022 23:30:13 +0300 Subject: [PATCH 19/93] Bug fix for CF and MGF of discretenonparametric (#1606) * Bug fix for CF and MGF of discretenonparametric * added test * Update test/univariate/discrete/categorical.jl Nice, TBH, I didn't check. Thanks! 
Co-authored-by: David Widmann Co-authored-by: David Widmann --- src/univariate/discrete/discretenonparametric.jl | 6 ++++-- test/univariate/discrete/categorical.jl | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index 55dbcef87c..d94c63aa1e 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -215,7 +215,8 @@ function modes(d::DiscreteNonParametric) end function mgf(d::DiscreteNonParametric, t::Real) - x, p = params(d) + x = support(d) + p = probs(d) s = zero(Float64) for i in 1:length(x) s += p[i] * exp(t*x[i]) @@ -224,7 +225,8 @@ function mgf(d::DiscreteNonParametric, t::Real) end function cf(d::DiscreteNonParametric, t::Real) - x, p = params(d) + x = support(d) + p = probs(d) s = zero(Complex{Float64}) for i in 1:length(x) s += p[i] * cis(t*x[i]) diff --git a/test/univariate/discrete/categorical.jl b/test/univariate/discrete/categorical.jl index 0288c2dcd2..28da036157 100644 --- a/test/univariate/discrete/categorical.jl +++ b/test/univariate/discrete/categorical.jl @@ -57,6 +57,12 @@ for p in Any[ @test pdf.(d, support(d)) == p @test pdf.(d, 1:k) == p + @test cf(d, 0) ≈ 1.0 + @test cf(d, 1) ≈ p' * cis.(1:length(p)) + + @test mgf(d, 0) ≈ 1.0 + @test mgf(d, 1) ≈ p' * exp.(1:length(p)) + # The test utilities are currently only able to handle Float64s if partype(d) === Float64 test_distr(d, 10^6) From e71089949396ba2d5c7ed2042b80576e483e9049 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 23 Aug 2022 22:31:27 +0200 Subject: [PATCH 20/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ee78ca36ba..579689c3d2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.67" 
+version = "0.25.68" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 7056beb0670173bc13371101bccf9d476971aa7e Mon Sep 17 00:00:00 2001 From: Jan Weidner Date: Thu, 1 Sep 2022 22:07:31 +0200 Subject: [PATCH 21/93] add cgf (#1609) * add cgf * simplify cgf tests * Update Project.toml Co-authored-by: David Widmann * Update Project.toml Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * Update src/cgf.jl Co-authored-by: David Widmann * move cgf to each covered distribution file * add cgf for logistic * add cgf for Erlang * fix julia 1.3 * fix * Update src/univariate/discrete/binomial.jl Co-authored-by: David Widmann * Update src/univariate/discrete/geometric.jl Co-authored-by: David Widmann * Update src/univariate/discrete/binomial.jl Co-authored-by: David Widmann * Update src/univariate/discrete/negativebinomial.jl Co-authored-by: David Widmann * Update src/univariate/discrete/negativebinomial.jl Co-authored-by: David Widmann * Update src/univariate/discrete/geometric.jl Co-authored-by: David Widmann * improve and test cgf uniform accuracy * fix * Update src/univariate/discrete/dirac.jl Co-authored-by: David Widmann * move cgf tests to respective distributions test files * Update test/univariate/continuous/exponential.jl Co-authored-by: David Widmann * replace some Vector{Any} by Tuple Co-authored-by: David Widmann --- src/Distributions.jl | 1 + src/univariate/continuous/chisq.jl | 5 +++ src/univariate/continuous/erlang.jl | 4 +++ src/univariate/continuous/exponential.jl | 4 +++ src/univariate/continuous/gamma.jl | 4 +++ src/univariate/continuous/laplace.jl | 4 +++ src/univariate/continuous/logistic.jl | 5 ++- src/univariate/continuous/noncentralchisq.jl | 4 +++ 
src/univariate/continuous/normal.jl | 4 +++ src/univariate/continuous/uniform.jl | 30 +++++++++++++++++ src/univariate/discrete/bernoulli.jl | 5 +++ src/univariate/discrete/binomial.jl | 4 +++ src/univariate/discrete/dirac.jl | 1 + src/univariate/discrete/geometric.jl | 5 +++ src/univariate/discrete/negativebinomial.jl | 4 +++ src/univariate/discrete/poisson.jl | 1 + src/univariates.jl | 11 +++++++ test/runtests.jl | 13 ++++---- test/testutils.jl | 20 +++++++++++ test/univariate/continuous/chisq.jl | 2 ++ test/univariate/continuous/erlang.jl | 2 ++ test/univariate/continuous/exponential.jl | 4 +++ test/univariate/continuous/gamma.jl | 3 ++ test/univariate/continuous/laplace.jl | 1 + test/univariate/continuous/logistic.jl | 2 ++ test/univariate/continuous/noncentralchisq.jl | 1 + test/univariate/continuous/normal.jl | 2 ++ test/univariate/continuous/uniform.jl | 33 +++++++++++++++++++ test/univariate/discrete/bernoulli.jl | 3 ++ test/univariate/discrete/dirac.jl | 2 ++ test/univariate/discrete/geometric.jl | 2 ++ test/univariate/discrete/negativebinomial.jl | 2 ++ test/univariate/discrete/poisson.jl | 4 +++ 33 files changed, 185 insertions(+), 7 deletions(-) create mode 100644 test/univariate/continuous/chisq.jl create mode 100644 test/univariate/continuous/erlang.jl create mode 100644 test/univariate/continuous/exponential.jl create mode 100644 test/univariate/continuous/gamma.jl create mode 100644 test/univariate/continuous/logistic.jl create mode 100644 test/univariate/continuous/noncentralchisq.jl create mode 100644 test/univariate/discrete/poisson.jl diff --git a/src/Distributions.jl b/src/Distributions.jl index e05b429ef7..6b74340eda 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -229,6 +229,7 @@ export meanlogx, # the mean of log(x) median, # median of distribution mgf, # moment generating function + cgf, # cumulant generating function mode, # the mode of a unimodal distribution modes, # mode(s) of distribution as vector moment, # moments of 
distribution diff --git a/src/univariate/continuous/chisq.jl b/src/univariate/continuous/chisq.jl index 18884c1f6c..5a5a8240d5 100644 --- a/src/univariate/continuous/chisq.jl +++ b/src/univariate/continuous/chisq.jl @@ -83,6 +83,11 @@ end @_delegate_statsfuns Chisq chisq ν mgf(d::Chisq, t::Real) = (1 - 2 * t)^(-d.ν/2) +function cgf(d::Chisq, t) + ν = dof(d) + return -ν/2 * log1p(-2*t) +end + cf(d::Chisq, t::Real) = (1 - 2 * im * t)^(-d.ν/2) diff --git a/src/univariate/continuous/erlang.jl b/src/univariate/continuous/erlang.jl index 367d1168ba..cd984415e0 100644 --- a/src/univariate/continuous/erlang.jl +++ b/src/univariate/continuous/erlang.jl @@ -77,6 +77,10 @@ function entropy(d::Erlang) end mgf(d::Erlang, t::Real) = (1 - t * d.θ)^(-d.α) +function cgf(d::Erlang, t) + α, θ = params(d) + -α * log1p(-t*θ) +end cf(d::Erlang, t::Real) = (1 - im * t * d.θ)^(-d.α) diff --git a/src/univariate/continuous/exponential.jl b/src/univariate/continuous/exponential.jl index c84988d61e..d1c487057f 100644 --- a/src/univariate/continuous/exponential.jl +++ b/src/univariate/continuous/exponential.jl @@ -97,6 +97,10 @@ invlogccdf(d::Exponential, lp::Real) = -xval(d, lp) gradlogpdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? 
-rate(d) : zero(T) mgf(d::Exponential, t::Real) = 1/(1 - t * scale(d)) +function cgf(d::Exponential, t) + μ = mean(d) + return - log1p(- t * μ) +end cf(d::Exponential, t::Real) = 1/(1 - t * im * scale(d)) diff --git a/src/univariate/continuous/gamma.jl b/src/univariate/continuous/gamma.jl index fe98c34d2b..96ca058afa 100644 --- a/src/univariate/continuous/gamma.jl +++ b/src/univariate/continuous/gamma.jl @@ -77,6 +77,10 @@ function entropy(d::Gamma) end mgf(d::Gamma, t::Real) = (1 - t * d.θ)^(-d.α) +function cgf(d::Gamma, t) + α, θ = params(d) + return α * cgf(Exponential{typeof(θ)}(θ), t) +end cf(d::Gamma, t::Real) = (1 - im * t * d.θ)^(-d.α) diff --git a/src/univariate/continuous/laplace.jl b/src/univariate/continuous/laplace.jl index dea2a295da..2a7bf04a47 100644 --- a/src/univariate/continuous/laplace.jl +++ b/src/univariate/continuous/laplace.jl @@ -107,6 +107,10 @@ function mgf(d::Laplace, t::Real) st = d.θ * t exp(t * d.μ) / ((1 - st) * (1 + st)) end +function cgf(d::Laplace, t) + μ, θ = params(d) + t*μ - log1p(-(θ*t)^2) +end function cf(d::Laplace, t::Real) st = d.θ * t cis(t * d.μ) / (1+st*st) diff --git a/src/univariate/continuous/logistic.jl b/src/univariate/continuous/logistic.jl index fdb95e6959..67e3608947 100644 --- a/src/univariate/continuous/logistic.jl +++ b/src/univariate/continuous/logistic.jl @@ -97,7 +97,10 @@ function gradlogpdf(d::Logistic, x::Real) end mgf(d::Logistic, t::Real) = exp(t * d.μ) / sinc(d.θ * t) - +function cgf(d::Logistic, t) + μ, θ = params(d) + t*μ - log(sinc(θ*t)) +end function cf(d::Logistic, t::Real) a = (π * t) * d.θ a == zero(a) ? 
complex(one(a)) : cis(t * d.μ) * (a / sinh(a)) diff --git a/src/univariate/continuous/noncentralchisq.jl b/src/univariate/continuous/noncentralchisq.jl index 5340b9d71d..24a7558b4a 100644 --- a/src/univariate/continuous/noncentralchisq.jl +++ b/src/univariate/continuous/noncentralchisq.jl @@ -65,6 +65,10 @@ kurtosis(d::NoncentralChisq) = 12(d.ν + 4d.λ)/(d.ν + 2d.λ)^2 function mgf(d::NoncentralChisq, t::Real) exp(d.λ * t/(1 - 2t))*(1 - 2t)^(-d.ν/2) end +function cgf(d::NoncentralChisq, t) + ν, λ = params(d) + return λ*t/(1 - 2*t) + cgf(Chisq{typeof(ν)}(ν), t) +end function cf(d::NoncentralChisq, t::Real) cis(d.λ * t/(1 - 2im*t))*(1 - 2im*t)^(-d.ν/2) diff --git a/src/univariate/continuous/normal.jl b/src/univariate/continuous/normal.jl index a4007c1e3e..6f8d131b66 100644 --- a/src/univariate/continuous/normal.jl +++ b/src/univariate/continuous/normal.jl @@ -92,6 +92,10 @@ end gradlogpdf(d::Normal, x::Real) = (d.μ - x) / d.σ^2 mgf(d::Normal, t::Real) = exp(t * d.μ + d.σ^2 / 2 * t^2) +function cgf(d::Normal, t) + μ,σ = params(d) + t*μ + (σ*t)^2/2 +end cf(d::Normal, t::Real) = exp(im * t * d.μ - d.σ^2 / 2 * t^2) #### Affine transformations diff --git a/src/univariate/continuous/uniform.jl b/src/univariate/continuous/uniform.jl index f707b38beb..dd3fd59ed6 100644 --- a/src/univariate/continuous/uniform.jl +++ b/src/univariate/continuous/uniform.jl @@ -101,6 +101,36 @@ function mgf(d::Uniform, t::Real) v = (a + b) * t / 2 exp(v) * (sinh(u) / u) end +function cgf_uniform_around_zero_kernel(x) + # taylor series of (exp(x) - x - 1) / x + T = typeof(x) + a0 = inv(T(2)) + a1 = inv(T(6)) + a2 = inv(T(24)) + a3 = inv(T(120)) + x*@evalpoly(x, a0, a1, a2, a3) +end + +function cgf(d::Uniform, t) + # log((exp(t*b) - exp(t*a))/ (t*(b-a))) + a,b = params(d) + x = t*(b-a) + if abs(x) <= sqrt(eps(float(one(x)))) + cgf_around_zero(d, t) + else + cgf_away_from_zero(d, t) + end +end +function cgf_around_zero(d::Uniform, t) + a,b = params(d) + x = t*(b-a) + t*a + 
log1p(cgf_uniform_around_zero_kernel(x)) +end +function cgf_away_from_zero(d::Uniform, t) + a,b = params(d) + x = t*(b-a) + logsubexp(t*b, t*a) - log(abs(x)) +end function cf(d::Uniform, t::Real) (a, b) = params(d) diff --git a/src/univariate/discrete/bernoulli.jl b/src/univariate/discrete/bernoulli.jl index d27bc9b50f..c1dee968ce 100644 --- a/src/univariate/discrete/bernoulli.jl +++ b/src/univariate/discrete/bernoulli.jl @@ -104,6 +104,11 @@ function cquantile(d::Bernoulli{T}, p::Real) where T<:Real end mgf(d::Bernoulli, t::Real) = failprob(d) + succprob(d) * exp(t) +function cgf(d::Bernoulli, t) + p, = params(d) + # log(1-p+p*exp(t)) + logaddexp(log1p(-p), t+log(p)) +end cf(d::Bernoulli, t::Real) = failprob(d) + succprob(d) * cis(t) diff --git a/src/univariate/discrete/binomial.jl b/src/univariate/discrete/binomial.jl index e8e17afdd1..f4102cbb80 100644 --- a/src/univariate/discrete/binomial.jl +++ b/src/univariate/discrete/binomial.jl @@ -147,6 +147,10 @@ function mgf(d::Binomial, t::Real) n, p = params(d) (one(p) - p + p * exp(t)) ^ n end +function cgf(d::Binomial, t) + n, p = params(d) + n * cgf(Bernoulli{typeof(p)}(p), t) +end function cf(d::Binomial, t::Real) n, p = params(d) diff --git a/src/univariate/discrete/dirac.jl b/src/univariate/discrete/dirac.jl index 7be41459fa..94d082b0fa 100644 --- a/src/univariate/discrete/dirac.jl +++ b/src/univariate/discrete/dirac.jl @@ -50,6 +50,7 @@ logccdf(d::Dirac, x::Real) = x < d.value ? 0.0 : isnan(x) ? NaN : -Inf quantile(d::Dirac{T}, p::Real) where {T} = 0 <= p <= 1 ? 
d.value : T(NaN) mgf(d::Dirac, t) = exp(t * d.value) +cgf(d::Dirac, t) = t*d.value cf(d::Dirac, t) = cis(t * d.value) #### Sampling diff --git a/src/univariate/discrete/geometric.jl b/src/univariate/discrete/geometric.jl index 4cdb614137..f89cebfedd 100644 --- a/src/univariate/discrete/geometric.jl +++ b/src/univariate/discrete/geometric.jl @@ -127,6 +127,11 @@ function laplace_transform(d::Geometric, t) p / (p - (1 - p) * expm1(-t)) end mgf(d::Geometric, t::Real) = laplace_transform(d, -t) +function cgf(d::Geometric, t) + p = succprob(d) + # log(p / (1 - (1-p) * exp(t))) + log(p) - log1mexp(t + log1p(-p)) +end cf(d::Geometric, t::Real) = laplace_transform(d, -t*im) ### Sampling diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index 128e3a0ab0..fc8caf8ae1 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -132,6 +132,10 @@ function laplace_transform(d::NegativeBinomial, t) end mgf(d::NegativeBinomial, t::Real) = laplace_transform(d, -t) +function cgf(d::NegativeBinomial, t) + r, p = params(d) + r * cgf(Geometric{typeof(p)}(p), t) +end cf(d::NegativeBinomial, t::Real) = laplace_transform(d, -t*im) # ChainRules definitions diff --git a/src/univariate/discrete/poisson.jl b/src/univariate/discrete/poisson.jl index 5a1cd428ef..b7f64dba7f 100644 --- a/src/univariate/discrete/poisson.jl +++ b/src/univariate/discrete/poisson.jl @@ -101,6 +101,7 @@ function mgf(d::Poisson, t::Real) λ = rate(d) return exp(λ * (exp(t) - 1)) end +cgf(d::Poisson, t) = mean(d) * expm1(t) function cf(d::Poisson, t::Real) λ = rate(d) diff --git a/src/univariates.jl b/src/univariates.jl index 1131570150..0384f45b33 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -274,6 +274,17 @@ Evaluate the moment generating function of distribution `d`. 
""" mgf(d::UnivariateDistribution, t) +""" + cgf(d::UnivariateDistribution, t) + +Evaluate the [cumulant-generating-function](https://en.wikipedia.org/wiki/Cumulant) of `distribution` at `t`. +Mathematically the cumulant-generating-function is the logarithm of the [moment-generating-function](https://en.wikipedia.org/wiki/Moment-generating_function): +`cgf = log ∘ mgf`. In practice, however, the right hand side may have overflow issues. + +See also [`mgf`](@ref) +""" +cgf(d::UnivariateDistribution, t) + """ cf(d::UnivariateDistribution, t) diff --git a/test/runtests.jl b/test/runtests.jl index cddc68121c..deb1affd4f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -66,10 +66,17 @@ const tests = [ "univariate/discrete/negativebinomial", "univariate/discrete/geometric", "univariate/discrete/bernoulli", + "univariate/discrete/poisson", "univariate/discrete/soliton", "univariate/continuous/skewnormal", "univariate/continuous/chi", + "univariate/continuous/chisq", + "univariate/continuous/erlang", + "univariate/continuous/exponential", + "univariate/continuous/gamma", "univariate/continuous/gumbel", + "univariate/continuous/logistic", + "univariate/continuous/noncentralchisq", "pdfnorm", "univariate/continuous/rician", "functionals", @@ -112,14 +119,10 @@ const tests = [ # "univariate/continuous/beta", # "univariate/continuous/betaprime", # "univariate/continuous/biweight", - # "univariate/continuous/chisq", # "univariate/continuous/cosine", # "univariate/continuous/epanechnikov", - # "univariate/continuous/erlang", - # "univariate/continuous/exponential", # "univariate/continuous/fdist", # "univariate/continuous/frechet", - # "univariate/continuous/gamma", # "univariate/continuous/generalizedextremevalue", # "univariate/continuous/generalizedpareto", # "univariate/continuous/inversegamma", @@ -127,9 +130,7 @@ const tests = [ # "univariate/continuous/ksdist", # "univariate/continuous/ksonesided", # "univariate/continuous/levy", - # 
"univariate/continuous/logistic", # "univariate/continuous/noncentralbeta", - # "univariate/continuous/noncentralchisq", # "univariate/continuous/noncentralf", # "univariate/continuous/normalcanon", # "univariate/continuous/normalinversegaussian", diff --git a/test/testutils.jl b/test/testutils.jl index 4d773f3d17..eb90b830fa 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -5,6 +5,7 @@ using Random using Printf: @printf using Test: @test import FiniteDifferences +import ForwardDiff # to workaround issues of Base.linspace function _linspace(a::Float64, b::Float64, n::Int) @@ -43,6 +44,25 @@ function test_distr(distr::DiscreteUnivariateDistribution, n::Int; test_params(distr) end +function test_cgf(dist, ts) + κ₀ = cgf(dist, 0) + @test κ₀ ≈ 0 atol=2*eps(one(float(κ₀))) + d(f) = Base.Fix1(ForwardDiff.derivative, f) + κ₁ = d(Base.Fix1(cgf, dist))(0) + @test κ₁ ≈ mean(dist) + if VERSION >= v"1.4" + κ₂ = d(d(Base.Fix1(cgf, dist)))(0) + @test κ₂ ≈ var(dist) + end + for t in ts + val = @inferred cgf(dist, t) + @test isfinite(val) + if isfinite(mgf(dist, t)) + rtol = eps(float(one(t)))^(1/2) + @test (exp∘cgf)(dist, t) ≈ mgf(dist, t) rtol=rtol + end + end +end # testing the implementation of a continuous univariate distribution # diff --git a/test/univariate/continuous/chisq.jl b/test/univariate/continuous/chisq.jl new file mode 100644 index 0000000000..156bcdb5fa --- /dev/null +++ b/test/univariate/continuous/chisq.jl @@ -0,0 +1,2 @@ +test_cgf(Chisq(1), (0.49, -1, -100, -1f6)) +test_cgf(Chisq(3), (0.49, -1, -100, -1f6)) diff --git a/test/univariate/continuous/erlang.jl b/test/univariate/continuous/erlang.jl new file mode 100644 index 0000000000..dc96b517a9 --- /dev/null +++ b/test/univariate/continuous/erlang.jl @@ -0,0 +1,2 @@ +test_cgf(Erlang(1,0.4) , (1, 1/0.400001, -1, -100f0, -1e6)) +test_cgf(Erlang(10,0.01), (1, 1/0.010001f0, -1, -100f0, -1e6)) diff --git a/test/univariate/continuous/exponential.jl b/test/univariate/continuous/exponential.jl new file mode 
100644 index 0000000000..6704554a79 --- /dev/null +++ b/test/univariate/continuous/exponential.jl @@ -0,0 +1,4 @@ + +test_cgf(Exponential(1), (0.9, -1, -100f0, -1e6)) +test_cgf(Exponential(0.91), (0.9, -1, -100f0, -1e6)) +test_cgf(Exponential(10 ), (0.08, -1, -100f0, -1e6)) diff --git a/test/univariate/continuous/gamma.jl b/test/univariate/continuous/gamma.jl new file mode 100644 index 0000000000..b7b866a746 --- /dev/null +++ b/test/univariate/continuous/gamma.jl @@ -0,0 +1,3 @@ +test_cgf(Gamma(1 ,1 ), (0.9, -1, -100f0, -1e6)) +test_cgf(Gamma(10 ,1 ), (0.9, -1, -100f0, -1e6)) +test_cgf(Gamma(0.2, 10), (0.08, -1, -100f0, -1e6)) diff --git a/test/univariate/continuous/laplace.jl b/test/univariate/continuous/laplace.jl index a551f714a8..9288927982 100644 --- a/test/univariate/continuous/laplace.jl +++ b/test/univariate/continuous/laplace.jl @@ -1,4 +1,5 @@ @testset "laplace.jl" begin # affine transformations + test_cgf(Laplace(1, 1), (0.99, -0.99, 1f-2, -1f-5)) test_affine_transformations(Laplace, randn(), randn()^2) end diff --git a/test/univariate/continuous/logistic.jl b/test/univariate/continuous/logistic.jl new file mode 100644 index 0000000000..3eb0b8f2d0 --- /dev/null +++ b/test/univariate/continuous/logistic.jl @@ -0,0 +1,2 @@ +test_cgf(Logistic(0, 1), (-0.99,0.99, 1f-2, -1f-2)) +test_cgf(Logistic(100,10), (-0.099,0.099, 1f-2, -1f-2)) diff --git a/test/univariate/continuous/noncentralchisq.jl b/test/univariate/continuous/noncentralchisq.jl new file mode 100644 index 0000000000..b73d4e0426 --- /dev/null +++ b/test/univariate/continuous/noncentralchisq.jl @@ -0,0 +1 @@ +test_cgf(NoncentralChisq(3,2), (0.49, -1, -100, -1f6)) diff --git a/test/univariate/continuous/normal.jl b/test/univariate/continuous/normal.jl index 37780d03d3..6f494d9383 100644 --- a/test/univariate/continuous/normal.jl +++ b/test/univariate/continuous/normal.jl @@ -3,6 +3,8 @@ using Test, Distributions, ForwardDiff isnan_type(::Type{T}, v) where {T} = isnan(v) && v isa T @testset "Normal" 
begin + test_cgf(Normal(0,1 ), (1, -1, 100f0, 1e6, -1e6)) + test_cgf(Normal(1,0.4), (1, -1, 100f0, 1e6, -1e6)) @test isa(convert(Normal{Float64}, Float16(0), Float16(1)), Normal{Float64}) d = Normal(1.1, 2.3) diff --git a/test/univariate/continuous/uniform.jl b/test/univariate/continuous/uniform.jl index d80b963010..5936c3ea8a 100644 --- a/test/univariate/continuous/uniform.jl +++ b/test/univariate/continuous/uniform.jl @@ -8,6 +8,8 @@ using Test @testset "uniform.jl" begin # affine transformations test_affine_transformations(Uniform, rand(), 4 + rand()) + test_cgf(Uniform(0,1), (1, -1, 100f0, 1e6, -1e6)) + test_cgf(Uniform(100f0,101f0), (1, -1, 100f0, 1e6, -1e6)) @testset "ChainRules" begin # run test suite for values in the support @@ -47,4 +49,35 @@ using Test @test fit(Uniform, data) == Uniform(10, 20) end end + @testset "cgf uniform around 0" begin + for (lo, hi, t) in [ + ((Float16(0), Float16(1), sqrt(eps(Float16)))), + ((Float16(0), Float16(1), Float16(0))), + ((Float16(0), Float16(1), -sqrt(eps(Float16)))), + (0f0, 1f0, sqrt(eps(Float32))), + (0f0, 1f0, 0f0), + (0f0, 1f0, -sqrt(eps(Float32))), + (-2f0, 1f0, 1f-30), + (-2f-4, -1f-4, -2f-40), + (0.0, 1.0, sqrt(eps(Float64))), + (0.0, 1.0, 0.0), + (0.0, 1.0, -sqrt(eps(Float64))), + (-2.0, 5.0, -1e-35), + ] + T = typeof(lo) + @assert T == typeof(lo) == typeof(hi) == typeof(t) + @assert t <= sqrt(eps(T)) + d = Uniform(lo, hi) + precision = 512 + d_big = Uniform(BigFloat(lo, precision=precision), BigFloat(hi; precision=precision)) + t_big = BigFloat(t, precision=precision) + @test cgf(d, t) isa T + if iszero(t) + @test cgf(d,t) === zero(t) + else + @test Distributions.cgf_around_zero(d, t) ≈ Distributions.cgf_away_from_zero(d_big, t_big) atol=eps(t) rtol=0 + @test Distributions.cgf_around_zero(d, t) === cgf(d, t) + end + end + end end diff --git a/test/univariate/discrete/bernoulli.jl b/test/univariate/discrete/bernoulli.jl index e9961f8d39..2d72b33c0a 100644 --- a/test/univariate/discrete/bernoulli.jl +++ 
b/test/univariate/discrete/bernoulli.jl @@ -3,3 +3,6 @@ using Test, Random @test rand(Bernoulli()) isa Bool @test rand(Bernoulli(), 10) isa Vector{Bool} + +test_cgf(Bernoulli(0.5), (1f0, -1f0,1e6, -1e6)) +test_cgf(Bernoulli(0.1), (1f0, -1f0,1e6, -1e6)) diff --git a/test/univariate/discrete/dirac.jl b/test/univariate/discrete/dirac.jl index d3b0c7c6b8..cf027918b6 100644 --- a/test/univariate/discrete/dirac.jl +++ b/test/univariate/discrete/dirac.jl @@ -2,6 +2,8 @@ using Distributions using Test @testset "Dirac tests" begin + test_cgf(Dirac(13) , (1, 1f-4, 1e10, 10, -4)) + test_cgf(Dirac(-1f2), (1, 1f-4, 1e10, 10,-4)) for val in (3, 3.0, -3.5) d = Dirac(val) diff --git a/test/univariate/discrete/geometric.jl b/test/univariate/discrete/geometric.jl index 2274e523ee..4fd9ecd047 100644 --- a/test/univariate/discrete/geometric.jl +++ b/test/univariate/discrete/geometric.jl @@ -15,4 +15,6 @@ using FiniteDifferences m2 = var(d) + mean(d)^2 @test fdm2(Base.Fix1(mgf, d), 0) ≈ m2 @test fdm2(Base.Fix1(cf, d), 0) ≈ -m2 + test_cgf(Geometric(0.1), (1f-1, -1e6)) + test_cgf(Geometric(0.5), (1f-1, -1e6)) end diff --git a/test/univariate/discrete/negativebinomial.jl b/test/univariate/discrete/negativebinomial.jl index 14f5402fce..6278442193 100644 --- a/test/univariate/discrete/negativebinomial.jl +++ b/test/univariate/discrete/negativebinomial.jl @@ -6,6 +6,8 @@ using FiniteDifferences # Currently, most of the tests for NegativeBinomial are in the "ref" folder. 
# Eventually, we might want to consolidate the tests here +test_cgf(NegativeBinomial(10,0.5), (-1f0, -200.0,-1e6)) +test_cgf(NegativeBinomial(3,0.1), (-1f0, -200.0,-1e6)) mydiffp(r, p, k) = r/p - k/(1 - p) @testset "issue #1603" begin d = NegativeBinomial(4, 0.2) diff --git a/test/univariate/discrete/poisson.jl b/test/univariate/discrete/poisson.jl new file mode 100644 index 0000000000..657acb3c1f --- /dev/null +++ b/test/univariate/discrete/poisson.jl @@ -0,0 +1,4 @@ + +test_cgf(Poisson(1 ), (1f0,2f0,10.0,50.0)) +test_cgf(Poisson(10 ), (1f0,2f0,10.0,50.0)) +test_cgf(Poisson(1e-3), (1f0,2f0,10.0,50.0)) From 2dc764e450a5b33f1506669e3649f37a1dbda4f1 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 1 Sep 2022 22:07:55 +0200 Subject: [PATCH 22/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 579689c3d2..1c9e6f76b9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.68" +version = "0.25.69" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From ca13aa7a922a06fb6f56bef4fc75414cfa22b61c Mon Sep 17 00:00:00 2001 From: Mohamed Tarek Date: Fri, 2 Sep 2022 09:36:45 +1000 Subject: [PATCH 23/93] Return -Inf in logpdf of LKJCholesky when out of support (#1610) * ArgumentError to DomainError * add test * return -Inf * Update test/cholesky/lkjcholesky.jl Co-authored-by: David Widmann * fix and test Float32 support * remove redundant test * Update src/matrix/lkj.jl Co-authored-by: David Widmann * Update test/cholesky/lkjcholesky.jl Co-authored-by: David Widmann * fix sumlogs * simplify condition * Update src/matrix/lkj.jl Co-authored-by: David Widmann * Update src/matrix/lkj.jl Co-authored-by: David Widmann * Update src/matrix/lkj.jl Co-authored-by: David Widmann * Remove static type parameter * Update src/matrix/lkj.jl * fix tests * more type 
generalisations * Update test/cholesky/lkjcholesky.jl Co-authored-by: David Widmann * Update test/cholesky/lkjcholesky.jl Co-authored-by: David Widmann * address comments Co-authored-by: David Widmann Co-authored-by: mohamed82008 --- src/cholesky/lkjcholesky.jl | 4 ++-- src/matrix/lkj.jl | 37 +++++++++++++++++++----------------- test/cholesky/lkjcholesky.jl | 7 ++++++- test/matrixvariates.jl | 8 ++++---- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/cholesky/lkjcholesky.jl b/src/cholesky/lkjcholesky.jl index 8020e45045..89f3d738d7 100644 --- a/src/cholesky/lkjcholesky.jl +++ b/src/cholesky/lkjcholesky.jl @@ -136,8 +136,8 @@ function logkernel(d::LKJCholesky, R::LinearAlgebra.Cholesky) end function logpdf(d::LKJCholesky, R::LinearAlgebra.Cholesky) - insupport(d, R) || throw(ArgumentError("provided point is not in the support")) - return _logpdf(d, R) + lp = _logpdf(d, R) + return insupport(d, R) ? lp : oftype(lp, -Inf) end _logpdf(d::LKJCholesky, R::LinearAlgebra.Cholesky) = logkernel(d, R) + d.logc0 diff --git a/src/matrix/lkj.jl b/src/matrix/lkj.jl index b76675eab5..309d71bda9 100644 --- a/src/matrix/lkj.jl +++ b/src/matrix/lkj.jl @@ -100,12 +100,13 @@ params(d::LKJ) = (d.d, d.η) # ----------------------------------------------------------------------------- function lkj_logc0(d::Integer, η::Real) + T = float(Base.promote_typeof(d, η)) d > 1 || return zero(η) if isone(η) if iseven(d) - logc0 = -lkj_onion_loginvconst_uniform_even(d) + logc0 = -lkj_onion_loginvconst_uniform_even(d, T) else - logc0 = -lkj_onion_loginvconst_uniform_odd(d) + logc0 = -lkj_onion_loginvconst_uniform_odd(d, T) end else logc0 = -lkj_onion_loginvconst(d, η) @@ -188,32 +189,34 @@ end function lkj_onion_loginvconst(d::Integer, η::Real) # Equation (17) in LKJ (2009 JMA) - sumlogs = zero(η) - for k in 2:d - 1 - sumlogs += 0.5k*logπ + loggamma(η + 0.5(d - 1 - k)) + T = float(Base.promote_typeof(d, η)) + h = T(1//2) + α = η + h * d - 1 + loginvconst = (2*η + d - 3)*T(logtwo) 
+ (T(logπ) / 4) * (d * (d - 1) - 2) + logbeta(α, α) - (d - 2) * loggamma(η + h * (d - 1)) + for k in 2:(d - 1) + loginvconst += loggamma(η + h * (d - 1 - k)) end - α = η + 0.5d - 1 - loginvconst = (2η + d - 3)*logtwo + logbeta(α, α) + sumlogs - (d - 2) * loggamma(η + 0.5(d - 1)) return loginvconst end -function lkj_onion_loginvconst_uniform_odd(d::Integer) +function lkj_onion_loginvconst_uniform_odd(d::Integer, ::Type{T}) where {T <: Real} # Theorem 5 in LKJ (2009 JMA) - sumlogs = 0.0 - for k in 1:div(d - 1, 2) - sumlogs += loggamma(2k) + h = T(1//2) + loginvconst = (d - 1) * ((d + 1) * (T(logπ) / 4) - (d - 1) * (T(logtwo) / 4) - loggamma(h * (d + 1))) + for k in 2:2:(d - 1) + loginvconst += loggamma(T(k)) end - loginvconst = 0.25(d^2 - 1)*logπ + sumlogs - 0.25(d - 1)^2*logtwo - (d - 1)*loggamma(0.5(d + 1)) return loginvconst end -function lkj_onion_loginvconst_uniform_even(d::Integer) +function lkj_onion_loginvconst_uniform_even(d::Integer, ::Type{T}) where {T <: Real} # Theorem 5 in LKJ (2009 JMA) - sumlogs = 0.0 - for k in 1:div(d - 2, 2) - sumlogs += loggamma(2k) + h = T(1//2) + loginvconst = d * ((d - 2) * (T(logπ) / 4) + (3 * d - 4) * (T(logtwo) / 4) + loggamma(h * d)) - (d - 1) * loggamma(T(d)) + for k in 2:2:(d - 2) + loginvconst += loggamma(k) end - loginvconst = 0.25d*(d - 2)*logπ + 0.25(3d^2 - 4d)*logtwo + d*loggamma(0.5d) + sumlogs - (d - 1)*loggamma(d) + return loginvconst end function lkj_vine_loginvconst(d::Integer, η::Real) diff --git a/test/cholesky/lkjcholesky.jl b/test/cholesky/lkjcholesky.jl index c7c561a265..a0e8283436 100644 --- a/test/cholesky/lkjcholesky.jl +++ b/test/cholesky/lkjcholesky.jl @@ -135,12 +135,17 @@ using FiniteDifferences @test m isa Cholesky{eltype(d)} @test Matrix(m) ≈ I end - @test_broken partype(LKJCholesky(2, 4f0)) <: Float32 + for (d, η) in ((2, 4), (2, 1), (3, 1)), T in (Float32, Float64) + @test @inferred(partype(LKJCholesky(d, T(η)))) === T + end @testset "insupport" begin @test insupport(LKJCholesky(40, 2, 'U'), 
cholesky(rand(LKJ(40, 2)))) @test insupport(LKJCholesky(40, 2), cholesky(rand(LKJ(40, 2)))) @test !insupport(LKJCholesky(40, 2), cholesky(rand(LKJ(41, 2)))) + for (d, η) in ((2, 4), (2, 1), (3, 1)), T in (Float32, Float64) + @test @inferred(logpdf(LKJCholesky(40, T(2)), cholesky(T.(rand(LKJ(41, 2)))))) === T(-Inf) + end z = rand(LKJ(40, 1)) z .+= exp(Symmetric(randn(size(z)))) .* 1e-8 x = cholesky(z) diff --git a/test/matrixvariates.jl b/test/matrixvariates.jl index adb67e62c1..3d0dddfff6 100644 --- a/test/matrixvariates.jl +++ b/test/matrixvariates.jl @@ -454,11 +454,11 @@ function test_special(dist::Type{LKJ}) η = 1.0 lkj = LKJ(d, η) @test Distributions.lkj_vine_loginvconst(d, η) ≈ Distributions.lkj_onion_loginvconst(d, η) - @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.lkj_onion_loginvconst_uniform_odd(d) + @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.lkj_onion_loginvconst_uniform_odd(d, Float64) @test Distributions.lkj_vine_loginvconst(d, η) ≈ Distributions.lkj_vine_loginvconst_uniform(d) @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.lkj_loginvconst_alt(d, η) @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.corr_logvolume(d) - @test lkj.logc0 == -Distributions.lkj_onion_loginvconst_uniform_odd(d) + @test lkj.logc0 == -Distributions.lkj_onion_loginvconst_uniform_odd(d, Float64) # ============= # even non-uniform # ============= @@ -475,11 +475,11 @@ function test_special(dist::Type{LKJ}) η = 1.0 lkj = LKJ(d, η) @test Distributions.lkj_vine_loginvconst(d, η) ≈ Distributions.lkj_onion_loginvconst(d, η) - @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.lkj_onion_loginvconst_uniform_even(d) + @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.lkj_onion_loginvconst_uniform_even(d, Float64) @test Distributions.lkj_vine_loginvconst(d, η) ≈ Distributions.lkj_vine_loginvconst_uniform(d) @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.lkj_loginvconst_alt(d, 
η) @test Distributions.lkj_onion_loginvconst(d, η) ≈ Distributions.corr_logvolume(d) - @test lkj.logc0 == -Distributions.lkj_onion_loginvconst_uniform_even(d) + @test lkj.logc0 == -Distributions.lkj_onion_loginvconst_uniform_even(d, Float64) end @testset "check integrating constant as a volume" begin # d = 2: Lebesgue measure of the set of correlation matrices is 2. From 2689a6f3024afa78e36bac906f27728ab79ccffb Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 2 Sep 2022 01:37:30 +0200 Subject: [PATCH 24/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1c9e6f76b9..6351ba0d8d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.69" +version = "0.25.70" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 371a427205b605df38eb7d4f2aedc2ecd0d9047b Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 3 Sep 2022 22:14:16 +0200 Subject: [PATCH 25/93] Add `cgf` to documentation (#1612) * Add `cgf` to documentation * Improve docstrings --- docs/src/univariate.md | 1 + src/univariates.jl | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/src/univariate.md b/docs/src/univariate.md index cc43f7cece..0929a3d749 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -62,6 +62,7 @@ entropy(::UnivariateDistribution) entropy(::UnivariateDistribution, ::Bool) entropy(::UnivariateDistribution, ::Real) mgf(::UnivariateDistribution, ::Any) +cgf(::UnivariateDistribution, ::Any) cf(::UnivariateDistribution, ::Any) pdfsquaredL2norm ``` diff --git a/src/univariates.jl b/src/univariates.jl index 0384f45b33..dcbd38eadc 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -270,16 +270,20 @@ proper_kurtosis(d::Distribution) = kurtosis(d, false) """ mgf(d::UnivariateDistribution, t) -Evaluate the moment 
generating function of distribution `d`. +Evaluate the [moment-generating function](https://en.wikipedia.org/wiki/Moment-generating_function) of distribution `d` at `t`. + +See also [`cgf`](@ref) """ mgf(d::UnivariateDistribution, t) """ cgf(d::UnivariateDistribution, t) -Evaluate the [cumulant-generating-function](https://en.wikipedia.org/wiki/Cumulant) of `distribution` at `t`. -Mathematically the cumulant-generating-function is the logarithm of the [moment-generating-function](https://en.wikipedia.org/wiki/Moment-generating_function): -`cgf = log ∘ mgf`. In practice, however, the right hand side may have overflow issues. +Evaluate the [cumulant-generating function](https://en.wikipedia.org/wiki/Cumulant) of distribution `d` at `t`. + +The cumulant-generating-function is the logarithm of the [moment-generating function](https://en.wikipedia.org/wiki/Moment-generating_function): +`cgf = log ∘ mgf`. +In practice, however, the right hand side may have overflow issues. See also [`mgf`](@ref) """ From e67d6b3952ba775dcc10a2fc1c7546246539b10a Mon Sep 17 00:00:00 2001 From: Moritz Schauer Date: Tue, 6 Sep 2022 14:41:56 +0200 Subject: [PATCH 26/93] Docs: Creating matrix variate (#1613) Co-authored-by: David Widmann --- docs/src/extends.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/extends.md b/docs/src/extends.md index d731754205..0c87b8bfbe 100644 --- a/docs/src/extends.md +++ b/docs/src/extends.md @@ -154,13 +154,13 @@ It is also recommended that one also implements the following statistics functio - [`entropy(d::MultivariateDistribution)`](@ref) - [`cov(d::MultivariateDistribution)`](@ref) -## Create a Matrix-variate Distribution +## Create a Matrix-Variate Distribution -A multivariate distribution type should be defined as a subtype of `DiscreteMatrixDistribution` or `ContinuousMatrixDistribution`. +A matrix-variate distribution type should be defined as a subtype of `DiscreteMatrixDistribution` or `ContinuousMatrixDistribution`. 
The following methods need to be implemented for each matrix-variate distribution type: - [`size(d::MatrixDistribution)`](@ref) -- [`rand(d::MatrixDistribution)`](@ref) +- [`Distributions._rand!(rng::AbstractRNG, d::MatrixDistribution, A::AbstractMatrix)`](@ref) - [`sampler(d::MatrixDistribution)`](@ref) - [`Distributions._logpdf(d::MatrixDistribution, x::AbstractArray)`](@ref) From 973f86db8816c562eaa6b8e687732bd7698d1efb Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Tue, 13 Sep 2022 23:53:29 +0200 Subject: [PATCH 27/93] TDist: use one instead of 1 for type stability (#1614) * TDist: use one instead of 1 for type stability * Update src/univariate/continuous/tdist.jl * Add some tests * Update runtests.jl Co-authored-by: David Widmann --- src/univariate/continuous/tdist.jl | 6 +++++- test/runtests.jl | 1 + test/univariate/continuous/tdist.jl | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/univariate/continuous/tdist.jl diff --git a/src/univariate/continuous/tdist.jl b/src/univariate/continuous/tdist.jl index 106e92b3d3..4e55e8cfbc 100644 --- a/src/univariate/continuous/tdist.jl +++ b/src/univariate/continuous/tdist.jl @@ -79,7 +79,11 @@ end @_delegate_statsfuns TDist tdist ν -rand(rng::AbstractRNG, d::TDist) = randn(rng) / ( isinf(d.ν) ? 1 : sqrt(rand(rng, Chisq(d.ν))/d.ν) ) +function rand(rng::AbstractRNG, d::TDist) + ν = d.ν + z = sqrt(rand(rng, Chisq{typeof(ν)}(ν)) / ν) + return randn(rng) / (isinf(ν) ? 
one(z) : z) +end function cf(d::TDist{T}, t::Real) where T <: Real isinf(d.ν) && return cf(Normal(zero(T), one(T)), t) diff --git a/test/runtests.jl b/test/runtests.jl index deb1affd4f..45df7fe324 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -84,6 +84,7 @@ const tests = [ "reshaped", "univariate/continuous/skewedexponentialpower", "univariate/discrete/discreteuniform", + "univariate/continuous/tdist", ### missing files compared to /src: # "common", diff --git a/test/univariate/continuous/tdist.jl b/test/univariate/continuous/tdist.jl new file mode 100644 index 0000000000..4f0809b156 --- /dev/null +++ b/test/univariate/continuous/tdist.jl @@ -0,0 +1,9 @@ +using Distributions +using ForwardDiff + +using Test + +@testset "Type stability of `rand` (#1614)" begin + @inferred(rand(TDist(big"1.0"))) + @inferred(rand(TDist(ForwardDiff.Dual(1.0)))) +end From 35b3170349d325c4b6648f0b8cdafc904718b6b5 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 13 Sep 2022 23:53:55 +0200 Subject: [PATCH 28/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6351ba0d8d..08e6dfa4b7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.70" +version = "0.25.71" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 55664889948af13ff5d28f1d3a415f8c0cd04d68 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 16 Sep 2022 10:50:33 +0200 Subject: [PATCH 29/93] CompatHelper: bump compat for GR to 0.67 for package docs, (keep existing compat) (#1618) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 07d4f098f5..1984c419d3 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 
+4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66" +GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67" From c6ef060d529260a010dc7b90e462f081e6b78e8e Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 17 Sep 2022 23:39:31 +0200 Subject: [PATCH 30/93] Improve the `logpdf` of `NegativeBinomial` (#1583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve the `logpdf` of `NegativeBinomial` * Update tests * Update `rrule` * Bump version * Update test/univariate/discrete/negativebinomial.jl * Update negativebinomial.jl Co-authored-by: Mathieu Besançon --- src/univariate/discrete/negativebinomial.jl | 66 +++++++++++--------- test/univariate/discrete/negativebinomial.jl | 32 ++++++++-- 2 files changed, 63 insertions(+), 35 deletions(-) diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index fc8caf8ae1..8a0a079db8 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -97,14 +97,16 @@ end # Implement native pdf and logpdf since it's relatively straight forward and allows for ForwardDiff function logpdf(d::NegativeBinomial, k::Real) - r = d.r * log(d.p) + k * log1p(-d.p) - if isone(d.p) && iszero(k) - return zero(r) - elseif !insupport(d, k) - return oftype(r, -Inf) - else - return r - log(k + d.r) - logbeta(d.r, k + 1) + r, p = params(d) + z = xlogy(r, p) + xlog1py(k, -p) + + if iszero(k) + # in this case `logpdf(d, k) = z - log(k + r) - logbeta(r, k + 1) = z` analytically + # but unfortunately not numerically, so we handle this case separately to improve accuracy + return z end + + return insupport(d, k) ? 
z - log(k + r) - logbeta(r, k + 1) : oftype(z, -Inf) end # cdf and quantile functions are more involved so we still rely on Rmath @@ -140,35 +142,39 @@ cf(d::NegativeBinomial, t::Real) = laplace_transform(d, -t*im) # ChainRules definitions +## Callable struct to fix type inference issues caused by captured values +struct LogPDFNegativeBinomialPullback{D,T<:Real} + ∂r::T + ∂p::T +end + +function (f::LogPDFNegativeBinomialPullback{D})(Δ) where {D} + Δr = Δ * f.∂r + Δp = Δ * f.∂p + Δd = ChainRulesCore.Tangent{D}(; r=Δr, p=Δp) + return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.NoTangent() +end + function ChainRulesCore.rrule(::typeof(logpdf), d::NegativeBinomial, k::Real) - # Compute log probability + # Compute log probability (as in the definition of `logpdf(d, k)` above) r, p = params(d) - edgecase = isone(p) && iszero(k) - insupp = insupport(d, k) - - # Primal computation - Ω = r * log(p) + k * log1p(-p) - if edgecase - Ω = zero(Ω) - elseif !insupp - Ω = oftype(Ω, -Inf) + z = xlogy(r, p) + xlog1py(k, -p) + if iszero(k) + Ω = z + ∂r = oftype(z, log(p)) + ∂p = oftype(z, r/p) + elseif insupport(d, k) + Ω = z - log(k + r) - logbeta(r, k + 1) + ∂r = oftype(z, log(p) - inv(k + r) - digamma(r) + digamma(r + k + 1)) + ∂p = oftype(z, r/p - k / (1 - p)) else - Ω = Ω - log(k + r) - logbeta(r, k + 1) + Ω = oftype(z, -Inf) + ∂r = oftype(z, NaN) + ∂p = oftype(z, NaN) end # Define pullback - function logpdf_NegativeBinomial_pullback(Δ) - Δr = Δ * (log(p) - inv(k + r) - digamma(r) + digamma(r + k + 1)) - Δp = Δ * (r / p - k / (1 - p)) - if edgecase - Δp = oftype(Δp, Δ * r) - elseif !insupp - Δr = oftype(Δr, NaN) - Δp = oftype(Δp, NaN) - end - Δd = ChainRulesCore.Tangent{typeof(d)}(; r=Δr, p=Δp) - return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.NoTangent() - end + logpdf_NegativeBinomial_pullback = LogPDFNegativeBinomialPullback{typeof(d),typeof(z)}(∂r, ∂p) return Ω, logpdf_NegativeBinomial_pullback end diff --git a/test/univariate/discrete/negativebinomial.jl 
b/test/univariate/discrete/negativebinomial.jl index 6278442193..f1c86199e2 100644 --- a/test/univariate/discrete/negativebinomial.jl +++ b/test/univariate/discrete/negativebinomial.jl @@ -2,13 +2,17 @@ using Distributions using Test, ForwardDiff using ChainRulesTestUtils using FiniteDifferences +using StatsFuns # Currently, most of the tests for NegativeBinomial are in the "ref" folder. # Eventually, we might want to consolidate the tests here test_cgf(NegativeBinomial(10,0.5), (-1f0, -200.0,-1e6)) test_cgf(NegativeBinomial(3,0.1), (-1f0, -200.0,-1e6)) -mydiffp(r, p, k) = r/p - k/(1 - p) + +mydiffp(r, p, k) = iszero(k) ? r/p : r/p - k/(1 - p) +mydiffr(r, p, k) = iszero(k) ? log(p) : log(p) - inv(k + r) - digamma(r) + digamma(r + k + 1) + @testset "issue #1603" begin d = NegativeBinomial(4, 0.2) fdm = central_fdm(5, 1) @@ -23,19 +27,29 @@ mydiffp(r, p, k) = r/p - k/(1 - p) @test fdm2(Base.Fix1(cf, d), 0) ≈ -m2 end - @testset "NegativeBinomial r=$r, p=$p, k=$k" for p in exp10.(-10:0) .- eps(), # avoid p==1 since it's not differentiable r in exp10.(range(-10, stop=2, length=25)), k in (0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024) @test ForwardDiff.derivative(_p -> logpdf(NegativeBinomial(r, _p), k), p) ≈ mydiffp(r, p, k) rtol=1e-12 atol=1e-12 + @test ForwardDiff.derivative(_r -> logpdf(NegativeBinomial(_r, p), k), r) ≈ mydiffr(r, p, k) rtol=1e-12 atol=1e-12 end @testset "Check the corner case p==1" begin - @test logpdf(NegativeBinomial(0.5, 1.0), 0) === 0.0 - @test logpdf(NegativeBinomial(0.5, 1.0), 1) === -Inf - @test all(iszero, rand(NegativeBinomial(rand(), 1.0), 10)) + for r in randexp(10) + d = NegativeBinomial(r, 1.0) + @test @inferred(logpdf(d, 0)) === 0.0 + @test @inferred(logpdf(d, -1)) === -Inf + @test @inferred(logpdf(d, 1)) === -Inf + @test all(iszero, rand(d, 10)) + end +end + +@testset "Check the corner case k==0" begin + for r in randexp(5), p in rand(5) + @test @inferred(logpdf(NegativeBinomial(r, p), 0)) === xlogy(r, p) + end end @testset "rrule: 
logpdf of NegativeBinomial" begin @@ -59,3 +73,11 @@ end test_rrule(logpdf, dist, 0; fdm=fdm) test_rrule(logpdf, dist, 0.0 ⊢ ChainRulesTestUtils.NoTangent(); fdm=fdm) end + +@testset "issue #1582" begin + dp = mydiffp(1.0, 1.0, 0.0) + @test ForwardDiff.derivative(p -> logpdf(NegativeBinomial(1.0, p), 0.0), 1.0) == dp == 1.0 + + dr = mydiffr(1.0, 1.0, 0.0) + @test ForwardDiff.derivative(r -> logpdf(NegativeBinomial(r, 1.0), 0.0), 1.0) == dr == 0.0 +end From 3f7976bb4e13f96d6aa820a095b14bbe94547b3c Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 17 Sep 2022 23:40:05 +0200 Subject: [PATCH 31/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 08e6dfa4b7..354693b6f1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.71" +version = "0.25.72" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 1c4317df411e2fc843cc1f6f9a4d97c50e5e152d Mon Sep 17 00:00:00 2001 From: Marc Pabst <2624210+marcpabst@users.noreply.github.com> Date: Sun, 18 Sep 2022 00:48:25 +0200 Subject: [PATCH 32/93] Use besselix instead of besseli for better numerical stability. (#1598) * Use besselix instead of besseli for better nummerical stability. * fix typo * Update src/multivariate/vonmisesfisher.jl Co-authored-by: David Widmann * adding test * re-enable other tests ... 
* Apply suggestions from code review Co-authored-by: Marc Pabst Co-authored-by: David Widmann Co-authored-by: David Widmann --- src/multivariate/vonmisesfisher.jl | 4 ++-- test/multivariate/vonmisesfisher.jl | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/multivariate/vonmisesfisher.jl b/src/multivariate/vonmisesfisher.jl index de1e154017..e4fe981fe6 100644 --- a/src/multivariate/vonmisesfisher.jl +++ b/src/multivariate/vonmisesfisher.jl @@ -65,7 +65,7 @@ function _vmflck(p, κ) T = typeof(κ) hp = T(p/2) q = hp - 1 - q * log(κ) - hp * log2π - log(besseli(q, κ)) + q * log(κ) - hp * log2π - log(besselix(q, κ)) - κ end _vmflck3(κ) = log(κ) - log2π - κ - log1mexp(-2κ) vmflck(p, κ) = (p == 3 ? _vmflck3(κ) : _vmflck(p, κ)) @@ -124,4 +124,4 @@ function _vmf_estkappa(p::Int, ρ::Float64) return κ end -_vmfA(half_p::Float64, κ::Float64) = besseli(half_p, κ) / besseli(half_p - 1.0, κ) +_vmfA(half_p::Float64, κ::Float64) = besselix(half_p, κ) / besselix(half_p - 1.0, κ) diff --git a/test/multivariate/vonmisesfisher.jl b/test/multivariate/vonmisesfisher.jl index a654a0a91d..cc45f41ed5 100644 --- a/test/multivariate/vonmisesfisher.jl +++ b/test/multivariate/vonmisesfisher.jl @@ -5,9 +5,9 @@ using LinearAlgebra, Test using SpecialFunctions -vmfCp(p::Int, κ::Real) = (κ ^ (p/2 - 1)) / ((2π)^(p/2) * besseli(p/2-1, κ)) +logvmfCp(p::Int, κ::Real) = (p / 2 - 1) * log(κ) - log(besselix(p / 2 - 1, κ)) - κ - p / 2 * log(2π) -safe_vmfpdf(μ::Vector, κ::Real, x::Vector) = vmfCp(length(μ), κ) * exp(κ * dot(μ, x)) +safe_logvmfpdf(μ::Vector, κ::Real, x::Vector) = logvmfCp(length(μ), κ) + κ * dot(μ, x) function gen_vmf_tdata(n::Int, p::Int, rng::Union{AbstractRNG, Missing} = missing) @@ -99,13 +99,13 @@ function test_vonmisesfisher(p::Int, κ::Real, n::Int, ns::Int, @test meandir(d2) ≈ μ @test concentration(d2) ≈ κ - @test isapprox(d.logCκ, log(vmfCp(p, κ)), atol=1.0e-12) + @test d.logCκ ≈ logvmfCp(p, κ) atol=1.0e-12 X = gen_vmf_tdata(n, p, rng) lp0 = zeros(n) 
for i = 1:n xi = X[:,i] - lp0[i] = log(safe_vmfpdf(μ, κ, xi)) + lp0[i] = safe_logvmfpdf(μ, κ, xi) @test logpdf(d, xi) ≈ lp0[i] end @test logpdf(d, X) ≈ lp0 @@ -170,7 +170,8 @@ ns = 10^6 (3, 1.0), (3, 5.0), (5, 2.0), - (2, 2)] + (2, 2), + (2, 1000)] # test with large κ test_vonmisesfisher(p, κ, n, ns, rng) test_vmf_rot(p, rng) end From 9f9c5caf6cd99c5205a11ca1c8d6b6471fc9ce29 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 18 Sep 2022 00:49:47 +0200 Subject: [PATCH 33/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 354693b6f1..9a60c5eccf 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.72" +version = "0.25.73" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From cec029f96c4824a77bec479bb0a468221fdb93d3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Sep 2022 11:27:10 +0200 Subject: [PATCH 34/93] CompatHelper: bump compat for GR to 0.68 for package docs, (keep existing compat) (#1620) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 1984c419d3..1d0784bec9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67" +GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68" From 6c93553f957c7ba8ed20031112eb415d32d2c682 Mon Sep 17 00:00:00 2001 From: Philipp Gabler Date: Thu, 22 Sep 2022 16:39:42 +0200 Subject: [PATCH 35/93] Add `init` argument to `Product` logpdf (#1619) * Add init argument to product logpdf * Add tests * Fix tests Co-authored-by: David Widmann --- src/multivariate/product.jl | 9 ++- 
test/multivariate/product.jl | 108 +++++++++++++++++++++++++++++++++++ test/product.jl | 94 ------------------------------ test/runtests.jl | 2 +- 4 files changed, 116 insertions(+), 97 deletions(-) create mode 100644 test/multivariate/product.jl diff --git a/src/multivariate/product.jl b/src/multivariate/product.jl index 411fd4ab04..3bb0f0d9ee 100644 --- a/src/multivariate/product.jl +++ b/src/multivariate/product.jl @@ -38,8 +38,13 @@ end _rand!(rng::AbstractRNG, d::Product, x::AbstractVector{<:Real}) = map!(Base.Fix1(rand, rng), x, d.v) -_logpdf(d::Product, x::AbstractVector{<:Real}) = - sum(n->logpdf(d.v[n], x[n]), 1:length(d)) +function _logpdf(d::Product, x::AbstractVector{<:Real}) + dists = d.v + if isempty(dists) + return sum(map(logpdf, dists, x)) + end + return sum(n -> logpdf(dists[n], x[n]), 1:length(d)) +end mean(d::Product) = mean.(d.v) var(d::Product) = var.(d.v) diff --git a/test/multivariate/product.jl b/test/multivariate/product.jl new file mode 100644 index 0000000000..840eb409f2 --- /dev/null +++ b/test/multivariate/product.jl @@ -0,0 +1,108 @@ +using Distributions +using FillArrays + +using LinearAlgebra +using Random +using Test + +using Distributions: Product + +# TODO: remove when `Product` is removed +@testset "Deprecated `Product` distribution" begin +@testset "Testing normal product distributions" begin + Random.seed!(123456) + N = 11 + # Construct independent distributions and `Product` distribution from these. + μ = randn(N) + ds = Normal.(μ, 1.0) + x = rand.(ds) + d_product = @test_deprecated(Product(ds)) + @test d_product isa Product + # Check that methods for `Product` are consistent. 
+ @test length(d_product) == length(ds) + @test eltype(d_product) === eltype(ds[1]) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) + @test mean(d_product) == mean.(ds) + @test var(d_product) == var.(ds) + @test cov(d_product) == Diagonal(var.(ds)) + @test entropy(d_product) ≈ sum(entropy.(ds)) + + y = rand(d_product) + @test y isa typeof(x) + @test length(y) == N +end + +@testset "Testing generic product distributions" begin + Random.seed!(123456) + N = 11 + # Construct independent distributions and `Product` distribution from these. + ubound = rand(N) + ds = Uniform.(-ubound, ubound) + x = rand.(ds) + d_product = product_distribution(ds) + @test d_product isa Product + # Check that methods for `Product` are consistent. + @test length(d_product) == length(ds) + @test eltype(d_product) === eltype(ds[1]) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) + @test mean(d_product) == mean.(ds) + @test var(d_product) == var.(ds) + @test cov(d_product) == Diagonal(var.(ds)) + @test entropy(d_product) == sum(entropy.(ds)) + @test insupport(d_product, ubound) == true + @test insupport(d_product, ubound .+ 1) == false + @test minimum(d_product) == -ubound + @test maximum(d_product) == ubound + @test extrema(d_product) == (-ubound, ubound) + @test isless(extrema(d_product)...) + + y = rand(d_product) + @test y isa typeof(x) + @test length(y) == N +end + +@testset "Testing discrete non-parametric product distribution" begin + Random.seed!(123456) + N = 11 + + for a in ([0, 1], [-0.5, 0.5]) + # Construct independent distributions and `Product` distribution from these. + support = fill(a, N) + ds = DiscreteNonParametric.(support, Ref([0.5, 0.5])) + x = rand.(ds) + d_product = product_distribution(ds) + @test d_product isa Product + # Check that methods for `Product` are consistent. 
+ @test length(d_product) == length(ds) + @test eltype(d_product) === eltype(ds[1]) + @test @inferred(logpdf(d_product, x)) ≈ sum(logpdf.(ds, x)) + @test mean(d_product) == mean.(ds) + @test var(d_product) == var.(ds) + @test cov(d_product) == Diagonal(var.(ds)) + @test entropy(d_product) == sum(entropy.(ds)) + @test insupport(d_product, fill(a[2], N)) == true + @test insupport(d_product, fill(a[2] + 1, N)) == false + + y = rand(d_product) + @test y isa typeof(x) + @test length(y) == N + end +end + +@testset "Testing iid product distributions" begin + Random.seed!(123456) + N = 11 + d = @test_deprecated(Product(Fill(Laplace(0.0, 2.3), N))) + @test N == length(unique(rand(d))); + @test mean(d) === Fill(0.0, N) + @test cov(d) === Diagonal(Fill(var(Laplace(0.0, 2.3)), N)) +end + +@testset "Empty vector of distributions (#1619)" begin + d = @inferred(product_distribution(typeof(Beta(1, 1))[])) + @test d isa Product + @test iszero(@inferred(logpdf(d, Float64[]))) + @test_throws DimensionMismatch logpdf(d, rand(1)) + @test_throws DimensionMismatch logpdf(d, rand(3)) +end +end \ No newline at end of file diff --git a/test/product.jl b/test/product.jl index 16d2bb80f3..829e80d1b8 100644 --- a/test/product.jl +++ b/test/product.jl @@ -5,100 +5,6 @@ using Test using Random using LinearAlgebra -using Distributions: Product - -# TODO: remove when `Product` is removed -@testset "Deprecated `Product` distribution" begin -@testset "Testing normal product distributions" begin - Random.seed!(123456) - N = 11 - # Construct independent distributions and `Product` distribution from these. - μ = randn(N) - ds = Normal.(μ, 1.0) - x = rand.(ds) - d_product = @test_deprecated(Product(ds)) - @test d_product isa Product - # Check that methods for `Product` are consistent. 
- @test length(d_product) == length(ds) - @test eltype(d_product) === eltype(ds[1]) - @test logpdf(d_product, x) ≈ sum(logpdf.(ds, x)) - @test mean(d_product) == mean.(ds) - @test var(d_product) == var.(ds) - @test cov(d_product) == Diagonal(var.(ds)) - @test entropy(d_product) ≈ sum(entropy.(ds)) - - y = rand(d_product) - @test y isa typeof(x) - @test length(y) == N -end - -@testset "Testing generic product distributions" begin - Random.seed!(123456) - N = 11 - # Construct independent distributions and `Product` distribution from these. - ubound = rand(N) - ds = Uniform.(-ubound, ubound) - x = rand.(ds) - d_product = product_distribution(ds) - @test d_product isa Product - # Check that methods for `Product` are consistent. - @test length(d_product) == length(ds) - @test eltype(d_product) === eltype(ds[1]) - @test logpdf(d_product, x) ≈ sum(logpdf.(ds, x)) - @test mean(d_product) == mean.(ds) - @test var(d_product) == var.(ds) - @test cov(d_product) == Diagonal(var.(ds)) - @test entropy(d_product) == sum(entropy.(ds)) - @test insupport(d_product, ubound) == true - @test insupport(d_product, ubound .+ 1) == false - @test minimum(d_product) == -ubound - @test maximum(d_product) == ubound - @test extrema(d_product) == (-ubound, ubound) - @test isless(extrema(d_product)...) - - y = rand(d_product) - @test y isa typeof(x) - @test length(y) == N -end - -@testset "Testing discrete non-parametric product distribution" begin - Random.seed!(123456) - N = 11 - - for a in ([0, 1], [-0.5, 0.5]) - # Construct independent distributions and `Product` distribution from these. - support = fill(a, N) - ds = DiscreteNonParametric.(support, Ref([0.5, 0.5])) - x = rand.(ds) - d_product = product_distribution(ds) - @test d_product isa Product - # Check that methods for `Product` are consistent. 
- @test length(d_product) == length(ds) - @test eltype(d_product) === eltype(ds[1]) - @test logpdf(d_product, x) ≈ sum(logpdf.(ds, x)) - @test mean(d_product) == mean.(ds) - @test var(d_product) == var.(ds) - @test cov(d_product) == Diagonal(var.(ds)) - @test entropy(d_product) == sum(entropy.(ds)) - @test insupport(d_product, fill(a[2], N)) == true - @test insupport(d_product, fill(a[2] + 1, N)) == false - - y = rand(d_product) - @test y isa typeof(x) - @test length(y) == N - end -end - -@testset "Testing iid product distributions" begin - Random.seed!(123456) - N = 11 - d = @test_deprecated(Product(Fill(Laplace(0.0, 2.3), N))) - @test N == length(unique(rand(d))); - @test mean(d) === Fill(0.0, N) - @test cov(d) === Diagonal(Fill(var(Laplace(0.0, 2.3)), N)) -end -end - @testset "Testing normal product distributions" begin Random.seed!(123456) N = 11 diff --git a/test/runtests.jl b/test/runtests.jl index 45df7fe324..715247bf85 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -85,6 +85,7 @@ const tests = [ "univariate/continuous/skewedexponentialpower", "univariate/discrete/discreteuniform", "univariate/continuous/tdist", + "multivariate/product", ### missing files compared to /src: # "common", @@ -100,7 +101,6 @@ const tests = [ # "mixtures/mixturemodel", # "mixtures/unigmm", # "multivariate/mvnormalcanon", - # "multivariate/product", # "quantilealgs", # "samplers/aliastable", # "samplers/binomial", From 97f5ed0070274b30a33cd16de362162c3b14ddb9 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 22 Sep 2022 16:40:08 +0200 Subject: [PATCH 36/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9a60c5eccf..506dfc1a1f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.73" +version = "0.25.74" [deps] ChainRulesCore = 
"d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 852af6ae1434aa6ac17c20a89f40e7490063c14f Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 24 Sep 2022 23:31:33 +0200 Subject: [PATCH 37/93] Improve `GammaIPSampler` and `GammaMTSampler` (#1617) * Improve `GammaIPSampler` * Add tests * Generalize and fix `GammaMTSampler` * Avoid argument checks * Update src/univariate/continuous/gamma.jl Co-authored-by: David Widmann * Update src/univariate/continuous/gamma.jl Co-authored-by: David Widmann * Fix test issues Co-authored-by: Andreas Noack --- src/multivariate/mvtdist.jl | 4 +- src/samplers/gamma.jl | 72 ++++++++++++++++------------- src/univariate/continuous/chisq.jl | 13 ++++-- src/univariate/continuous/gamma.jl | 9 ++-- test/fit.jl | 2 +- test/samplers.jl | 22 +++++++++ test/univariate/continuous/tdist.jl | 5 +- 7 files changed, 84 insertions(+), 43 deletions(-) diff --git a/src/multivariate/mvtdist.jl b/src/multivariate/mvtdist.jl index e3b5f92daa..dbd13ce60f 100644 --- a/src/multivariate/mvtdist.jl +++ b/src/multivariate/mvtdist.jl @@ -156,7 +156,7 @@ end # Sampling (for GenericMvTDist) function _rand!(rng::AbstractRNG, d::GenericMvTDist, x::AbstractVector{<:Real}) - chisqd = Chisq(d.df) + chisqd = Chisq{partype(d)}(d.df) y = sqrt(rand(rng, chisqd) / d.df) unwhiten!(d.Σ, randn!(rng, x)) x .= x ./ y .+ d.μ @@ -165,7 +165,7 @@ end function _rand!(rng::AbstractRNG, d::GenericMvTDist, x::AbstractMatrix{T}) where T<:Real cols = size(x,2) - chisqd = Chisq(d.df) + chisqd = Chisq{partype(d)}(d.df) y = Matrix{T}(undef, 1, cols) unwhiten!(d.Σ, randn!(rng, x)) rand!(rng, chisqd, y) diff --git a/src/samplers/gamma.jl b/src/samplers/gamma.jl index 70d9d8b1af..9a40da98a3 100644 --- a/src/samplers/gamma.jl +++ b/src/samplers/gamma.jl @@ -162,32 +162,49 @@ end # doi:10.1145/358407.358414 # http://www.cparity.com/projects/AcmClassification/samples/358414.pdf -struct GammaMTSampler <: Sampleable{Univariate,Continuous} - d::Float64 - c::Float64 - κ::Float64 +# valid for shape >= 1 
+struct GammaMTSampler{T<:Real} <: Sampleable{Univariate,Continuous} + d::T + c::T + κ::T + r::T end function GammaMTSampler(g::Gamma) - d = shape(g) - 1/3 - c = 1.0 / sqrt(9.0 * d) + # Setup (Step 1) + d = shape(g) - 1//3 + c = inv(3 * sqrt(d)) + + # Pre-compute scaling factor κ = d * scale(g) - GammaMTSampler(d, c, κ) + + # We also pre-compute the factor in the squeeze function + return GammaMTSampler(promote(d, c, κ, 331//10_000)...) end -function rand(rng::AbstractRNG, s::GammaMTSampler) +function rand(rng::AbstractRNG, s::GammaMTSampler{T}) where {T<:Real} + d = s.d + c = s.c + κ = s.κ + r = s.r + z = zero(T) while true - x = randn(rng) - v = 1.0 + s.c * x - while v <= 0.0 - x = randn(rng) - v = 1.0 + s.c * x + # Generate v (Step 2) + x = randn(rng, T) + cbrt_v = 1 + c * x + while cbrt_v <= z # requires x <= -sqrt(9 * shape - 3) + x = randn(rng, T) + cbrt_v = 1 + c * x end - v *= (v * v) - u = rand(rng) - x2 = x * x - if u < 1.0 - 0.331 * abs2(x2) || log(u) < 0.5 * x2 + s.d * (1.0 - v + log(v)) - return v*s.κ + v = cbrt_v^3 + + # Generate uniform u (Step 3) + u = rand(rng, T) + + # Check acceptance (Step 4 and 5) + xsq = x^2 + if u < 1 - r * xsq^2 || log(u) < xsq / 2 + d * logmxp1(v) + return v * κ end end end @@ -199,24 +216,15 @@ struct GammaIPSampler{S<:Sampleable{Univariate,Continuous},T<:Real} <: Sampleabl nia::T #-1/scale end -function GammaIPSampler(d::Gamma,::Type{S}) where S<:Sampleable - GammaIPSampler(Gamma(1.0 + shape(d), scale(d)), -1.0 / shape(d)) +GammaIPSampler(d::Gamma) = GammaIPSampler(d, GammaMTSampler) +function GammaIPSampler(d::Gamma, ::Type{S}) where {S<:Sampleable} + shape_d = shape(d) + sampler = S(Gamma{partype(d)}(1 + shape_d, scale(d))) + return GammaIPSampler(sampler, -inv(shape_d)) end -GammaIPSampler(d::Gamma) = GammaIPSampler(d,GammaMTSampler) function rand(rng::AbstractRNG, s::GammaIPSampler) x = rand(rng, s.s) e = randexp(rng) x*exp(s.nia*e) end - -# function sampler(d::Gamma) -# if d.shape < 1.0 -# # TODO: d.shape = 0.5 : use 
scaled chisq -# GammaIPSampler(d) -# elseif d.shape == 1.0 -# Exponential(d.scale) -# else -# GammaGDSampler(d) -# end -# end diff --git a/src/univariate/continuous/chisq.jl b/src/univariate/continuous/chisq.jl index 5a5a8240d5..3ca0ac54c4 100644 --- a/src/univariate/continuous/chisq.jl +++ b/src/univariate/continuous/chisq.jl @@ -96,7 +96,14 @@ gradlogpdf(d::Chisq{T}, x::Real) where {T<:Real} = x > 0 ? (d.ν/2 - 1) / x - 1 #### Sampling -rand(rng::AbstractRNG, d::Chisq) = - (ν = d.ν; rand(rng, Gamma(ν / 2.0, 2.0one(ν)))) +function rand(rng::AbstractRNG, d::Chisq) + α = dof(d) / 2 + θ = oftype(α, 2) + return rand(rng, Gamma{typeof(α)}(α, θ)) +end -sampler(d::Chisq) = (ν = d.ν; sampler(Gamma(ν / 2.0, 2.0one(ν)))) +function sampler(d::Chisq) + α = dof(d) / 2 + θ = oftype(α, 2) + return sampler(Gamma{typeof(α)}(α, θ)) +end diff --git a/src/univariate/continuous/gamma.jl b/src/univariate/continuous/gamma.jl index 96ca058afa..3c4e3c44c4 100644 --- a/src/univariate/continuous/gamma.jl +++ b/src/univariate/continuous/gamma.jl @@ -105,9 +105,10 @@ function rand(rng::AbstractRNG, d::Gamma) # TODO: shape(d) = 0.5 : use scaled chisq return rand(rng, GammaIPSampler(d)) elseif shape(d) == 1.0 - return rand(rng, Exponential(d.θ)) + θ = + return rand(rng, Exponential{partype(d)}(scale(d))) else - return rand(rng, GammaGDSampler(d)) + return rand(rng, GammaMTSampler(d)) end end @@ -116,9 +117,9 @@ function sampler(d::Gamma) # TODO: shape(d) = 0.5 : use scaled chisq return GammaIPSampler(d) elseif shape(d) == 1.0 - return sampler(Exponential(d.θ)) + return sampler(Exponential{partype(d)}(scale(d))) else - return GammaGDSampler(d) + return GammaMTSampler(d) end end diff --git a/test/fit.jl b/test/fit.jl index 50ef4499a4..4483dd1ec6 100644 --- a/test/fit.jl +++ b/test/fit.jl @@ -370,7 +370,7 @@ end d = fit(dist, func[2](dist(5.0, 3.0), N + 1)) @test isa(d, dist) @test isapprox(location(d), 5.0, atol=0.02) - @test isapprox(scale(d) , 3.0, atol=0.02) + @test isapprox(scale(d) , 3.0, 
atol=0.03) end end diff --git a/test/samplers.jl b/test/samplers.jl index 68e042ec17..2744ae9acb 100644 --- a/test/samplers.jl +++ b/test/samplers.jl @@ -101,6 +101,28 @@ import Distributions: end end + @testset "GammaIPSampler" begin + @testset "d=$d" for d in [Gamma(0.1, 1.0), Gamma(0.9, 1.0)] + s = sampler(d) + @test s isa GammaIPSampler{<:GammaMTSampler} + @test s.s isa GammaMTSampler + test_samples(s, d, n_tsamples) + test_samples(s, d, n_tsamples, rng=rng) + + s = @inferred(GammaIPSampler(d, GammaMTSampler)) + @test s isa GammaIPSampler{<:GammaMTSampler} + @test s.s isa GammaMTSampler + test_samples(s, d, n_tsamples) + test_samples(s, d, n_tsamples, rng=rng) + + s = @inferred(GammaIPSampler(d, GammaGDSampler)) + @test s isa GammaIPSampler{<:GammaGDSampler} + @test s.s isa GammaGDSampler + test_samples(s, d, n_tsamples) + test_samples(s, d, n_tsamples, rng=rng) + end + end + @testset "Random.Sampler" begin for dist in ( Binomial(5, 0.3), diff --git a/test/univariate/continuous/tdist.jl b/test/univariate/continuous/tdist.jl index 4f0809b156..16fab2812c 100644 --- a/test/univariate/continuous/tdist.jl +++ b/test/univariate/continuous/tdist.jl @@ -4,6 +4,9 @@ using ForwardDiff using Test @testset "Type stability of `rand` (#1614)" begin - @inferred(rand(TDist(big"1.0"))) + if VERSION >= v"1.9.0-DEV.348" + # randn(::BigFloat) was only added in https://github.com/JuliaLang/julia/pull/44714 + @inferred(rand(TDist(big"1.0"))) + end @inferred(rand(TDist(ForwardDiff.Dual(1.0)))) end From 4c27f7d4da28b1c9e033c699e8225b72283e6f29 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 24 Sep 2022 23:32:04 +0200 Subject: [PATCH 38/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 506dfc1a1f..2a5cc3d668 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.74" 
+version = "0.25.75" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From d3a433fa9bbf6ab78c092137393cf34c629a5d2a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 5 Oct 2022 20:40:48 +0200 Subject: [PATCH 39/93] CompatHelper: bump compat for GR to 0.69 for package docs, (keep existing compat) (#1622) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 1d0784bec9..2aa64d9549 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68" +GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69" From 2d0bce03cee7aa0da4e7c43fa78d35f0d9b72479 Mon Sep 17 00:00:00 2001 From: Frankie Robertson Date: Mon, 10 Oct 2022 14:16:55 +0300 Subject: [PATCH 40/93] Add missing univariate dists to documentation (#1626) * Improve/standardise docstring for SkewNormal distribution * Add SkewNormal distribution to docs * Add Soliton distribution to docs * Standardise SkewNormal p.d.f. 
formatting in docstring Co-authored-by: David Widmann Co-authored-by: David Widmann --- docs/src/univariate.md | 8 ++++++++ src/univariate/continuous/skewnormal.jl | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/src/univariate.md b/docs/src/univariate.md index 0929a3d749..4be7353bbc 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -417,6 +417,13 @@ SkewedExponentialPower plotdensity((-8, 5), SkewedExponentialPower, (0, 1, 0.7, 0.7)) # hide ``` +```@docs +SkewNormal +``` +```@example plotdensity +plotdensity((-4, 4), SkewNormal, (0, 1, -1)) # hide +``` + ```@docs StudentizedRange SymTriangularDist @@ -485,6 +492,7 @@ NegativeBinomial Poisson PoissonBinomial Skellam +Soliton ``` ### Vectorized evaluation diff --git a/src/univariate/continuous/skewnormal.jl b/src/univariate/continuous/skewnormal.jl index a7f71af022..dd9215deae 100644 --- a/src/univariate/continuous/skewnormal.jl +++ b/src/univariate/continuous/skewnormal.jl @@ -1,11 +1,23 @@ """ -SkewNormal(ξ, ω, α) - The *skew normal distribution* is a continuous probability distribution - that generalises the normal distribution to allow for non-zero skewness. + SkewNormal(ξ, ω, α) + +The *skew normal distribution* is a continuous probability distribution that +generalises the normal distribution to allow for non-zero skewness. 
Given a +location `ξ`, scale `ω`, and shape `α`, it has the probability density function + +```math +f(x; \\xi, \\omega, \\alpha) = +\\frac{2}{\\omega \\sqrt{2 \\pi}} \\exp{\\bigg(-\\frac{(x-\\xi)^2}{2\\omega^2}\\bigg)} +\\int_{-\\infty}^{\\alpha\\left(\\frac{x-\\xi}{\\omega}\\right)} +\\frac{1}{\\sqrt{2 \\pi}} \\exp{\\bigg(-\\frac{t^2}{2}\\bigg)} \\, \\mathrm{d}t +``` + External links + * [Skew normal distribution on Wikipedia](https://en.wikipedia.org/wiki/Skew_normal_distribution) * [Discourse](https://discourse.julialang.org/t/skew-normal-distribution/21549/7) * [SkewDist.jl](https://github.com/STOR-i/SkewDist.jl) + """ struct SkewNormal{T<:Real} <: ContinuousUnivariateDistribution ξ::T From a31ebc4de29a491971587cf159b184349d6a24e9 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Tue, 11 Oct 2022 09:35:47 +0100 Subject: [PATCH 41/93] Chain rule for `EachVariate` constructor (#1627) * added chain rule for eachvariate * version bump * added comment to explain why to_vec overload is needed * simplified impl of rrule * Update src/eachvariate.jl Co-authored-by: David Widmann * Update runtests.jl * Update src/eachvariate.jl Co-authored-by: David Widmann --- Project.toml | 2 +- src/eachvariate.jl | 11 +++++++++++ test/eachvariate.jl | 17 +++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test/eachvariate.jl diff --git a/Project.toml b/Project.toml index 2a5cc3d668..23422b5343 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.75" +version = "0.25.76" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" diff --git a/src/eachvariate.jl b/src/eachvariate.jl index 701be99faa..36a9ae9e97 100644 --- a/src/eachvariate.jl +++ b/src/eachvariate.jl @@ -11,6 +11,17 @@ function EachVariate{V}(x::AbstractArray{<:Real,M}) where {V,M} return EachVariate{V,typeof(x),typeof(ax),T,M-V}(x, ax) 
end +function ChainRulesCore.rrule(::Type{EachVariate{V}}, x::AbstractArray{<:Real}) where {V} + y = EachVariate{V}(x) + size_x = size(x) + function EachVariate_pullback(Δ) + # TODO: Should we also handle `Tangent{<:EachVariate}`? + Δ_out = reshape(mapreduce(vec, vcat, ChainRulesCore.unthunk(Δ)), size_x) + return (ChainRulesCore.NoTangent(), Δ_out) + end + return y, EachVariate_pullback +end + Base.IteratorSize(::Type{EachVariate{V,P,A,T,N}}) where {V,P,A,T,N} = Base.HasShape{N}() Base.axes(x::EachVariate) = x.axes diff --git a/test/eachvariate.jl b/test/eachvariate.jl new file mode 100644 index 0000000000..f41a5207d2 --- /dev/null +++ b/test/eachvariate.jl @@ -0,0 +1,17 @@ +using ChainRulesTestUtils +using ChainRulesTestUtils: FiniteDifferences + +# Without this, `to_vec` will also include the `axes` field of `EachVariate`. +function FiniteDifferences.to_vec(xs::Distributions.EachVariate{V}) where {V} + vals, vals_from_vec = FiniteDifferences.to_vec(xs.parent) + return vals, x -> Distributions.EachVariate{V}(vals_from_vec(x)) +end + +@testset "eachvariate.jl" begin + @testset "ChainRules" begin + xs = randn(2, 3, 4, 5) + test_rrule(Distributions.EachVariate{1}, xs) + test_rrule(Distributions.EachVariate{2}, xs) + test_rrule(Distributions.EachVariate{3}, xs) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 715247bf85..614ec7fb1c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -86,6 +86,7 @@ const tests = [ "univariate/discrete/discreteuniform", "univariate/continuous/tdist", "multivariate/product", + "eachvariate", ### missing files compared to /src: # "common", From 529003860da12b40c17aeda85483ae4a6e2e44da Mon Sep 17 00:00:00 2001 From: Rik Huijzer Date: Thu, 27 Oct 2022 09:30:11 +0200 Subject: [PATCH 42/93] Document parameters for noncentral distributions (#1632) * Document parameters for noncentral distributions * Apply suggestions from code review Co-authored-by: David Widmann Co-authored-by: David Widmann --- 
src/univariate/continuous/noncentralbeta.jl | 2 ++ src/univariate/continuous/noncentralf.jl | 2 ++ src/univariate/continuous/noncentralt.jl | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/univariate/continuous/noncentralbeta.jl b/src/univariate/continuous/noncentralbeta.jl index e79ea2c31d..222963e472 100644 --- a/src/univariate/continuous/noncentralbeta.jl +++ b/src/univariate/continuous/noncentralbeta.jl @@ -1,5 +1,7 @@ """ NoncentralBeta(α, β, λ) + +*Noncentral Beta distribution* with shape parameters `α > 0` and `β > 0` and noncentrality parameter `λ >= 0`. """ struct NoncentralBeta{T<:Real} <: ContinuousUnivariateDistribution α::T diff --git a/src/univariate/continuous/noncentralf.jl b/src/univariate/continuous/noncentralf.jl index 8d12f10ae4..9a493f8d4e 100644 --- a/src/univariate/continuous/noncentralf.jl +++ b/src/univariate/continuous/noncentralf.jl @@ -1,5 +1,7 @@ """ NoncentralF(ν1, ν2, λ) + +*Noncentral F-distribution* with `ν1 > 0` and `ν2 > 0` degrees of freedom and noncentrality parameter `λ >= 0`. """ struct NoncentralF{T<:Real} <: ContinuousUnivariateDistribution ν1::T diff --git a/src/univariate/continuous/noncentralt.jl b/src/univariate/continuous/noncentralt.jl index 1cc037c80c..dfcdb6e3ca 100644 --- a/src/univariate/continuous/noncentralt.jl +++ b/src/univariate/continuous/noncentralt.jl @@ -1,5 +1,7 @@ """ NoncentralT(ν, λ) + +*Noncentral Student's t-distribution* with `ν > 0` degrees of freedom and noncentrality parameter `λ`. 
""" struct NoncentralT{T<:Real} <: ContinuousUnivariateDistribution ν::T From 9a544d91b0b8d81a031153cd80ae425841d7af5a Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 8 Nov 2022 08:40:07 +0000 Subject: [PATCH 43/93] Add `BernoulliLogit` (#1623) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add `BernoulliLogit` * Fixes and tests * Update test/univariate/discrete/bernoullilogit.jl * Update src/univariate/discrete/bernoullilogit.jl Co-authored-by: Mathieu Besançon --- docs/src/univariate.md | 1 + src/Distributions.jl | 1 + src/univariate/discrete/bernoullilogit.jl | 108 +++++++++++++++++++++ src/univariates.jl | 1 + test/univariate/discrete/bernoullilogit.jl | 81 ++++++++++++++++ 5 files changed, 192 insertions(+) create mode 100644 src/univariate/discrete/bernoullilogit.jl create mode 100644 test/univariate/discrete/bernoullilogit.jl diff --git a/docs/src/univariate.md b/docs/src/univariate.md index 4be7353bbc..caa1e22e0b 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -480,6 +480,7 @@ plotdensity((0.001, 3), Weibull, (0.5, 1)) # hide ```@docs Bernoulli +BernoulliLogit BetaBinomial Binomial Categorical diff --git a/src/Distributions.jl b/src/Distributions.jl index 6b74340eda..46b18f5418 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -67,6 +67,7 @@ export # distribution types Arcsine, Bernoulli, + BernoulliLogit, Beta, BetaBinomial, BetaPrime, diff --git a/src/univariate/discrete/bernoullilogit.jl b/src/univariate/discrete/bernoullilogit.jl new file mode 100644 index 0000000000..f82059fed9 --- /dev/null +++ b/src/univariate/discrete/bernoullilogit.jl @@ -0,0 +1,108 @@ +""" + BernoulliLogit(logitp=0.0) + +A *Bernoulli distribution* that is parameterized by the logit `logitp = logit(p) = log(p/(1-p))` of its success rate `p`. 
+ +```math +P(X = k) = \\begin{cases} +\\operatorname{logistic}(-logitp) = \\frac{1}{1 + \\exp{(logitp)}} & \\quad \\text{for } k = 0, \\\\ +\\operatorname{logistic}(logitp) = \\frac{1}{1 + \\exp{(-logitp)}} & \\quad \\text{for } k = 1. +\\end{cases} +``` + +External links: + +* [Bernoulli distribution on Wikipedia](http://en.wikipedia.org/wiki/Bernoulli_distribution) + +See also [`Bernoulli`](@ref) +""" +struct BernoulliLogit{T<:Real} <: DiscreteUnivariateDistribution + logitp::T +end + +BernoulliLogit() = BernoulliLogit(0.0) + +@distr_support BernoulliLogit false true + +Base.eltype(::Type{<:BernoulliLogit}) = Bool + +#### Conversions +Base.convert(::Type{BernoulliLogit{T}}, d::BernoulliLogit) where {T<:Real} = BernoulliLogit{T}(T(d.logitp)) +Base.convert(::Type{BernoulliLogit{T}}, d::BernoulliLogit{T}) where {T<:Real} = d + +#### Parameters + +succprob(d::BernoulliLogit) = logistic(d.logitp) +failprob(d::BernoulliLogit) = logistic(-d.logitp) +logsuccprob(d::BernoulliLogit) = -log1pexp(-d.logitp) +logfailprob(d::BernoulliLogit) = -log1pexp(d.logitp) + +params(d::BernoulliLogit) = (d.logitp,) +partype(::BernoulliLogit{T}) where {T} = T + +#### Properties + +mean(d::BernoulliLogit) = succprob(d) +var(d::BernoulliLogit) = succprob(d) * failprob(d) +function skewness(d::BernoulliLogit) + p0 = failprob(d) + p1 = succprob(d) + return (p0 - p1) / sqrt(p0 * p1) +end +kurtosis(d::BernoulliLogit) = 1 / var(d) - 6 + +mode(d::BernoulliLogit) = d.logitp > 0 ? 1 : 0 + +function modes(d::BernoulliLogit) + logitp = d.logitp + z = zero(logitp) + logitp < z ? [false] : (logitp > z ? [true] : [false, true]) +end + +median(d::BernoulliLogit) = d.logitp > 0 + +function entropy(d::BernoulliLogit) + logitp = d.logitp + (logitp == -Inf || logitp == Inf) ? float(zero(logitp)) : (logitp > 0 ? -(succprob(d) * logitp + logfailprob(d)) : -(logsuccprob(d) - failprob(d) * logitp)) +end + +#### Evaluation + +pdf(d::BernoulliLogit, x::Bool) = x ? 
succprob(d) : failprob(d) +pdf(d::BernoulliLogit, x::Real) = x == 0 ? failprob(d) : (x == 1 ? succprob(d) : zero(float(d.logitp))) + +logpdf(d::BernoulliLogit, x::Bool) = x ? logsuccprob(d) : logfailprob(d) +logpdf(d::BernoulliLogit, x::Real) = x == 0 ? logfailprob(d) : (x == 1 ? logsuccprob(d) : oftype(float(d.logitp), -Inf)) + +cdf(d::BernoulliLogit, x::Bool) = x ? one(float(d.logitp)) : failprob(d) +cdf(d::BernoulliLogit, x::Int) = x < 0 ? zero(float(d.logitp)) : (x < 1 ? failprob(d) : one(float(d.logitp))) + +logcdf(d::BernoulliLogit, x::Bool) = x ? zero(float(d.logitp)) : logfailprob(d) +logcdf(d::BernoulliLogit, x::Int) = x < 0 ? oftype(float(d.logitp), -Inf) : (x < 1 ? logfailprob(d) : zero(float(d.logitp))) + +ccdf(d::BernoulliLogit, x::Bool) = x ? zero(float(d.logitp)) : succprob(d) +ccdf(d::BernoulliLogit, x::Int) = x < 0 ? one(float(d.logitp)) : (x < 1 ? succprob(d) : zero(float(d.logitp))) + +logccdf(d::BernoulliLogit, x::Bool) = x ? oftype(float(d.logitp), -Inf) : logsuccprob(d) +logccdf(d::BernoulliLogit, x::Int) = x < 0 ? zero(float(d.logitp)) : (x < 1 ? logsuccprob(d) : oftype(float(d.logitp), -Inf)) + +function quantile(d::BernoulliLogit, p::Real) + T = float(partype(d)) + 0 <= p <= 1 ? (p <= failprob(d) ? zero(T) : one(T)) : T(NaN) +end +function cquantile(d::BernoulliLogit, p::Real) + T = float(partype(d)) + 0 <= p <= 1 ? (p >= succprob(d) ? 
zero(T) : one(T)) : T(NaN) +end + +mgf(d::BernoulliLogit, t::Real) = failprob(d) + exp(t + logsuccprob(d)) +function cgf(d::BernoulliLogit, t) + # log(1-p+p*exp(t)) = logaddexp(log(1-p), t + log(p)) + logaddexp(logfailprob(d), t + logsuccprob(d)) +end +cf(d::BernoulliLogit, t::Real) = failprob(d) + succprob(d) * cis(t) + + +#### Sampling + +rand(rng::AbstractRNG, d::BernoulliLogit) = logit(rand(rng)) <= d.logitp diff --git a/src/univariates.jl b/src/univariates.jl index dcbd38eadc..e5efe3133f 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -650,6 +650,7 @@ end const discrete_distributions = [ "bernoulli", + "bernoullilogit", "betabinomial", "binomial", "dirac", diff --git a/test/univariate/discrete/bernoullilogit.jl b/test/univariate/discrete/bernoullilogit.jl new file mode 100644 index 0000000000..55560692a0 --- /dev/null +++ b/test/univariate/discrete/bernoullilogit.jl @@ -0,0 +1,81 @@ +using Distributions +using StatsFuns +using Test, Random + +@testset "basic properties" begin + @test BernoulliLogit() === BernoulliLogit(0.0) + + for logitp in (-0.3, 0.2, 0.1f0) + d = BernoulliLogit(logitp) + @test d isa BernoulliLogit{typeof(logitp)} + @test convert(typeof(d), d) === d + @test convert(BernoulliLogit{Float16}, d) === BernoulliLogit(Float16(logitp)) + @test eltype(typeof(d)) === Bool + @test params(d) == (logitp,) + @test partype(d) === typeof(logitp) + end +end + +@testset "succprob/failprob" begin + for p in (0.0, 0.1, 0.31f0, 0.5, 0.7f0, 0.95, 1.0) + d = BernoulliLogit(logit(p)) + @test @inferred(succprob(d)) ≈ p + @test @inferred(failprob(d)) ≈ 1 - p + @test @inferred(Distributions.logsuccprob(d)) ≈ log(p) + @test @inferred(Distributions.logfailprob(d)) ≈ log1p(-p) + end +end + +@testset "rand" begin + @test rand(BernoulliLogit()) isa Bool + @test rand(BernoulliLogit(), 10) isa Vector{Bool} + + N = 10_000 + for p in (0.0, 0.1, 0.31f0, 0.5, 0.7f0, 0.95, 1.0) + d = BernoulliLogit(logit(p)) + @test @inferred(rand(d)) isa Bool + @test 
@inferred(rand(d, 10)) isa Vector{Bool} + @test mean(rand(d, N)) ≈ p atol=0.01 + end +end + +@testset "cgf" begin + test_cgf(BernoulliLogit(), (1f0, -1f0, 1e6, -1e6)) + test_cgf(BernoulliLogit(0.1), (1f0, -1f0, 1e6, -1e6)) +end + +@testset "comparison with `Bernoulli`" begin + for p in (0.0, 0.1, 0.31f0, 0.5, 0.7f0, 0.95, 1.0) + d = BernoulliLogit(logit(p)) + d0 = Bernoulli(p) + + @test @inferred(mean(d)) ≈ mean(d0) + @test @inferred(var(d)) ≈ var(d0) + @test @inferred(skewness(d)) ≈ skewness(d0) + @test @inferred(kurtosis(d)) ≈ kurtosis(d0) + @test @inferred(mode(d)) ≈ mode(d0) + @test @inferred(modes(d)) ≈ modes(d0) + @test @inferred(median(d)) ≈ median(d0) + @test @inferred(entropy(d)) ≈ entropy(d0) + + for x in (true, false, 0, 1, -3, 5) + @test @inferred(pdf(d, x)) ≈ pdf(d0, x) + @test @inferred(logpdf(d, x)) ≈ logpdf(d0, x) + @test @inferred(cdf(d, x)) ≈ cdf(d0, x) + @test @inferred(logcdf(d, x)) ≈ logcdf(d0, x) + @test @inferred(ccdf(d, x)) ≈ ccdf(d0, x) + @test @inferred(logccdf(d, x)) ≈ logccdf(d0, x) + end + + for q in (-0.2f0, 0.25, 0.6f0, 1.5) + @test @inferred(quantile(d, q)) ≈ quantile(d0, q) nans=true + @test @inferred(cquantile(d, q)) ≈ cquantile(d0, q) nans=true + end + + for t in (-5.2, 1.2f0) + @test @inferred(mgf(d, t)) ≈ mgf(d0, t) rtol=1e-6 + @test @inferred(cgf(d, t)) ≈ cgf(d0, t) rtol=1e-6 + @test @inferred(cf(d, t)) ≈ cf(d0, t) rtol=1e-6 + end + end +end From 01f1f7d105c15d6747ccf9d827eaf10ddc0afa37 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 8 Nov 2022 08:41:45 +0000 Subject: [PATCH 44/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 23422b5343..c16ebdcb41 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.76" +version = "0.25.77" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 
6b69b0274d6c14018d78bd2537b8348783da8a6a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 11 Nov 2022 08:28:33 +0000 Subject: [PATCH 45/93] CompatHelper: bump compat for GR to 0.70 for package docs, (keep existing compat) (#1637) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 2aa64d9549..3d0476dcb6 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69" +GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70" From a1e42f3199c527fcca73c799e9b8b3546107f983 Mon Sep 17 00:00:00 2001 From: Damon Bayer Date: Thu, 17 Nov 2022 00:31:13 -0800 Subject: [PATCH 46/93] Fix argument checks for BetaBinomial (#1635) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix argument checks for BetaBinomial Letting α = 0 or β=0 results in many methods (pdf, quantile, rand). Per Wikipedia, α and β should be positive. https://en.wikipedia.org/wiki/Beta-binomial_distribution. 
* Add tests * Fix tests Co-authored-by: David Widmann --- src/univariate/discrete/betabinomial.jl | 2 +- test/univariate/discrete/betabinomial.jl | 44 +++++++++++++++++++----- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/univariate/discrete/betabinomial.jl b/src/univariate/discrete/betabinomial.jl index 7e93b360cc..618b3f83ef 100644 --- a/src/univariate/discrete/betabinomial.jl +++ b/src/univariate/discrete/betabinomial.jl @@ -27,7 +27,7 @@ struct BetaBinomial{T<:Real} <: DiscreteUnivariateDistribution end function BetaBinomial(n::Integer, α::T, β::T; check_args::Bool=true) where {T <: Real} - @check_args BetaBinomial (n, n >= zero(n)) (α, α >= zero(α)) (β, β >= zero(β)) + @check_args BetaBinomial (n, n >= zero(n)) (α, α > zero(α)) (β, β > zero(β)) return BetaBinomial{T}(n, α, β) end diff --git a/test/univariate/discrete/betabinomial.jl b/test/univariate/discrete/betabinomial.jl index f389560634..b1e05ad772 100644 --- a/test/univariate/discrete/betabinomial.jl +++ b/test/univariate/discrete/betabinomial.jl @@ -1,14 +1,40 @@ using Distributions using Test -@testset "Log of Beta-binomial distribution" begin - d = BetaBinomial(50, 0.2, 0.6) - - for k in 1:50 - p = pdf(d, k) - lp = logpdf(d, k) - @test lp ≈ log(p) - @test insupport(d, k) +@testset "betabinomial.jl" begin + @testset "logpdf" begin + d = BetaBinomial(50, 0.2, 0.6) + + for k in 1:50 + p = @inferred(pdf(d, k)) + lp = @inferred(logpdf(d, k)) + @test lp ≈ log(p) + end + end + + @testset "support" begin + d = BetaBinomial(50, 0.2, 0.6) + + for k in 1:50 + @test insupport(d, k) + end + @test !insupport(d, 51) + end + + @testset "checks" begin + for T in (Int, Float64), S in (Int, Float64) + ST = float(promote_type(S, T)) + for n in (-1, 0, 3), α in (S(-1), S(0), S(1)), β in (T(-1), T(0), T(1)) + if n >= 0 && α > 0 && β > 0 + @test @inferred(BetaBinomial(n, α, β)) isa BetaBinomial{ST} + @test @inferred(BetaBinomial(n, α, β; check_args=true)) isa BetaBinomial{ST} + else + @test_throws 
DomainError BetaBinomial(n, α, β) + @test_throws DomainError BetaBinomial(n, α, β; check_args=true) + end + + @test @inferred(BetaBinomial(n, α, β; check_args=false)) isa BetaBinomial{ST} + end + end end - @test !insupport(d, 51) end From da151b8e1bce1ccf54890c700155e2a51e32a5f4 Mon Sep 17 00:00:00 2001 From: Jakub Slama <42158529+slamajakub@users.noreply.github.com> Date: Thu, 17 Nov 2022 09:33:32 +0100 Subject: [PATCH 47/93] Fix PGeneralizedGaussian description (#1636) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed description of PGeneralizedGaussian to be compliant with mathematical formula and implementation (mismatch between μ, α). --- src/univariate/continuous/pgeneralizedgaussian.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/univariate/continuous/pgeneralizedgaussian.jl b/src/univariate/continuous/pgeneralizedgaussian.jl index 3485fdc097..7ae6d18313 100644 --- a/src/univariate/continuous/pgeneralizedgaussian.jl +++ b/src/univariate/continuous/pgeneralizedgaussian.jl @@ -1,5 +1,5 @@ """ - PGeneralizedGaussian(α, μ, p) + PGeneralizedGaussian(μ, α, p) The *p-Generalized Gaussian distribution*, more commonly known as the exponential power or the generalized normal distribution, with scale `α`, location `μ`, and @@ -15,9 +15,9 @@ the distribution approaches the Uniform distribution on `[μ-α, μ+α]`. ```julia PGeneralizedGaussian() # GGD with shape 2, scale 1, location 0, (the Normal distribution) -PGeneralizedGaussian(α, μ, p) # GGD with location α, scale μ, and shape p +PGeneralizedGaussian(μ, α, p) # GGD with location μ, scale α, and shape p -params(d) # Get the parameters, i.e. (α, μ, p) +params(d) # Get the parameters, i.e. 
(μ, α, p) shape(d) # Get the shape parameter, p scale(d) # Get the scale parameter, α location(d) # Get the location parameter, μ From c42b1734d8796bd07bed26875de628e6171f7a07 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 17 Nov 2022 08:34:17 +0000 Subject: [PATCH 48/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c16ebdcb41..3c3bfa82a7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.77" +version = "0.25.78" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 80caa028e5923dfb1605d1cbb574be64988e6af3 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 21 Nov 2022 21:42:44 +0100 Subject: [PATCH 49/93] Fix `logpdf` and `pdf` of `Weibull` (#1639) * Fix `logpdf` and `pdf` of `Weibull` * Add tests * More fixes --- src/univariate/continuous/weibull.jl | 26 +++++++++--------------- test/runtests.jl | 2 +- test/univariate/continuous/weibull.jl | 29 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 17 deletions(-) create mode 100644 test/univariate/continuous/weibull.jl diff --git a/src/univariate/continuous/weibull.jl b/src/univariate/continuous/weibull.jl index fbc7fe9ca5..a08a4c0f79 100644 --- a/src/univariate/continuous/weibull.jl +++ b/src/univariate/continuous/weibull.jl @@ -93,24 +93,18 @@ end #### Evaluation -function pdf(d::Weibull{T}, x::Real) where T<:Real - if x >= 0 - α, θ = params(d) - z = x / θ - (α / θ) * z^(α - 1) * exp(-z^α) - else - zero(T) - end +function pdf(d::Weibull, x::Real) + α, θ = params(d) + z = abs(x) / θ + res = (α / θ) * z^(α - 1) * exp(-z^α) + x < 0 || isinf(x) ? 
zero(res) : res end -function logpdf(d::Weibull{T}, x::Real) where T<:Real - if x >= 0 - α, θ = params(d) - z = x / θ - log(α / θ) + (α - 1) * log(z) - z^α - else - -T(Inf) - end +function logpdf(d::Weibull, x::Real) + α, θ = params(d) + z = abs(x) / θ + res = log(α / θ) + xlogy(α - 1, z) - z^α + x < 0 || isinf(x) ? oftype(res, -Inf) : res end zval(d::Weibull, x::Real) = (max(x, 0) / d.θ) ^ d.α diff --git a/test/runtests.jl b/test/runtests.jl index 614ec7fb1c..3547a30d17 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -77,6 +77,7 @@ const tests = [ "univariate/continuous/gumbel", "univariate/continuous/logistic", "univariate/continuous/noncentralchisq", + "univariate/continuous/weibull", "pdfnorm", "univariate/continuous/rician", "functionals", @@ -143,7 +144,6 @@ const tests = [ # "univariate/continuous/tdist", # "univariate/continuous/triangular", # "univariate/continuous/triweight", - # "univariate/continuous/weibull", # "univariate/continuous/noncentralf", # "univariate/discrete/geometric", # "univariate/discrete/hypergeometric", diff --git a/test/univariate/continuous/weibull.jl b/test/univariate/continuous/weibull.jl new file mode 100644 index 0000000000..7e0463a73c --- /dev/null +++ b/test/univariate/continuous/weibull.jl @@ -0,0 +1,29 @@ +using Distributions + +using Test + +@testset "weibull.jl" begin + # issue #1638 + @testset "pdf/logpdf: special cases" begin + for S in (Float32, Float64), T in (Int, Float64, Float32) + for α in (1, 2, 2.5), θ in (1, 1.5, 3) + d = Weibull(S(α), S(θ)) + ST = promote_type(S, T) + + @test @inferred(pdf(d, T(-3))) === ST(0) + @test @inferred(logpdf(d, T(-3))) === ST(-Inf) + @test @inferred(pdf(d, T(0))) === (α == 1 ? ST(S(α) / S(θ)) : ST(0)) + @test @inferred(logpdf(d, T(0))) === (α == 1 ? 
ST(log(S(α) / S(θ))) : ST(-Inf)) + + if T <: AbstractFloat + @test @inferred(pdf(d, T(-Inf))) === ST(0) + @test @inferred(logpdf(d, T(-Inf))) === ST(-Inf) + @test @inferred(pdf(d, T(Inf))) === ST(0) + @test @inferred(logpdf(d, T(Inf))) === ST(-Inf) + @test isnan(@inferred(pdf(d, T(NaN)))) + @test isnan(@inferred(logpdf(d, T(NaN)))) + end + end + end + end +end From 72a3f784b14a99c8465462a921b522033b02c9e5 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 21 Nov 2022 21:43:05 +0100 Subject: [PATCH 50/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3c3bfa82a7..ccc86805da 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.78" +version = "0.25.79" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From c79812fe4d272061961726db7e83e5e5780103d2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 4 Dec 2022 20:41:25 +0100 Subject: [PATCH 51/93] CompatHelper: bump compat for GR to 0.71 for package docs, (keep existing compat) (#1642) Co-authored-by: CompatHelper Julia --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 3d0476dcb6..3d6a9ee4ea 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70" +GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70, 0.71" From 116446273d6727b69f6982b6a74b7cb1992ce9df Mon Sep 17 00:00:00 2001 From: Ewout ter Hoeven Date: Mon, 12 Dec 2022 16:41:46 +0100 Subject: [PATCH 52/93] Add Dependabot configuration for GitHub Actions updates (#1646) Add a Dependabot configuration that checks once a week if 
the GitHub Actions are still using the latest version. If not, it opens a PR to update them. It will actually open few PRs, since only major versions are specified (like v3), so only on a major release (like v4) it will update and open a PR. But it helps actively keep GitHub Actions workflows up to date and secure. See https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot --- .github/dependabot.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..6fddca0d6e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From ad6ed5bd0fa1398b1d509c8359176e7fb971b379 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Dec 2022 10:17:40 +0100 Subject: [PATCH 53/93] Bump actions/cache from 1 to 3 (#1648) Bumps [actions/cache](https://github.com/actions/cache) from 1 to 3. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v1...v3) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index cfa5a47625..45bfaa9f37 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -37,7 +37,7 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: From cd45ecc6ab9ed186ad741de41a64b24c5336e4cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Dec 2022 10:20:18 +0100 Subject: [PATCH 54/93] Bump actions/checkout from 2 to 3 (#1649) Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v2...v3) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/CI.yml | 4 ++-- .github/workflows/DocPreviewCleanup.yml | 2 +- .github/workflows/IntegrationTest.yml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 45bfaa9f37..42a484c7aa 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -32,7 +32,7 @@ jobs: arch: - x64 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} @@ -62,7 +62,7 @@ jobs: name: Documentation runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: version: '1' diff --git a/.github/workflows/DocPreviewCleanup.yml b/.github/workflows/DocPreviewCleanup.yml index bc29462c05..844c4c9a51 100644 --- a/.github/workflows/DocPreviewCleanup.yml +++ b/.github/workflows/DocPreviewCleanup.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout gh-pages branch - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: ref: gh-pages - name: Delete preview and history + push changes diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/IntegrationTest.yml index fc315aa58e..29af889646 100644 --- a/.github/workflows/IntegrationTest.yml +++ b/.github/workflows/IntegrationTest.yml @@ -31,14 +31,14 @@ jobs: #- {user: TuringLang, repo: DistributionsAD.jl, group: ForwardDiff} takes > 1 hour steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: version: 1 arch: x64 - uses: julia-actions/julia-buildpkg@latest - name: Clone Downstream - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: ${{ matrix.package.user }}/${{ matrix.package.repo }} path: downstream From 
432a7f93c0cb096b1852b01ef0abd678eba1bc60 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Dec 2022 22:08:10 +0100 Subject: [PATCH 55/93] Bump codecov/codecov-action from 1 to 3 (#1647) * Bump codecov/codecov-action from 1 to 3 Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 1 to 3. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v1...v3) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Update CI.yml Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: David Widmann --- .github/workflows/CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 42a484c7aa..ee60fd4fc5 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -55,9 +55,9 @@ jobs: Pkg.instantiate()' - run: julia --project=perf perf/samplers.jl - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v1 + - uses: codecov/codecov-action@v3 with: - file: lcov.info + files: lcov.info docs: name: Documentation runs-on: ubuntu-latest From a14a940f9933ac10687538d817cbf5930573192c Mon Sep 17 00:00:00 2001 From: Carlos Parada Date: Thu, 12 Jan 2023 09:32:47 -0600 Subject: [PATCH 56/93] Generalize affine distributions to negative scales (#1657) * Generalize affine distributions to negative scales * Update src/Distributions.jl Co-authored-by: David Widmann * Apply suggestions from code review Co-authored-by: David Widmann * Corrections * Apply suggestions from code review * Update locationscale.jl * Update src/univariate/locationscale.jl * 
Correct floating-point error in tests * correct typo Co-authored-by: David Widmann --- src/univariate/locationscale.jl | 61 ++++++++++++++++++++++++-------- src/univariates.jl | 1 - test/univariate/locationscale.jl | 31 ++++++++-------- 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/src/univariate/locationscale.jl b/src/univariate/locationscale.jl index 772e9c1fc5..97e47d6f74 100644 --- a/src/univariate/locationscale.jl +++ b/src/univariate/locationscale.jl @@ -39,7 +39,7 @@ struct AffineDistribution{T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S} ρ::D # TODO: Remove? It is not used in Distributions anymore function AffineDistribution{T,S,D}(μ::T, σ::T, ρ::D; check_args::Bool=true) where {T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S}} - @check_args AffineDistribution (σ, σ > zero(σ)) + @check_args AffineDistribution (σ, !iszero(σ)) new{T, S, D}(μ, σ, ρ) end function AffineDistribution{T}(μ::T, σ::T, ρ::UnivariateDistribution) where {T<:Real} @@ -50,7 +50,7 @@ struct AffineDistribution{T<:Real, S<:ValueSupport, D<:UnivariateDistribution{S} end function AffineDistribution(μ::T, σ::T, ρ::UnivariateDistribution; check_args::Bool=true) where {T<:Real} - @check_args AffineDistribution (σ, σ > zero(σ)) + @check_args AffineDistribution (σ, !iszero(σ)) _T = promote_type(eltype(ρ), T) return AffineDistribution{_T}(_T(μ), _T(σ), ρ) end @@ -62,7 +62,7 @@ end # aliases const LocationScale{T,S,D} = AffineDistribution{T,S,D} function LocationScale(μ::Real, σ::Real, ρ::UnivariateDistribution; check_args::Bool=true) - Base.depwarn("`LocationScale` is deprecated, use `AffineDistribution` instead", :LocationScale) + Base.depwarn("`LocationScale` is deprecated. 
Use `+` and `*` instead", :LocationScale) # preparation for future PR where I remove σ > 0 check @check_args LocationScale (σ, σ > zero(σ)) return AffineDistribution(μ, σ, ρ; check_args=false) @@ -73,13 +73,19 @@ const DiscreteAffineDistribution{T<:Real,D<:DiscreteUnivariateDistribution} = Af Base.eltype(::Type{<:AffineDistribution{T}}) where T = T -minimum(d::AffineDistribution) = d.μ + d.σ * minimum(d.ρ) -maximum(d::AffineDistribution) = d.μ + d.σ * maximum(d.ρ) +minimum(d::AffineDistribution) = + d.σ > 0 ? d.μ + d.σ * minimum(d.ρ) : d.μ + d.σ * maximum(d.ρ) +maximum(d::AffineDistribution) = + d.σ > 0 ? d.μ + d.σ * maximum(d.ρ) : d.μ + d.σ * minimum(d.ρ) support(d::AffineDistribution) = affinedistribution_support(d.μ, d.σ, support(d.ρ)) function affinedistribution_support(μ::Real, σ::Real, support::RealInterval) - return RealInterval(μ + σ * support.lb, μ + σ * support.ub) + if σ > 0 + return RealInterval(μ + σ * support.lb, μ + σ * support.ub) + else + return RealInterval(μ + σ * support.ub, μ + σ * support.lb) + end end -affinedistribution_support(μ::Real, σ::Real, support) = μ .+ σ .* support +affinedistribution_support(μ::Real, σ::Real, support) = σ > 0 ? 
μ .+ σ .* support : μ .+ σ .* reverse(support) AffineDistribution(μ::Real, σ::Real, d::AffineDistribution) = AffineDistribution(μ + d.μ * σ, σ * d.σ, d.ρ) @@ -106,32 +112,56 @@ mode(d::AffineDistribution) = d.μ + d.σ * mode(d.ρ) modes(d::AffineDistribution) = d.μ .+ d.σ .* modes(d.ρ) var(d::AffineDistribution) = d.σ^2 * var(d.ρ) -std(d::AffineDistribution) = d.σ * std(d.ρ) -skewness(d::AffineDistribution) = skewness(d.ρ) +std(d::AffineDistribution) = abs(d.σ) * std(d.ρ) +skewness(d::AffineDistribution) = sign(d.σ) * skewness(d.ρ) kurtosis(d::AffineDistribution) = kurtosis(d.ρ) isplatykurtic(d::AffineDistribution) = isplatykurtic(d.ρ) isleptokurtic(d::AffineDistribution) = isleptokurtic(d.ρ) ismesokurtic(d::AffineDistribution) = ismesokurtic(d.ρ) -entropy(d::ContinuousAffineDistribution) = entropy(d.ρ) + log(d.σ) +entropy(d::ContinuousAffineDistribution) = entropy(d.ρ) + log(abs(d.σ)) entropy(d::DiscreteAffineDistribution) = entropy(d.ρ) mgf(d::AffineDistribution,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) #### Evaluation & Sampling -pdf(d::ContinuousAffineDistribution, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ +pdf(d::ContinuousAffineDistribution, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / abs(d.σ) pdf(d::DiscreteAffineDistribution, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) -logpdf(d::ContinuousAffineDistribution,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) +logpdf(d::ContinuousAffineDistribution,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(abs(d.σ)) logpdf(d::DiscreteAffineDistribution, x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) -for f in (:cdf, :ccdf, :logcdf, :logccdf) - @eval $f(d::AffineDistribution, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) +# CDF methods + +for (f, fc) in ((:cdf, :ccdf), (:ccdf, :cdf), (:logcdf, :logccdf), (:logccdf, :logcdf)) + @eval function $f(d::ContinuousAffineDistribution, x::Real) + z = (x - d.μ) / d.σ + return d.σ > 0 ? 
$f(d.ρ, z) : $fc(d.ρ, z) + end +end + +function cdf(d::DiscreteAffineDistribution, x::Real) + z = (x - d.μ) / d.σ + # Have to include probability mass at endpoints + return d.σ > 0 ? cdf(d.ρ, z) : (ccdf(d.ρ, z) + pdf(d.ρ, z)) +end +function ccdf(d::DiscreteAffineDistribution, x::Real) + z = (x - d.μ) / d.σ + # Have to exclude probability mass at endpoints + return d.σ > 0 ? ccdf(d.ρ, z) : (cdf(d.ρ, z) - pdf(d.ρ, z)) +end +function logcdf(d::DiscreteAffineDistribution, x::Real) + z = (x - d.μ) / d.σ + return d.σ > 0 ? logcdf(d.ρ, z) : logaddexp(logccdf(d.ρ, z), logpdf(d.ρ, z)) +end +function logccdf(d::DiscreteAffineDistribution, x::Real) + z = (x - d.μ) / d.σ + return d.σ > 0 ? logccdf(d.ρ, z) : logsubexp(logcdf(d.ρ, z), logpdf(d.ρ, z)) end -quantile(d::AffineDistribution,q::Real) = d.μ + d.σ * quantile(d.ρ,q) +quantile(d::AffineDistribution, q::Real) = d.μ + d.σ * quantile(d.ρ, d.σ > 0 ? q : 1 - q) rand(rng::AbstractRNG, d::AffineDistribution) = d.μ + d.σ * rand(rng, d.ρ) cf(d::AffineDistribution, t::Real) = cf(d.ρ,t*d.σ) * exp(1im*t*d.μ) @@ -144,4 +174,5 @@ Base.:+(x::Real, d::UnivariateDistribution) = d + x Base.:*(x::Real, d::UnivariateDistribution) = AffineDistribution(zero(x), x, d) Base.:*(d::UnivariateDistribution, x::Real) = x * d Base.:-(d::UnivariateDistribution, x::Real) = d + -x +Base.:-(d::UnivariateDistribution) = -one(partype(d)) * d Base.:/(d::UnivariateDistribution, x::Real) = inv(x) * d diff --git a/src/univariates.jl b/src/univariates.jl index e5efe3133f..177940a44b 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -6,7 +6,6 @@ struct RealInterval{T<:Real} end RealInterval(lb::Real, ub::Real) = RealInterval(promote(lb, ub)...) 
- minimum(r::RealInterval) = r.lb maximum(r::RealInterval) = r.ub extrema(r::RealInterval) = (r.lb, r.ub) diff --git a/test/univariate/locationscale.jl b/test/univariate/locationscale.jl index ce05dc04d4..761ce55bbf 100644 --- a/test/univariate/locationscale.jl +++ b/test/univariate/locationscale.jl @@ -40,7 +40,7 @@ function test_location_scale( @testset "Promotions and conversions" begin @testset "$k" for (k,dtest) in d_dict - if dtest isa LocationScale + if dtest isa Distributions.AffineDistribution @test typeof(dtest.μ) === typeof(dtest.σ) @test location(dtest) ≈ μ atol=1e-15 @test scale(dtest) ≈ σ atol=1e-15 @@ -90,7 +90,7 @@ function test_location_scale( @test loglikelihood(dtest, xs) ≈ loglikelihood(dref, xs) @test cdf(dtest, x) ≈ cdf(dref, x) - @test logcdf(dtest, x) ≈ logcdf(dref, x) + @test logcdf(dtest, x) ≈ logcdf(dref, x) atol=1e-14 @test ccdf(dtest, x) ≈ ccdf(dref, x) atol=1e-14 @test logccdf(dtest, x) ≈ logccdf(dref, x) atol=1e-14 @@ -131,7 +131,7 @@ function test_location_scale_normal( rng::Union{AbstractRNG, Missing}, μ::Real, σ::Real, μD::Real, σD::Real, ) ρ = Normal(μD, σD) - dref = Normal(μ + σ * μD, σ * σD) + dref = Normal(μ + σ * μD, abs(σ) * σD) @test dref === μ + σ * ρ return test_location_scale(rng, μ, σ, ρ, dref) end @@ -147,18 +147,21 @@ end @testset "AffineDistribution" begin rng = MersenneTwister(123) - for _rng in (missing, rng) - test_location_scale_normal(_rng, 0.3, 0.2, 0.1, 0.2) - test_location_scale_normal(_rng, -0.3, 0.1, -0.1, 0.3) - test_location_scale_normal(_rng, 1.3, 0.4, -0.1, 0.5) + @testset "Normal" begin + for _rng in (missing, rng), sign in (1, -1) + test_location_scale_normal(_rng, 0.3, sign * 0.2, 0.1, 0.2) + test_location_scale_normal(_rng, -0.3, sign * 0.1, -0.1, 0.3) + test_location_scale_normal(_rng, 1.3, sign * 0.4, -0.1, 0.5) + end + test_location_scale_normal(rng, ForwardDiff.Dual(0.3), 0.2, 0.1, 0.2) end - test_location_scale_normal(rng, ForwardDiff.Dual(0.3), 0.2, 0.1, 0.2) - - probs = normalize!(rand(10), 1) 
- for _rng in (missing, rng) - test_location_scale_discretenonparametric(_rng, 1//3, 1//2, 1:10, probs) - test_location_scale_discretenonparametric(_rng, -1//4, 1//3, (-10):(-1), probs) - test_location_scale_discretenonparametric(_rng, 6//5, 3//2, 15:24, probs) + @testset "DiscreteNonParametric" begin + probs = normalize!(rand(10), 1) + for _rng in (missing, rng), sign in (1, -1) + test_location_scale_discretenonparametric(_rng, 1//3, sign * 1//2, 1:10, probs) + test_location_scale_discretenonparametric(_rng, -1//4, sign * 1//3, (-10):(-1), probs) + test_location_scale_discretenonparametric(_rng, 6//5, sign * 3//2, 15:24, probs) + end end @test_logs Distributions.AffineDistribution(1.0, 1, Normal()) From d21c5a3d2386910b586cd9da188721f313073570 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 12 Jan 2023 16:35:07 +0100 Subject: [PATCH 57/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ccc86805da..ceb309e885 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.79" +version = "0.25.80" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 34a05d8a1671052624e7fa246b58484acc32cfe5 Mon Sep 17 00:00:00 2001 From: Priya Nagda <64613009+pri1311@users.noreply.github.com> Date: Mon, 16 Jan 2023 21:57:34 +0530 Subject: [PATCH 58/93] add docstring for `DirichletMultinomial` (#1656) * add docstring for `DirichletMultinomial` * add type signature * add fixes * remove spaces * Update dirichletmultinomial.jl Fix minor typo (extra space) * Additional fixes Co-authored-by: Carlos Parada Co-authored-by: David Widmann --- src/multivariate/dirichletmultinomial.jl | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/multivariate/dirichletmultinomial.jl b/src/multivariate/dirichletmultinomial.jl index 4dc0d3fbb7..eb15990cb2 100644 
--- a/src/multivariate/dirichletmultinomial.jl +++ b/src/multivariate/dirichletmultinomial.jl @@ -1,3 +1,35 @@ +""" + DirichletMultinomial + +The [Dirichlet-multinomial distribution](https://en.wikipedia.org/wiki/Dirichlet-multinomial_distribution) +is the distribution of a draw from a multinomial distribution where each sample has a +slightly different probability vector, drawn from a common Dirichlet distribution. + +This contrasts with the multinomial distribution, which assumes that all observations arise +from a single fixed probability vector. This enables the Dirichlet-multinomial distribution to +accommodate more variable (a.k.a, over-dispersed) count data than the multinomial distribution. + +The probability mass function is given by + +```math +f(x; \\alpha) = \\frac{n! \\Gamma(\\alpha_0)} +{\\Gamma(n+\\alpha_0)}\\prod_{k=1}^K\\frac{\\Gamma(x_{k}+\\alpha_{k})} +{x_{k}! \\Gamma(\\alpha_{k})} +``` +where +- ``n = \\sum_k x_k`` +- ``\\alpha_0 = \\sum_k \\alpha_k`` + +```julia +# Let α be a vector +DirichletMultinomial(n, α) # Dirichlet-multinomial distribution for n trials with parameter +vector α. + +# Let k be a positive integer +DirichletMultinomial(n, k) # Dirichlet-multinomial distribution with n trials and parameter +vector of length k of ones. 
+``` +""" struct DirichletMultinomial{T <: Real} <: DiscreteMultivariateDistribution n::Int α::Vector{T} From c431d20d3a0f00e0cf22f9fd959b9bb11d978517 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 2 Feb 2023 18:23:04 +0100 Subject: [PATCH 59/93] change some floating point equality checks to approximate (#1671) --- test/univariate/discrete/geometric.jl | 4 ++-- test/univariates.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/univariate/discrete/geometric.jl b/test/univariate/discrete/geometric.jl index 4fd9ecd047..9845946db7 100644 --- a/test/univariate/discrete/geometric.jl +++ b/test/univariate/discrete/geometric.jl @@ -4,8 +4,8 @@ using FiniteDifferences @testset "Geometric mgf and k vs k-1 parametrization #1604" begin d = Geometric(0.2) - @test mgf(d, 0) == 1 - @test cf(d, 0) == 1 + @test mgf(d, 0) ≈ 1 + @test cf(d, 0) ≈ 1 fdm1 = central_fdm(5, 1) @test fdm1(Base.Fix1(mgf, d), 0) ≈ mean(d) diff --git a/test/univariates.jl b/test/univariates.jl index c4ecbcc053..66fcbc6b3a 100644 --- a/test/univariates.jl +++ b/test/univariates.jl @@ -127,13 +127,13 @@ function verify_and_test(D::Union{Type,Function}, d::UnivariateDistribution, dct try m = mgf(d,0.0) - @test m == 1.0 + @test m ≈ 1.0 catch e isa(e, MethodError) || throw(e) end try c = cf(d,0.0) - @test c == 1.0 + @test c ≈ 1.0 # test some extra values: should all be well-defined for t in (0.1,-0.1,1.0,-1.0) @test !isnan(cf(d,t)) From dca29d9b29bf8d77e0bbb29047a6fd68583df0fe Mon Sep 17 00:00:00 2001 From: quildtide <42811940+quildtide@users.noreply.github.com> Date: Wed, 15 Feb 2023 05:12:19 -0500 Subject: [PATCH 60/93] Replace GLOBAL_RNG with default_rng (#1679) Replace GLOBAL_RNG with default_rng --- src/Distributions.jl | 2 +- src/cholesky/lkjcholesky.jl | 2 +- src/functionals.jl | 2 +- src/genericrand.jl | 8 ++++---- src/matrix/lkj.jl | 2 +- src/test_utils.jl | 2 +- src/univariate/continuous/chernoff.jl | 2 +- test/testutils.jl | 2 +- 8 files changed, 11 
insertions(+), 11 deletions(-) diff --git a/src/Distributions.jl b/src/Distributions.jl index 46b18f5418..3b9397736c 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -14,7 +14,7 @@ using LinearAlgebra, Printf import LinearAlgebra: dot, rank using Random -import Random: GLOBAL_RNG, rand!, SamplerRangeInt +import Random: default_rng, rand!, SamplerRangeInt import Statistics: mean, median, quantile, std, var, cov, cor import StatsBase: kurtosis, skewness, entropy, mode, modes, diff --git a/src/cholesky/lkjcholesky.jl b/src/cholesky/lkjcholesky.jl index 89f3d738d7..3540e31e29 100644 --- a/src/cholesky/lkjcholesky.jl +++ b/src/cholesky/lkjcholesky.jl @@ -172,7 +172,7 @@ function Base.rand(rng::AbstractRNG, d::LKJCholesky, dims::Dims) return Rs end -Random.rand!(d::LKJCholesky, R::LinearAlgebra.Cholesky) = Random.rand!(GLOBAL_RNG, d, R) +Random.rand!(d::LKJCholesky, R::LinearAlgebra.Cholesky) = Random.rand!(default_rng(), d, R) function Random.rand!(rng::AbstractRNG, d::LKJCholesky, R::LinearAlgebra.Cholesky) return _lkj_cholesky_onion_sampler!(rng, d, R) end diff --git a/src/functionals.jl b/src/functionals.jl index 410a9c396c..7ccc80c0e6 100644 --- a/src/functionals.jl +++ b/src/functionals.jl @@ -11,7 +11,7 @@ function expectation(g, distr::DiscreteUnivariateDistribution; epsilon::Real=1e- return sum(x -> pdf(distr, x) * g(x), minval:maxval) end -function expectation(g, distr::MultivariateDistribution; nsamples::Int=100, rng::AbstractRNG=GLOBAL_RNG) +function expectation(g, distr::MultivariateDistribution; nsamples::Int=100, rng::AbstractRNG=default_rng()) nsamples > 0 || throw(ArgumentError("number of samples should be > 0")) # We use a function barrier to work around type instability of `sampler(dist)` return mcexpectation(rng, g, sampler(distr), nsamples) diff --git a/src/genericrand.jl b/src/genericrand.jl index 58914e75d9..80123b39fc 100644 --- a/src/genericrand.jl +++ b/src/genericrand.jl @@ -19,8 +19,8 @@ Generate `n` samples from `s`. 
The form of the returned object depends on the va Generate an array of samples from `s` whose shape is determined by the given dimensions. """ -rand(s::Sampleable, dims::Int...) = rand(GLOBAL_RNG, s, dims...) -rand(s::Sampleable, dims::Dims) = rand(GLOBAL_RNG, s, dims) +rand(s::Sampleable, dims::Int...) = rand(default_rng(), s, dims...) +rand(s::Sampleable, dims::Dims) = rand(default_rng(), s, dims) rand(rng::AbstractRNG, s::Sampleable, dim1::Int, moredims::Int...) = rand(rng, s, (dim1, moredims...)) @@ -74,7 +74,7 @@ form as specified above. The rules are summarized as below: matrices with each element for a sample matrix. """ function rand! end -Base.@propagate_inbounds rand!(s::Sampleable, X::AbstractArray) = rand!(GLOBAL_RNG, s, X) +Base.@propagate_inbounds rand!(s::Sampleable, X::AbstractArray) = rand!(default_rng(), s, X) Base.@propagate_inbounds function rand!(rng::AbstractRNG, s::Sampleable, X::AbstractArray) return _rand!(rng, s, X) end @@ -134,7 +134,7 @@ Base.@propagate_inbounds function rand!( x::AbstractArray{<:AbstractArray{<:Real,N}}, allocate::Bool, ) where {N} - return rand!(GLOBAL_RNG, s, x, allocate) + return rand!(default_rng(), s, x, allocate) end @inline function rand!( rng::AbstractRNG, diff --git a/src/matrix/lkj.jl b/src/matrix/lkj.jl index 309d71bda9..4c62e43025 100644 --- a/src/matrix/lkj.jl +++ b/src/matrix/lkj.jl @@ -124,7 +124,7 @@ function _rand!(rng::AbstractRNG, d::LKJ, R::AbstractMatrix) R .= _lkj_onion_sampler(d.d, d.η, rng) end -function _lkj_onion_sampler(d::Integer, η::Real, rng::AbstractRNG = Random.GLOBAL_RNG) +function _lkj_onion_sampler(d::Integer, η::Real, rng::AbstractRNG = Random.default_rng()) # Section 3.2 in LKJ (2009 JMA) # 1. Initialization R = ones(typeof(η), d, d) diff --git a/src/test_utils.jl b/src/test_utils.jl index 98d1f8a7a2..41e0d588ae 100644 --- a/src/test_utils.jl +++ b/src/test_utils.jl @@ -14,7 +14,7 @@ __rand!(rng::AbstractRNG, args...) = rand!(rng, args...) 
""" test_mvnormal( - g::AbstractMvNormal, n_tsamples::Int=10^6, rng::AbstractRNG=Random.GLOBAL_RNG + g::AbstractMvNormal, n_tsamples::Int=10^6, rng::AbstractRNG=Random.default_rng() ) Test that `AbstractMvNormal` implements the expected API. diff --git a/src/univariate/continuous/chernoff.jl b/src/univariate/continuous/chernoff.jl index 920f05b2c1..3d319db252 100644 --- a/src/univariate/continuous/chernoff.jl +++ b/src/univariate/continuous/chernoff.jl @@ -213,7 +213,7 @@ kurtosis(d::Chernoff, excess::Bool) = kurtosis(d) + (excess ? 0.0 : 3.0) entropy(d::Chernoff) = -0.7515605300273104 ### Random number generation -rand(d::Chernoff) = rand(GLOBAL_RNG, d) +rand(d::Chernoff) = rand(default_rng(), d) function rand(rng::AbstractRNG, d::Chernoff) # Ziggurat random number generator --- slow in the tails # constants needed for the Ziggurat algorithm A = 0.03248227216266608 diff --git a/test/testutils.jl b/test/testutils.jl index eb90b830fa..42ca1bc7e1 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -143,7 +143,7 @@ function test_samples(s::Sampleable{Univariate, Discrete}, # the sampleable @assert cub[i] >= clb[i] end - # generate samples using RNG passed or global RNG + # generate samples using RNG passed or default RNG samples = ismissing(rng) ? 
rand(s, n) : rand(rng, s, n) @assert length(samples) == n From 747409c2b7973205a60ba57531eb0648da44aac6 Mon Sep 17 00:00:00 2001 From: quildtide <42811940+quildtide@users.noreply.github.com> Date: Sat, 18 Feb 2023 17:46:23 -0500 Subject: [PATCH 61/93] Add vectorized rand for Normal dist (#1680) * Add vectorized rand for Normal dist * Use muladd Co-authored-by: David Widmann --------- Co-authored-by: David Widmann --- src/univariate/continuous/normal.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/univariate/continuous/normal.jl b/src/univariate/continuous/normal.jl index 6f8d131b66..f652a27d77 100644 --- a/src/univariate/continuous/normal.jl +++ b/src/univariate/continuous/normal.jl @@ -107,6 +107,8 @@ Base.:*(c::Real, d::Normal) = Normal(c * d.μ, abs(c) * d.σ) rand(rng::AbstractRNG, d::Normal{T}) where {T} = d.μ + d.σ * randn(rng, float(T)) +rand!(rng::AbstractRNG, d::Normal, A::AbstractArray{<:Real}) = A .= muladd.(d.σ, randn!(rng, A), d.μ) + #### Fitting struct NormalStats <: SufficientStats From 7a9494396711c54489ec825d9cbd94abfeab070a Mon Sep 17 00:00:00 2001 From: spaette <111918424+spaette@users.noreply.github.com> Date: Sun, 19 Feb 2023 11:07:29 -0600 Subject: [PATCH 62/93] typos (#1681) --- src/matrix/lkj.jl | 2 +- src/truncated/exponential.jl | 2 +- src/univariate/continuous/rayleigh.jl | 2 +- src/univariate/continuous/semicircle.jl | 2 +- test/functionals.jl | 2 +- test/multivariate/mvnormal.jl | 2 +- test/runtests.jl | 2 +- test/testutils.jl | 4 ++-- test/univariate_bounds.jl | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/matrix/lkj.jl b/src/matrix/lkj.jl index 4c62e43025..e56b7888e0 100644 --- a/src/matrix/lkj.jl +++ b/src/matrix/lkj.jl @@ -181,7 +181,7 @@ function _rand_params(::Type{LKJ}, elty, n::Int, p::Int) end # ----------------------------------------------------------------------------- -# Several redundant implementations of the recipricol integrating constant. 
+# Several redundant implementations of the reciprocal integrating constant. # If f(R; n) = c₀ |R|ⁿ⁻¹, these give log(1 / c₀). # Every integrating constant formula given in LKJ (2009 JMA) is an expression # for 1 / c₀, even if they say that it is not. diff --git a/src/truncated/exponential.jl b/src/truncated/exponential.jl index 6bbdc95d5a..c04c90d870 100644 --- a/src/truncated/exponential.jl +++ b/src/truncated/exponential.jl @@ -1,5 +1,5 @@ ##### -##### Truncated exponential distribition +##### Truncated exponential distribution ##### function mean(d::Truncated{<:Exponential,Continuous}) diff --git a/src/univariate/continuous/rayleigh.jl b/src/univariate/continuous/rayleigh.jl index 90ac952d99..9b475bf125 100644 --- a/src/univariate/continuous/rayleigh.jl +++ b/src/univariate/continuous/rayleigh.jl @@ -99,7 +99,7 @@ rand(rng::AbstractRNG, d::Rayleigh) = d.σ * sqrt(2 * randexp(rng)) #### Fitting function fit_mle(::Type{<:Rayleigh}, x::AbstractArray{T}) where {T<:Real} - # Compute MLE (and unbiasd estimator) of σ^2 + # Compute MLE (and unbiased estimator) of σ^2 s2 = zero(T) for xi in x s2 += xi^2 diff --git a/src/univariate/continuous/semicircle.jl b/src/univariate/continuous/semicircle.jl index 0480000e35..f04d4c9ed2 100644 --- a/src/univariate/continuous/semicircle.jl +++ b/src/univariate/continuous/semicircle.jl @@ -74,7 +74,7 @@ end function rand(rng::AbstractRNG, d::Semicircle) # Idea: - # sample polar coodinates r,θ + # sample polar coordinates r,θ # of point uniformly distributed on radius d.r half disk # project onto x axis θ = rand(rng) # multiple of π diff --git a/test/functionals.jl b/test/functionals.jl index 43ac8e3b6b..01357355a5 100644 --- a/test/functionals.jl +++ b/test/functionals.jl @@ -4,7 +4,7 @@ struct CholeskyMvNormal{M,T} <: Distributions.AbstractMvNormal L::T end -# Constructor for diagonal covariance matrices used in the tests belows +# Constructor for diagonal covariance matrices used in the tests below function 
CholeskyMvNormal(m::Vector, Σ::Diagonal) L = Diagonal(map(sqrt, Σ.diag)) return CholeskyMvNormal{typeof(m),typeof(L)}(m, L) diff --git a/test/multivariate/mvnormal.jl b/test/multivariate/mvnormal.jl index 1386764a63..4af9275b3f 100644 --- a/test/multivariate/mvnormal.jl +++ b/test/multivariate/mvnormal.jl @@ -241,7 +241,7 @@ end @test g.Σ.diag ≈ diag(Cw) end -@testset "MvNormal affine tranformations" begin +@testset "MvNormal affine transformations" begin @testset "moment identities" begin for n in 1:5 # dimension # distribution diff --git a/test/runtests.jl b/test/runtests.jl index 3547a30d17..8b4d88b988 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -160,7 +160,7 @@ printstyled("Running tests:\n", color=:blue) Random.seed!(345679) -# to reduce redundancy, we might break this file down into seperate `$t * "_utils.jl"` files +# to reduce redundancy, we might break this file down into separate `$t * "_utils.jl"` files include("testutils.jl") @testset "Distributions" begin diff --git a/test/testutils.jl b/test/testutils.jl index 42ca1bc7e1..531aa78bf3 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -105,7 +105,7 @@ function test_samples(s::Sampleable{Univariate, Discrete}, # the sampleable # The basic idea # ------------------ - # Generate n samples, and count the occurences of each value within a reasonable range. + # Generate n samples, and count the occurrences of each value within a reasonable range. # For each distinct value, it computes an confidence interval of the counts # and checks whether the count is within this interval. # @@ -627,7 +627,7 @@ function pvalue_kolmogorovsmirnoff(x::AbstractVector, d::UnivariateDistribution) end function test_affine_transformations(::Type{T}, params...) where {T<:UnivariateDistribution} - @testset "affine tranformations ($T)" begin + @testset "affine transformations ($T)" begin # distribution d = T(params...) 
diff --git a/test/univariate_bounds.jl b/test/univariate_bounds.jl index 275eb112cb..aa633ec39a 100644 --- a/test/univariate_bounds.jl +++ b/test/univariate_bounds.jl @@ -1,7 +1,7 @@ using Distributions using Test -# to make sure that subtypes provides the required behavoir without having to add +# to make sure that subtypes provides the required behavior without having to add # a dependency to InteractiveUtils function _subtypes(m::Module, x::Type, sts=Base.IdSet{Any}(), visited=Base.IdSet{Module}()) push!(visited, m) From 37f43c3bfe9c955b22dd7791e260ad08be499661 Mon Sep 17 00:00:00 2001 From: jwright11 <40541257+jwright11@users.noreply.github.com> Date: Mon, 20 Feb 2023 15:52:27 -0600 Subject: [PATCH 63/93] fix cdf & pdf for TriangularDist (#1674) * fix cdf & pdf for TriangularDist * type stable for cdf & pdf of triangulardist * triangulardist - restored pdf, type stable cdf * triangulardist: add a --- src/univariate/continuous/triangular.jl | 105 ++++++++++++++++---- test/runtests.jl | 2 +- test/univariate/continuous/triangular.jl | 118 +++++++++++++++++++++++ 3 files changed, 203 insertions(+), 22 deletions(-) create mode 100644 test/univariate/continuous/triangular.jl diff --git a/src/univariate/continuous/triangular.jl b/src/univariate/continuous/triangular.jl index d40196465a..4d715c8960 100644 --- a/src/univariate/continuous/triangular.jl +++ b/src/univariate/continuous/triangular.jl @@ -93,20 +93,26 @@ entropy(d::TriangularDist{T}) where {T<:Real} = one(T)/2 + log((d.b - d.a) / 2) function pdf(d::TriangularDist, x::Real) a, b, c = params(d) - return if x < c - 2 * max(x - a, 0) / ((b - a) * (c - a)) + res = if x < c + 2 * (x - a) / ((b - a) * (c - a)) + elseif x > c + 2 * (b - x) / ((b - a) * (b - c)) else - 2 * max(b - x, 0) / ((b - a) * (b - c)) + # Handle x == c separately to avoid `NaN` if `c == a` or `c == b` + oftype(x - a, 2) / (b - a) end + return insupport(d, x) ? 
res : zero(res) end logpdf(d::TriangularDist, x::Real) = log(pdf(d, x)) function cdf(d::TriangularDist, x::Real) a, b, c = params(d) - return if x < c - max(x - a, 0)^2 / ((b - a) * (c - a)) + if x < c + res = (x - a)^2 / ((b - a) * (c - a)) + return x < a ? zero(res) : res else - 1 - max(b - x, 0)^2 / ((b - a) * (b - c)) + res = 1 - (b - x)^2 / ((b - a) * (b - c)) + return x ≥ b ? one(res) : res end end @@ -119,26 +125,83 @@ function quantile(d::TriangularDist, p::Real) b - sqrt(b_m_a * (b - c) * (1 - p)) end -function mgf(d::TriangularDist{T}, t::Real) where T<:Real - if t == zero(t) - return one(T) +_expm1(x::Number) = expm1(x) +if VERSION < v"1.7.0-DEV.1172" + # expm1(::Float16) is not defined in older Julia versions + _expm1(x::Float16) = Float16(expm1(Float32(x))) + function _expm1(x::Complex{Float16}) + xr, xi = reim(x) + yr, yi = reim(expm1(complex(Float32(xr), Float32(xi)))) + return complex(Float16(yr), Float16(yi)) + end +end + +""" + _phi2(x::Real) + +Compute +```math +2 (exp(x) - 1 - x) / x^2 +``` +with the correct limit at ``x = 0``. +""" +function _phi2(x::Real) + res = 2 * (_expm1(x) - x) / x^2 + return iszero(x) ? one(res) : res +end +function mgf(d::TriangularDist, t::Real) + a, b, c = params(d) + # In principle, only two branches (degenerate + non-degenerate case) are needed + # But writing out all four cases will avoid unnecessary computations + if a < c + if c < b + # Case: a < c < b + return exp(c * t) * ((c - a) * _phi2((a - c) * t) + (b - c) * _phi2((b - c) * t)) / (b - a) + else + # Case: a < c = b + return exp(c * t) * _phi2((a - c) * t) + end + elseif c < b + # Case: a = c < b + return exp(c * t) * _phi2((b - c) * t) else - (a, b, c) = params(d) - u = (b - c) * exp(a * t) - (b - a) * exp(c * t) + (c - a) * exp(b * t) - v = (b - a) * (c - a) * (b - c) * t^2 - return 2u / v + # Case: a = c = b + return exp(c * t) end end -function cf(d::TriangularDist{T}, t::Real) where T<:Real - # Is this correct? 
- if t == zero(t) - return one(Complex{T}) +""" + _cisphi2(x::Real) + +Compute +```math +- 2 (exp(x im) - 1 - x im) / x^2 +``` +with the correct limit at ``x = 0``. +""" +function _cisphi2(x::Real) + z = x * im + res = -2 * (_expm1(z) - z) / x^2 + return iszero(x) ? one(res) : res +end +function cf(d::TriangularDist, t::Real) + a, b, c = params(d) + # In principle, only two branches (degenerate + non-degenerate case) are needed + # But writing out all four cases will avoid unnecessary computations + if a < c + if c < b + # Case: a < c < b + return cis(c * t) * ((c - a) * _cisphi2((a - c) * t) + (b - c) * _cisphi2((b - c) * t)) / (b - a) + else + # Case: a < c = b + return cis(c * t) * _cisphi2((a - c) * t) + end + elseif c < b + # Case: a = c < b + return cis(c * t) * _cisphi2((b - c) * t) else - (a, b, c) = params(d) - u = (b - c) * cis(a * t) - (b - a) * cis(c * t) + (c - a) * cis(b * t) - v = (b - a) * (c - a) * (b - c) * t^2 - return -2u / v + # Case: a = c = b + return cis(c * t) end end diff --git a/test/runtests.jl b/test/runtests.jl index 8b4d88b988..186004edd6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -88,6 +88,7 @@ const tests = [ "univariate/continuous/tdist", "multivariate/product", "eachvariate", + "univariate/continuous/triangular", ### missing files compared to /src: # "common", @@ -142,7 +143,6 @@ const tests = [ # "univariate/continuous/studentizedrange", # "univariate/continuous/symtriangular", # "univariate/continuous/tdist", - # "univariate/continuous/triangular", # "univariate/continuous/triweight", # "univariate/continuous/noncentralf", # "univariate/discrete/geometric", diff --git a/test/univariate/continuous/triangular.jl b/test/univariate/continuous/triangular.jl new file mode 100644 index 0000000000..213c7fc1c4 --- /dev/null +++ b/test/univariate/continuous/triangular.jl @@ -0,0 +1,118 @@ +using Distributions +using FiniteDifferences + +using Statistics +using Test + +@testset "triangular" begin + @testset "constructor" begin 
+ @test_throws ArgumentError TriangularDist(1, 0, 0) + @test_throws ArgumentError TriangularDist(1, 1, 0) + @test_throws ArgumentError TriangularDist(0, 0, 1) + end + + @testset "type stability" begin + Ts = (Float16, Float32, Float64, BigFloat, Rational{Int}, Int, BigInt) + for T1 in Ts + a = T1(1) + b = T1(7) + c = T1(5) + dist = TriangularDist(a, b, c) + distf64 = TriangularDist(Float64(a), Float64(b), Float64(c)) + + for T2 in Ts + xa = T2(1) + xb = T2(7) + xsmall = T2(0) + xlarge = T2(8) + x_middle_a_c = T2(3) + x_middle_b_c = T2(6) + t1 = T2(2) + t2 = T2(5) + + for f in (pdf, cdf) + @inferred f(dist, xa) ≈ f(distf64, Float64(xa)) + @inferred f(dist, xb) ≈ f(distf64, Float64(xb)) + @inferred f(dist, xsmall) ≈ f(distf64, Float64(xsmall)) + @inferred f(dist, xlarge) ≈ f(distf64, Float64(xlarge)) + @inferred f(dist, x_middle_a_c) ≈ f(distf64, Float64(x_middle_a_c)) + @inferred f(dist, x_middle_b_c) ≈ f(distf64, Float64(x_middle_b_c)) + end + + for f in (mgf, cf) + @inferred f(dist, t1) ≈ f(distf64, Float64(t1)) + @inferred f(dist, t2) ≈ f(distf64, Float64(t2)) + end + end + end + end + + @testset "interface" begin + fdm = central_fdm(5, 1) + fdm2 = central_fdm(5, 2) + for (a, b, c) in ((2, 10, 8), (2, 6, 4), (2, 4, 4), (2, 4, 2), (2, 2, 2)) + d = TriangularDist(a, b, c) + + @test params(d) == (a, b, c) + @test mode(d) == c + @test mean(d) == (a + b + c) / 3 + @test median(d) == (c >= middle(a, b) ? + a + sqrt((b - a) * (c - a) / 2) : + b - sqrt((b - a) * (b - c) / 2)) + @test var(d) == (a^2 + b^2 + c^2 - a * b - a * c - b * c) / 18 + + @test kurtosis(d) == -3 / 5 + @test entropy(d) == 1 / 2 + log((b - a) / 2) + + # x < a + for x in (a - 1, a - 3) + @test pdf(d, x) == 0 + @test logpdf(d, x) == -Inf + @test cdf(d, x) == 0 + end + # x = a + @test pdf(d, a) == (a == b ? Inf : (a == c ? 2 / (b - a) : 0)) + @test logpdf(d, a) == log(pdf(d, a)) + @test cdf(d, a) == (a == b ? 
1 : 0) + # a < x < c + if a < c + x = (a + c) / 2 + @test pdf(d, x) == 2 * (x - a) / ((b - a) * (c - a)) + @test logpdf(d, x) == log(pdf(d, x)) + @test cdf(d, x) == (x - a)^2 / ((b - a) * (c - a)) + end + # x = c + @test pdf(d, c) == (a == b ? Inf : 2 / (b - a)) + @test logpdf(d, c) == log(pdf(d, c)) + @test cdf(d, c) == (c == b ? 1 : (c - a) / (b - a)) + # c < x < b + if c < b + x = (c + b) / 2 + @test pdf(d, x) == 2 * (b - x) / ((b - a) * (b - c)) + @test logpdf(d, x) == log(pdf(d, x)) + @test cdf(d, x) == 1 - (b - x)^2 / ((b - a) * (b - c)) + end + # x = b + @test pdf(d, b) == (b == a ? Inf : (b == c ? 2 / (b - a) : 0)) + @test logpdf(d, b) == log(pdf(d, b)) + @test cdf(d, b) == 1 + # x > b + for x in (b + 1, b + 3) + @test pdf(d, x) == 0 + @test logpdf(d, x) == -Inf + @test cdf(d, x) == 1 + end + + @test quantile(d, 0) == a + @test quantile(d, 0.5) == median(d) + @test quantile(d, 1) == b + + @test mgf(d, 0) == 1 + @test fdm(Base.Fix1(mgf, d), 0.0) ≈ mean(d) + @test fdm2(Base.Fix1(mgf, d), 0.0) ≈ mean(d)^2 + var(d) rtol=1e-6 + @test cf(d, 0) == 1 + @test fdm(Base.Fix1(cf, d), 0.0) ≈ mean(d) * im + @test fdm2(Base.Fix1(cf, d), 0.0) ≈ -(mean(d)^2 + var(d)) rtol=1e-6 + end + end +end From 221a9e801c7cf5280f85b2c893862688c60322d8 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 20 Feb 2023 22:54:32 +0100 Subject: [PATCH 64/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ceb309e885..0621e87158 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.80" +version = "0.25.81" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 2fd8bf07a22ed1f44defe07510cb01e3346f50b5 Mon Sep 17 00:00:00 2001 From: Philip Bittihn Date: Thu, 23 Feb 2023 13:34:50 +0100 Subject: [PATCH 65/93] SkewedExponentialPower, PGeneralizedGaussian: reproducible rand 
(#1664) * SkewedExponentialPower, PGeneralizedGaussian: reproducible rand * Avoid Float64 literal in PGeneralizedGaussian rand implementation Co-authored-by: David Widmann * Fix sampling of `PGeneralizedGaussian` + add tests * Improve `logcdf` * Fix typo --------- Co-authored-by: Philip Bittihn Co-authored-by: David Widmann Co-authored-by: David Widmann --- .../continuous/pgeneralizedgaussian.jl | 136 +++++----- .../continuous/skewedexponentialpower.jl | 25 +- test/ref/continuous/pgeneralizedgaussian.R | 33 +++ test/ref/continuous_test.lst | 4 + test/ref/continuous_test.ref.json | 108 ++++++++ test/ref/rdistributions.R | 1 + test/ref/readme.md | 1 + test/testutils.jl | 32 ++- .../continuous/pgeneralizedgaussian.jl | 245 ++++++++---------- .../continuous/skewedexponentialpower.jl | 18 +- test/univariates.jl | 6 - 11 files changed, 389 insertions(+), 220 deletions(-) create mode 100644 test/ref/continuous/pgeneralizedgaussian.R diff --git a/src/univariate/continuous/pgeneralizedgaussian.jl b/src/univariate/continuous/pgeneralizedgaussian.jl index 7ae6d18313..9d41d9d464 100644 --- a/src/univariate/continuous/pgeneralizedgaussian.jl +++ b/src/univariate/continuous/pgeneralizedgaussian.jl @@ -10,41 +10,48 @@ f(x, \\mu, \\alpha, p) = \\frac{p}{2\\alpha\\Gamma(1/p)} e^{-(\\frac{|x-\\mu|}{\ ``` The p-Generalized Gaussian (GGD) is a parametric distribution that incorporates the -Normal and Laplacian distributions as special cases where `p = 1` and `p = 2`. As `p → ∞`, -the distribution approaches the Uniform distribution on `[μ-α, μ+α]`. +normal (`p = 2`) and Laplacian (`p = 1`) distributions as special cases. +As `p → ∞`, the distribution approaches the Uniform distribution on `[μ - α, μ + α]`. 
```julia -PGeneralizedGaussian() # GGD with shape 2, scale 1, location 0, (the Normal distribution) +PGeneralizedGaussian() # GGD with location 0, scale √2, and shape 2 (the normal distribution) PGeneralizedGaussian(μ, α, p) # GGD with location μ, scale α, and shape p -params(d) # Get the parameters, i.e. (μ, α, p) -shape(d) # Get the shape parameter, p -scale(d) # Get the scale parameter, α -location(d) # Get the location parameter, μ +params(d) # Get the parameters, i.e. (μ, α, p) +location(d) # Get the location parameter, μ +scale(d) # Get the scale parameter, α +shape(d) # Get the shape parameter, p ``` External Links * [Generalized Gaussian on Wikipedia](http://en.wikipedia.org/wiki/Generalized_normal_distribution) - * [Reference implementation paper](https://www.researchgate.net/publication/254282790_Simulation_of_the_p-generalized_Gaussian_distribution) + * [Reference implementation](https://www.researchgate.net/publication/254282790_Simulation_of_the_p-generalized_Gaussian_distribution) """ -struct PGeneralizedGaussian{T1<:Real, T2<:Real, T3<:Real} <: ContinuousUnivariateDistribution - μ::T1 - α::T2 - p::T3 - PGeneralizedGaussian{T1,T2,T3}(μ::T1,α::T2,p::T3) where {T1<:Real, T2<:Real, T3<:Real} = new{T1,T2,T3}(µ, α, p) +struct PGeneralizedGaussian{T<:Real} <: ContinuousUnivariateDistribution + μ::T + α::T + p::T + + PGeneralizedGaussian{T}(μ::T, α::T, p::T) where {T<:Real} = new{T}(µ, α, p) end -function PGeneralizedGaussian(μ::T1,α::T2,p::T3; check_args::Bool=true) where {T1<:Real, T2<:Real, T3<:Real} +function PGeneralizedGaussian(μ::T, α::T, p::T; check_args::Bool=true) where {T<:Real} @check_args PGeneralizedGaussian (α, α > zero(α)) (p, p > zero(p)) - return PGeneralizedGaussian{T1,T2,T3}(μ,α,p) + return PGeneralizedGaussian{T}(μ, α, p) +end +function PGeneralizedGaussian(μ::Real, α::Real, p::Real; check_args::Bool=true) + return PGeneralizedGaussian(promote(μ, α, p)...; check_args=check_args) end """ PGeneralizedGaussian(p) -Builds a p-generalized 
Gaussian with `μ=0.0, α=1.0` +Build a p-generalized Gaussian with `μ=0.0, α=1.0` """ -PGeneralizedGaussian(p::Real; check_args::Bool=true) = PGeneralizedGaussian(zero(p), one(p), p; check_args=check_args) +function PGeneralizedGaussian(p::Real; check_args::Bool=true) + @check_args PGeneralizedGaussian (p, p > zero(p)) + return PGeneralizedGaussian{typeof(p)}(zero(p), oftype(p, 1), p) +end """ PGeneralizedGaussian() @@ -52,21 +59,20 @@ PGeneralizedGaussian(p::Real; check_args::Bool=true) = PGeneralizedGaussian(zero Builds a default p-generalized Gaussian with `μ=0.0, α=√2, p=2.0`, corresponding to the normal distribution with `μ=0.0, σ=1.0`. """ -PGeneralizedGaussian() = PGeneralizedGaussian(0.0, √2, 2.0, check_args=false) # approximate scale with unity std deviation and shape 2 +PGeneralizedGaussian() = PGeneralizedGaussian{Float64}(0.0, √2, 2.0) # approximate scale with unity std deviation and shape 2 #### Conversions -convert(::Type{PGeneralizedGaussian{T1,T2,T3}}, μ::S1, α::S2, p::S3) where {T1 <: Real, T2 <: Real, T3 <:Real, S1 <: Real, S2 <: Real, S3 <: Real} = PGeneralizedGaussian(T1(μ),T2(α),T3(p)) -function Base.convert(::Type{PGeneralizedGaussian{T1,T2,T3}}, d::PGeneralizedGaussian) where {T1<:Real,T2<:Real,T3<:Real} - return PGeneralizedGaussian{T1,T2,T3}(T1(d.μ), T2(d.α), T3(d.p)) +function Base.convert(::Type{PGeneralizedGaussian{T}}, d::PGeneralizedGaussian) where {T<:Real} + return PGeneralizedGaussian{T}(T(d.μ), T(d.α), T(d.p)) end -Base.convert(::Type{PGeneralizedGaussian{T1,T2,T3}}, d::PGeneralizedGaussian{T1,T2,T3}) where {T1<:Real,T2<:Real,T3<:Real} = d +Base.convert(::Type{PGeneralizedGaussian{T}}, d::PGeneralizedGaussian{T}) where {T<:Real} = d @distr_support PGeneralizedGaussian -Inf Inf #### Parameters -partype(::PGeneralizedGaussian{T1,T2,T3}) where {T1,T2,T3} = promote_type(T1,T2,T3) +partype(::PGeneralizedGaussian{T}) where {T<:Real} = T params(d::PGeneralizedGaussian) = (d.μ, d.α, d.p) location(d::PGeneralizedGaussian) = d.μ @@ -80,54 
+86,64 @@ mean(d::PGeneralizedGaussian) = d.μ median(d::PGeneralizedGaussian) = d.μ mode(d::PGeneralizedGaussian) = d.μ -var(d::PGeneralizedGaussian) = (d.α^2) * (gamma(3.0 * inv(d.p)) / gamma(inv(d.p))) -std(d::PGeneralizedGaussian) = (d.α) * sqrt(gamma(3.0 * inv(d.p)) / gamma(inv(d.p))) - -skewness(d::PGeneralizedGaussian{T1, T2, T3}) where {T1,T2,T3} = zero(T1) -kurtosis(d::PGeneralizedGaussian) = gamma(5.0 * inv(d.p)) * gamma(inv(d.p)) / (gamma(3.0 * inv(d.p))^2) - 3.0 -entropy(d::PGeneralizedGaussian) = inv(d.p) - log( d.p / (2.0 * d.α * gamma(inv(d.p)))) +var(d::PGeneralizedGaussian) = d.α^2 * (gamma(3 / d.p) / gamma(1 / d.p)) +std(d::PGeneralizedGaussian) = d.α * sqrt(gamma(3 / d.p) / gamma(1 / d.p)) +skewness(d::PGeneralizedGaussian) = zero(d.p) +kurtosis(d::PGeneralizedGaussian) = gamma(5 / d.p) * gamma(1 / d.p) / gamma(3 / d.p)^2 - 3 +entropy(d::PGeneralizedGaussian) = 1 / d.p - log(d.p / (2 * d.α * gamma(1 / d.p))) #### Evaluation function pdf(d::PGeneralizedGaussian, x::Real) - (μ, α, p) = params(d) - return ( p / ( 2.0 * α * gamma(1 / p) ) ) * exp( -( abs(x - μ) / α )^p ) + μ, α, p = params(d) + return (p / (2 * α * gamma(1 / p))) * exp(- (abs(x - μ) / α)^p) +end +function logpdf(d::PGeneralizedGaussian, x::Real) + μ, α, p = params(d) + return log(p / (2 * α)) - loggamma(1 / p) - (abs(x - μ) / α)^p end -logpdf(d::PGeneralizedGaussian, x::Real) = log(pdf(d, x)) - -""" - cdf(d, x) -Calculates the CDF of the distribution. To determine the CDF, the incomplete -gamma function is required. The CDF of the Gamma distribution provides this, -with the necessary 1/Γ(a) normalization. -""" +# To determine the CDF, the incomplete gamma function is required. +# The CDF of the Gamma distribution provides this, with the necessary 1/Γ(a) normalization. 
function cdf(d::PGeneralizedGaussian, x::Real) - (μ, α, p) = params(d) - v = cdf(Gamma(inv(p), 1), (abs(x - μ) / α)^p) * inv(2) - return typeof(v)(1/2) + sign(x - μ) * v + μ, α, p = params(d) + v = cdf(Gamma(inv(p), 1), (abs(x - μ) / α)^p) + return (1 + copysign(v, x - μ)) / 2 +end +function logcdf(d::PGeneralizedGaussian, x::Real) + μ, α, p = params(d) + Δ = x - μ + logv = logcdf(Gamma(inv(p), 1), (abs(Δ) / α)^p) + if Δ < 0 + return log1mexp(logv) - logtwo + else + return log1pexp(logv) - logtwo + end +end + +function quantile(d::PGeneralizedGaussian, q::Real) + μ, α, p = params(d) + inv_p = inv(p) + r = 2 * q - 1 + z = α * quantile(Gamma(inv_p, 1), abs(r))^inv_p + return μ + copysign(z, r) end #### Sampling -""" - rand(rng, d) - -Extract a sample from the p-Generalized Gaussian distribution 'd'. The sampling -procedure is implemented from from [1]. -[1] Gonzalez-Farias, G., Molina, J. A. D., & Rodríguez-Dagnino, R. M. (2009). -Efficiency of the approximated shape parameter estimator in the generalized -Gaussian distribution. IEEE Transactions on Vehicular Technology, 58(8), -4214-4223. -""" +# The sampling procedure is implemented from [1]. +# [1] Gonzalez-Farias, G., Molina, J. A. D., & Rodríguez-Dagnino, R. M. (2009). +# Efficiency of the approximated shape parameter estimator in the generalized +# Gaussian distribution. IEEE Transactions on Vehicular Technology, 58(8), +# 4214-4223. function rand(rng::AbstractRNG, d::PGeneralizedGaussian) - - # utilizing the sampler from the Gamma distribution. - g = Gamma(inv(d.p), 1) - - # random variable with value -1 or 1 with probability (1/2). 
- b = 2.0 * rand(Bernoulli()) -1 - - return d.μ + inv(sqrt(d.α)) * rand(rng, g)^inv(d.p) * b + inv_p = inv(d.p) + g = Gamma(inv_p, 1) + z = d.α * rand(rng, g)^inv_p + if rand(rng) < 0.5 + return d.μ - z + else + return d.μ + z + end end diff --git a/src/univariate/continuous/skewedexponentialpower.jl b/src/univariate/continuous/skewedexponentialpower.jl index 1840f55c0d..d771511142 100644 --- a/src/univariate/continuous/skewedexponentialpower.jl +++ b/src/univariate/continuous/skewedexponentialpower.jl @@ -1,7 +1,7 @@ """ SkewedExponentialPower(μ, σ, p, α) -The *Skewed exponential power distribution*, with location `μ`, scale `σ`, shape `p`, and skewness `α` +The *Skewed exponential power distribution*, with location `μ`, scale `σ`, shape `p`, and skewness `α`, has the probability density function [1] ```math f(x; \\mu, \\sigma, p, \\alpha) = @@ -50,14 +50,13 @@ SkewedExponentialPower(μ::Real=0) = SkewedExponentialPower(μ, 1, 2, 1//2; chec @distr_support SkewedExponentialPower -Inf Inf ### Conversions -convert(::Type{SkewedExponentialPower{T}}, μ::S, σ::S, p::S, α::S) where {T <: Real, S <: Real} = SkewedExponentialPower(T(μ), T(σ), T(p), T(α)) function Base.convert(::Type{SkewedExponentialPower{T}}, d::SkewedExponentialPower) where {T<:Real} SkewedExponentialPower{T}(T(d.μ), T(d.σ), T(d.p), T(d.α)) end Base.convert(::Type{SkewedExponentialPower{T}}, d::SkewedExponentialPower{T}) where {T<:Real} = d ### Parameters -@inline partype(d::SkewedExponentialPower{T}) where {T<:Real} = T +@inline partype(::SkewedExponentialPower{T}) where {T<:Real} = T params(d::SkewedExponentialPower) = (d.μ, d.σ, d.p, d.α) location(d::SkewedExponentialPower) = d.μ @@ -87,7 +86,7 @@ function logpdf(d::SkewedExponentialPower, x::Real) μ, σ, p, α = params(d) a = x < μ ? 
α : 1 - α inv_p = inv(p) - return -(logtwo + log(σ) + inv_p * log(p) + loggamma(1 + inv_p) + inv_p * (abs(μ - x) / (2 * σ * a))^p) + return -(logtwo + log(σ) + loggamma(inv_p) + ((1 - p) * log(p) + (abs(μ - x) / (2 * σ * a))^p) / p) end function cdf(d::SkewedExponentialPower, x::Real) @@ -99,23 +98,33 @@ function cdf(d::SkewedExponentialPower, x::Real) α + (1-α) * cdf(Gamma(inv_p), inv_p * (abs((x-μ)/σ) / (2*(1-α)))^p) end end +function logcdf(d::SkewedExponentialPower, x::Real) + μ, σ, p, α = params(d) + inv_p = inv(p) + if x <= μ + log(α) + logccdf(Gamma(inv_p), inv_p * (abs((x-μ)/σ) / (2*α))^p) + else + log1mexp(log1p(-α) + logccdf(Gamma(inv_p), inv_p * (abs((x-μ)/σ) / (2*(1-α)))^p)) + end +end function quantile(d::SkewedExponentialPower, p::Real) μ, σ, _, α = params(d) inv_p = inv(d.p) if p <= α - μ - 2*α*σ * (d.p * quantile(Gamma(inv_p), 1-p/α))^(inv_p) + μ - 2*α*σ * (d.p * quantile(Gamma(inv_p), (α-p)/α))^inv_p else - μ + 2*(1-α)*σ * (d.p * quantile(Gamma(inv_p), 1-(1-p)/(1-α)))^(inv_p) + μ + 2*(1-α)*σ * (d.p * quantile(Gamma(inv_p), (p-α)/(1-α)))^inv_p end end function rand(rng::AbstractRNG, d::SkewedExponentialPower) μ, σ, p, α = params(d) inv_p = inv(d.p) + z = 2*σ * (p * rand(rng, Gamma(inv_p, 1)))^inv_p if rand(rng) < d.α - μ - σ * 2*p^(inv_p) * α * rand(Gamma(inv_p, 1))^(inv_p) + return μ - α * z else - μ + σ * 2*p^(inv_p) * (1-α) * rand(Gamma(inv_p, 1))^(inv_p) + return μ + (1-α) * z end end diff --git a/test/ref/continuous/pgeneralizedgaussian.R b/test/ref/continuous/pgeneralizedgaussian.R new file mode 100644 index 0000000000..be142e9922 --- /dev/null +++ b/test/ref/continuous/pgeneralizedgaussian.R @@ -0,0 +1,33 @@ +PGeneralizedGaussian <- R6Class("PGeneralizedGaussian", + inherit = ContinuousDistribution, + public = list( + names = c("mu", "alpha", "beta"), + mu = NA, + alpha = NA, + beta = NA, + initialize = function(m=0, a=sqrt(2), p=2) { + self$mu <- m + self$alpha <- a + self$beta <- p + }, + supp = function() { c(-Inf, Inf) }, + properties = 
function() { + m <- self$mu + a <- self$alpha + p <- self$beta + list(location=m, + scale=a, + shape=p, + mean=m, + var=a^2 * exp(lgamma(3/p) - lgamma(1/p)), + median=m, + mode=m, + entropy=1/p - log(p/(2 * a)) + lgamma(1/p), + skewness=0, + kurtosis=exp(lgamma(5/p) + lgamma(1/p) - 2 * lgamma(3/p)) - 3) + }, + pdf = function(x, log=FALSE){ gnorm::dgnorm(x, self$mu, self$alpha, self$beta, log=log) }, + cdf = function(x){ gnorm::pgnorm(x, self$mu, self$alpha, self$beta) }, + quan = function(v) { gnorm::qgnorm(v, self$mu, self$alpha, self$beta) } + ) +) diff --git a/test/ref/continuous_test.lst b/test/ref/continuous_test.lst index 381a1c4661..38c894dff8 100644 --- a/test/ref/continuous_test.lst +++ b/test/ref/continuous_test.lst @@ -145,6 +145,10 @@ Pareto(2.0) Pareto(2.0, 1.5) Pareto(3.0, 2.0) +PGeneralizedGaussian() +PGeneralizedGaussian(0.0, 1.5, 2.0) +PGeneralizedGaussian(3.0, 2.0, 1.5) + Rayleigh() Rayleigh(3.0) Rayleigh(8.0) diff --git a/test/ref/continuous_test.ref.json b/test/ref/continuous_test.ref.json index 8ed887a621..faadbeff20 100644 --- a/test/ref/continuous_test.ref.json +++ b/test/ref/continuous_test.ref.json @@ -3945,6 +3945,114 @@ { "q": 0.90, "x": 4.30886938006377 } ] }, +{ + "expr": "PGeneralizedGaussian()", + "dtype": "PGeneralizedGaussian", + "minimum": "-inf", + "maximum": "inf", + "properties": { + "location": 0, + "scale": 1.4142135623731, + "shape": 2, + "mean": 0, + "var": 1, + "median": 0, + "mode": 0, + "entropy": 1.41893853320467, + "skewness": 0, + "kurtosis": -4.44089209850063e-16 + }, + "points": [ + { "x": -1.2815515655446, "pdf": 0.175498331932487, "logpdf": -1.74012574077958, "cdf": 0.1 }, + { "x": -0.841621233572914, "pdf": 0.279961920407808, "logpdf": -1.27310168360507, "cdf": 0.2 }, + { "x": -0.524400512708041, "pdf": 0.347692614200074, "logpdf": -1.0564364820689, "cdf": 0.3 }, + { "x": -0.2533471031358, "pdf": 0.38634253349686, "logpdf": -0.951030910538324, "cdf": 0.4 }, + { "x": 0, "pdf": 0.398942280401433, "logpdf": 
-0.918938533204673, "cdf": 0.5 }, + { "x": 0.2533471031358, "pdf": 0.38634253349686, "logpdf": -0.951030910538324, "cdf": 0.6 }, + { "x": 0.524400512708041, "pdf": 0.347692614200074, "logpdf": -1.0564364820689, "cdf": 0.7 }, + { "x": 0.841621233572915, "pdf": 0.279961920407808, "logpdf": -1.27310168360507, "cdf": 0.8 }, + { "x": 1.2815515655446, "pdf": 0.175498331932487, "logpdf": -1.74012574077958, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": -1.2815515655446 }, + { "q": 0.25, "x": -0.674489750196082 }, + { "q": 0.50, "x": 0 }, + { "q": 0.75, "x": 0.674489750196082 }, + { "q": 0.90, "x": 1.2815515655446 } + ] +}, +{ + "expr": "PGeneralizedGaussian(0.0, 1.5, 2.0)", + "dtype": "PGeneralizedGaussian", + "minimum": "-inf", + "maximum": "inf", + "properties": { + "location": 0, + "scale": 1.5, + "shape": 2, + "mean": 0, + "var": 1.125, + "median": 0, + "mode": 0, + "entropy": 1.47783005103286, + "skewness": 0, + "kurtosis": -4.44089209850063e-16 + }, + "points": [ + { "x": -1.35929070365523, "pdf": 0.165461414128519, "logpdf": -1.79901725860777, "cdf": 0.1 }, + { "x": -0.892674122174992, "pdf": 0.26395062985916, "logpdf": -1.33199320143326, "cdf": 0.2 }, + { "x": -0.556210737890337, "pdf": 0.327807740359134, "logpdf": -1.11532799989709, "cdf": 0.3 }, + { "x": -0.268715181931937, "pdf": 0.364247233728561, "logpdf": -1.00992242836652, "cdf": 0.4 }, + { "x": 0, "pdf": 0.376126389031838, "logpdf": -0.977830051032864, "cdf": 0.5 }, + { "x": 0.268715181931937, "pdf": 0.364247233728561, "logpdf": -1.00992242836652, "cdf": 0.6 }, + { "x": 0.556210737890337, "pdf": 0.327807740359134, "logpdf": -1.11532799989709, "cdf": 0.7 }, + { "x": 0.892674122174993, "pdf": 0.26395062985916, "logpdf": -1.33199320143326, "cdf": 0.8 }, + { "x": 1.35929070365523, "pdf": 0.165461414128519, "logpdf": -1.79901725860777, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": -1.35929070365523 }, + { "q": 0.25, "x": -0.715404414306705 }, + { "q": 0.50, "x": 0 }, + { "q": 0.75, "x": 
0.715404414306705 }, + { "q": 0.90, "x": 1.35929070365523 } + ] +}, +{ + "expr": "PGeneralizedGaussian(3.0, 2.0, 1.5)", + "dtype": "PGeneralizedGaussian", + "minimum": "-inf", + "maximum": "inf", + "properties": { + "location": 3, + "scale": 2, + "shape": 1.5, + "mean": 3, + "var": 2.95395244648659, + "median": 3, + "mode": 3, + "entropy": 1.95064619482592, + "skewness": 0, + "kurtosis": 0.76195423693023 + }, + "points": [ + { "x": 0.872206385795335, "pdf": 0.0924266554239228, "logpdf": -2.38133986332257, "cdf": 0.1 }, + { "x": 1.67223102018988, "pdf": 0.161232689191114, "logpdf": -1.82490668292265, "cdf": 0.2 }, + { "x": 2.20370021746836, "pdf": 0.215410750762552, "logpdf": -1.53520860481569, "cdf": 0.3 }, + { "x": 2.6271970087856, "pdf": 0.255519396518254, "logpdf": -1.36445695629713, "cdf": 0.4 }, + { "x": 3, "pdf": 0.276933041858118, "logpdf": -1.28397952815925, "cdf": 0.5 }, + { "x": 3.3728029912144, "pdf": 0.255519396518254, "logpdf": -1.36445695629713, "cdf": 0.6 }, + { "x": 3.79629978253164, "pdf": 0.215410750762552, "logpdf": -1.53520860481569, "cdf": 0.7 }, + { "x": 4.32776897981012, "pdf": 0.161232689191114, "logpdf": -1.82490668292265, "cdf": 0.8 }, + { "x": 5.12779361420466, "pdf": 0.0924266554239229, "logpdf": -2.38133986332257, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.872206385795335 }, + { "q": 0.25, "x": 1.95708306892598 }, + { "q": 0.50, "x": 3 }, + { "q": 0.75, "x": 4.04291693107402 }, + { "q": 0.90, "x": 5.12779361420466 } + ] +}, { "expr": "Rayleigh()", "dtype": "Rayleigh", diff --git a/test/ref/rdistributions.R b/test/ref/rdistributions.R index c8b3b10317..1212ddb183 100644 --- a/test/ref/rdistributions.R +++ b/test/ref/rdistributions.R @@ -67,6 +67,7 @@ source("continuous/noncentralt.R") source("continuous/normal.R") source("continuous/normalinversegaussian.R") source("continuous/pareto.R") +source("continuous/pgeneralizedgaussian.R") source("continuous/rayleigh.R") source("continuous/rician.R") 
source("continuous/studentizedrange.R") diff --git a/test/ref/readme.md b/test/ref/readme.md index d6ebeba4dd..ad0cf13c44 100644 --- a/test/ref/readme.md +++ b/test/ref/readme.md @@ -23,6 +23,7 @@ in addition to the R language itself: | skellam | For ``Skellam`` | | BiasedUrn | For ``NoncentralHypergeometric`` | | fBasics | For ``NormalInverseGaussian`` | +| gnorm | For ``PGeneralizedGaussian`` | ## Usage diff --git a/test/testutils.jl b/test/testutils.jl index 531aa78bf3..c26d1bbb8e 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -144,8 +144,19 @@ function test_samples(s::Sampleable{Univariate, Discrete}, # the sampleable end # generate samples using RNG passed or default RNG - samples = ismissing(rng) ? rand(s, n) : rand(rng, s, n) - @assert length(samples) == n + # we also check reproducibility + if rng === missing + Random.seed!(1234) + samples = rand(s, n) + Random.seed!(1234) + samples2 = rand(s, n) + else + rng2 = deepcopy(rng) + samples = rand(rng, s, n) + samples2 = rand(rng2, s, n) + end + @test length(samples) == n + @test samples2 == samples # scan samples and get counts cnts = zeros(Int, m) @@ -232,9 +243,20 @@ function test_samples(s::Sampleable{Univariate, Continuous}, # the sampleable @assert cub[i] >= clb[i] end - # generate samples - samples = ismissing(rng) ? rand(s, n) : rand(rng, s, n) - @assert length(samples) == n + # generate samples using RNG passed or default RNG + # we also check reproducibility + if rng === missing + Random.seed!(1234) + samples = rand(s, n) + Random.seed!(1234) + samples2 = rand(s, n) + else + rng2 = deepcopy(rng) + samples = rand(rng, s, n) + samples2 = rand(rng2, s, n) + end + @test length(samples) == n + @test samples2 == samples if isa(distr, StudentizedRange) samples[isnan.(samples)] .= 0.0 # Underlying implementation in Rmath can't handle very low values. 
diff --git a/test/univariate/continuous/pgeneralizedgaussian.jl b/test/univariate/continuous/pgeneralizedgaussian.jl index 8ac1a27e7c..49de167cc5 100644 --- a/test/univariate/continuous/pgeneralizedgaussian.jl +++ b/test/univariate/continuous/pgeneralizedgaussian.jl @@ -1,138 +1,109 @@ -using SpecialFunctions - -d = PGeneralizedGaussian() # mean zero, scale √2, shape 2. - -# PDF and CDF values from R using the same default -# mean scale and shape parameters. Format is [x, pdf, cdf]. -test = [ - -2.0000 0.053990966513188 0.0227501319481792; - -1.9596 0.0584867753200998 0.0250212806912653; - -1.9192 0.0632536242130714 0.0274795137708557; - -1.8788 0.0682974223520507 0.0301359035293964; - -1.8384 0.0736231464260287 0.0330017425665818; - -1.798 0.0792347326126488 0.0360885038859181; - -1.7576 0.0851349688324957 0.0394077966787751; - -1.7172 0.091325388414829 0.0429713177801418; - -1.6768 0.0978061663608371 0.0467907988768448; - -1.6364 0.104576019449044 0.0508779495981413; - -1.596 0.111632111473603 0.0552443966698443; - -1.5556 0.118969964938241 0.0599016193659873; - -1.5152 0.126583380545089 0.0648608815458657; - -1.4748 0.134464365817196 0.0701331606184371; - -1.4344 0.142603074175027 0.0757290738298424; - -1.394 0.1509877557498 0.0816588023224809; - -1.3536 0.159604721159404 0.0879320134647649; - -1.3132 0.168438319395545 0.0945577819987581; - -1.2728 0.177470930873585 0.101544510597386; - -1.2324 0.186682976579551 0.108899850463094; - -1.192 0.196052944112626 0.116630622634895; - -1.1516 0.20555743126705 0.124742740699938; - -1.1112 0.215171207626101 0.133241135628347; - -1.0708 0.224867294454356 0.142129683465442; - -1.0304 0.234617062974812 0.151411136623046; - -0.99 0.244390350907 0.161087059510831; - -0.9496 0.254155596923586 0.171157769239293; - -0.9092 0.263879992459047 0.181622282107569; - -0.8688 0.273529650077933 0.192478266561878; - -0.8284 0.283069787385305 0.203722003273787; - -0.788 0.292464925241515 0.215348352941949; - -0.7476 0.301679098831105 
0.227350732366586; - -0.7072 0.31067607993471 0.239721099283345; - -0.6668 0.319419608566778 0.252449946372615; - -0.6264 0.327873631974065 0.265526304782692; - -0.586 0.336002548843259 0.278937757421185; - -0.5456 0.343771456443474 0.292670462179526; - -0.5052 0.351146398333471 0.306709185161556; - -0.4648 0.358094610196201 0.321037343889939; - -0.4244 0.364584761326559 0.335637060364848; - -0.384 0.370587189293274 0.350489223749189; - -0.3436 0.376074125323367 0.36557356235494; - -0.3032 0.38101990801804 0.380868724507227; - -0.2628 0.385401183101656 0.396352367767935; - -0.2224 0.389197087030118 0.412001255910218; - -0.182 0.392389412439884 0.42779136295047; - -0.1416 0.394962753602334 0.443697983466388; - -0.1012 0.396904630257822 0.459695848359751; - -0.0608 0.398205588436696 0.475759245161512; - -0.0204 0.398859277127687 0.491862141925561; - 0.02 0.398862499923666 0.507978313716902; - 0.0604 0.398215241057149 0.524081470670474; - 0.1008 0.396920665528795 0.540145386578997; - 0.1412 0.394985093327439 0.556144026962162; - 0.1816 0.392417948035498 0.572051675575411; - 0.222 0.38923168040452 0.587843058334322; - 0.2624 0.385441667768147 0.603493463659977; - 0.3028 0.381066090429489 0.618978858291238; - 0.3432 0.376125786413223 0.634275997660943; - 0.3836 0.370644086205869 0.649362529994026; - 0.424 0.364646629317593 0.664217093355429; - 0.4644 0.358161164682611 0.678819404953587; - 0.5048 0.351217337070506 0.693150342089967; - 0.5452 0.343846461805699 0.707192014235536; - 0.5856 0.336081290185479 0.720927825809832; - 0.626 0.327955768047694 0.734342529336241; - 0.6664 0.319504789967085 0.747422268746731; - 0.7068 0.310763951554577 0.760154612709528; - 0.7472 0.301769302297434 0.772528577952478; - 0.7876 0.292557101311255 0.784534642652148; - 0.828 0.283163578279136 0.796164750052605; - 0.8684 0.273624701730973 0.807412302567402; - 0.9088 0.263975956669541 0.81827214670236; - 0.9492 0.254252133382257 0.828740549214512; - 0.9896 0.244487129091676 0.838815164993143; 
- 1.03 0.234713763897012 0.848494997211656; - 1.0704 0.224963612246627 0.857780350353393; - 1.1108 0.215266850961145 0.866672776760204; - 1.1512 0.205652124601902 0.875175017389245; - 1.1916 0.196146428753372 0.883290937490971; - 1.232 0.186775011564286 0.891025457939793; - 1.2724 0.177561293673426 0.898384482958291; - 1.3128 0.168526806435554 0.905374824976726; - 1.3532 0.159691148163201 0.912004127362079; - 1.3936 0.151071957913399 0.918280785735546; - 1.434 0.142684906177067 0.924213868574892; - 1.4744 0.13454370167416 0.929813037768908; - 1.5148 0.126660113321382 0.935088469756236; - 1.5552 0.119044006322032 0.940050777840661; - 1.5956 0.111703391230226 0.944710936230536; - 1.636 0.104644484764399 0.949080206301951; - 1.6764 0.0978717810877164 0.953170065534611; - 1.7168 0.091388132235354 0.956992139516678; - 1.7572 0.085194836349956 0.960558137361137; - 1.7976 0.0792917323859474 0.963879790822026; - 1.838 0.0736772999596982 0.966968797345075; - 1.8784 0.0683487630544158 0.969836767234444; - 1.9188 0.0633021963346127 0.97249517506594; - 1.9592 0.0585326328834392 0.97495531542798; - 1.9996 0.0540341722453821 0.977228263024933; -] -# CDF test. -for i=1:size(test, 1) - @test cdf(d, test[i, 1]) ≈ test[i, 3] -end -# PDF test. 
-for i=1:size(test, 1) - @test pdf(d, test[i, 1]) ≈ test[i, 2] +using Distributions + +using Random +using Test + +@testset "PGeneralizedGaussian" begin + @testset "Constructors" begin + # Argument checks + μ = randn() + for p in (-0.2, 0) + @test_throws DomainError PGeneralizedGaussian(p) + PGeneralizedGaussian(p; check_args=false) + + @test_throws DomainError PGeneralizedGaussian(μ, 1.0, p) + PGeneralizedGaussian(μ, 1.0, p; check_args=false) + end + for α in (-1.2, 0) + @test_throws DomainError PGeneralizedGaussian(μ, α, 1.0) + PGeneralizedGaussian(μ, α, 1.0; check_args=false) + + for p in (-0.2, 0) + @test_throws DomainError PGeneralizedGaussian(μ, α, p) + PGeneralizedGaussian(μ, α, p; check_args=false) + end + end + + # Convenience constructors + d = PGeneralizedGaussian() + @test d.μ == 0 + @test d.α ≈ sqrt(2) + @test d.p == 2 + + d = PGeneralizedGaussian(2.1) + @test d.μ == 0 + @test d.α == 1 + @test d.p == 2.1 + end + + @testset "Special cases" begin + μ = randn() + α = Random.randexp() + for (d, dref) in ( + (PGeneralizedGaussian(μ, α, 1), Laplace(μ, α)), # p = 1 (Laplace) + (PGeneralizedGaussian(), Normal()), # p = 2 (standard normal) + (PGeneralizedGaussian(μ, α, 2), Normal(μ, α / sqrt(2))), # p = 2 (normal) + ) + @test minimum(d) == -Inf + @test maximum(d) == Inf + + @test location(d) == d.μ + @test scale(d) == d.α + @test shape(d) == d.p + + @test mean(d) == d.μ + @test mean(d) ≈ mean(dref) + @test median(d) == d.μ + @test median(d) ≈ median(dref) + @test mode(d) == d.μ + @test mode(d) ≈ mode(dref) + + @test var(d) ≈ var(dref) + @test std(d) ≈ std(dref) + + @test skewness(d) == 0 + @test kurtosis(d) ≈ kurtosis(dref) atol = 1e-12 + @test entropy(d) ≈ entropy(dref) + + # PDF + CDF tests. 
+ for x in (-Inf, d.μ - 4.2, d.μ - 1.2, d.μ, Float32(d.μ) + 0.3f0, d.μ + 4, Inf32) + @test @inferred(pdf(d, x)) ≈ pdf(dref, x) + @test @inferred(logpdf(d, x)) ≈ logpdf(dref, x) + @test @inferred(cdf(d, x)) ≈ cdf(dref, x) atol = 1e-12 + @test @inferred(logcdf(d, x)) ≈ logcdf(dref, x) atol = 1e-12 + end + + # Additional tests, including sampling + test_distr(d, 10^6) + end + end + + @testset "Non-special case" begin + μ = randn() + α = Random.randexp() + p = Random.randexp() + d = PGeneralizedGaussian(μ, α, p) + + @test minimum(d) == -Inf + @test maximum(d) == Inf + + @test location(d) == μ + @test scale(d) == α + @test shape(d) == p + + @test mean(d) == μ + @test median(d) == μ + @test mode(d) == μ + + @test cdf(d, -Inf) == 0 + @test logcdf(d, -Inf) == -Inf + @test cdf(d, μ) ≈ 0.5 + @test logcdf(d, μ) ≈ -log(2) + @test cdf(d, Inf) == 1 + @test logcdf(d, Inf) == 0 + @test quantile(d, 1 // 2) ≈ μ + + # Additional tests, including sampling + test_distr(d, 10^6) + end end -@test mean(d) ≈ 0 -@test median(d) ≈ 0 -@test mode(d) ≈ 0 -@test var(d) ≈ 1 # unity variance with shape 2 and scale √2 -@test std(d) ≈ 1 -@test skewness(d) ≈ 0 -@test kurtosis(d) ≈ 0 atol = 10e-10 -@test entropy(d) ≈ 1.418938533204673 - -# Non-Gaussian case -@test cdf(PGeneralizedGaussian(1., 2.0, 2.5), 1.) 
≈ 0.5 - -α, β = rand(2) -d = PGeneralizedGaussian(0.0, α, β) -@test var(d) ≈ α^2 * (gamma(3.0 * inv(β)) / gamma(inv(β))) -@test kurtosis(d) ≈ gamma(5.0 * inv(β)) * gamma(inv(β)) / (gamma(3.0 * inv(β))^2) - 3.0 -@test entropy(d) ≈ inv(β) - log( β / (2.0 * α * gamma(inv(β)))) - -@test convert(PGeneralizedGaussian{Float64,Float64,Float64}, d) === d -d32 = convert(PGeneralizedGaussian{Int,Float32,Float16}, d) -@test d32 isa PGeneralizedGaussian{Int,Float32,Float16} -@test params(d32) == (0, Float32(α), Float16(β)) \ No newline at end of file diff --git a/test/univariate/continuous/skewedexponentialpower.jl b/test/univariate/continuous/skewedexponentialpower.jl index 95dff6658e..5eea7d81cc 100644 --- a/test/univariate/continuous/skewedexponentialpower.jl +++ b/test/univariate/continuous/skewedexponentialpower.jl @@ -1,5 +1,7 @@ -using Test using Distributions +using SpecialFunctions + +using Test @testset "SkewedExponentialPower" begin @testset "α = 0.5" begin @@ -13,6 +15,7 @@ using Distributions @test @inferred cdf(d2, -Inf) == 0 @test @inferred quantile(d2, 1) == Inf @test @inferred quantile(d2, 0) == -Inf + test_distr(d2, 10^6) # Comparison to laplace d = SkewedExponentialPower(0, 1, 1, 0.5) @@ -36,10 +39,12 @@ using Distributions @test @inferred kurtosis(d) ≈ kurtosis(de) @test @inferred pdf(d, 0.5) ≈ pdf(de, 0.5) @test @inferred cdf(d, 0.5) ≈ cdf(de, 0.5) + test_distr(d, 10^6) # This is infinite for the PGeneralizedGaussian implementation d = SkewedExponentialPower(0, 1, 0.01, 0.5) @test @inferred isfinite(var(d)) + test_distr(d, 10^6) # Comparison to normal d = SkewedExponentialPower(0, 1, 2, 0.5) @@ -51,6 +56,7 @@ using Distributions @test @inferred pdf(d, 0.5) ≈ pdf(dn, 0.5) @test @inferred cdf(d, 0.5) ≈ cdf(dn, 0.5) @test @inferred quantile(d, 0.5) ≈ quantile(dn, 0.5) + test_distr(d, 10^6) end @testset "α != 0.5" begin # Format is [x, pdf, cdf] from the asymmetric @@ -80,10 +86,13 @@ using Distributions ] d = SkewedExponentialPower(0, 1, 0.5, 0.7) - for i ∈ 
1:size(test, 1) - @test @inferred isapprox(pdf(d, test[i, 1]), test[i, 2], rtol=1e-3) - @test @inferred isapprox(cdf(d, test[i, 1]), test[i, 3], rtol=1e-3) + for t in eachrow(test) + @test @inferred(pdf(d, t[1])) ≈ t[2] rtol=1e-5 + @test @inferred(logpdf(d, t[1])) ≈ log(t[2]) rtol=1e-5 + @test @inferred(cdf(d, t[1])) ≈ t[3] rtol=1e-3 + @test @inferred(logcdf(d, t[1])) ≈ log(t[3]) rtol=1e-3 end + test_distr(d, 10^6) # relationship between sepd(μ, σ, p, α) and # sepd(μ, σ, p, 1-α) @@ -102,5 +111,6 @@ using Distributions @inferred var(d) ≈ moments[2] - moments[1]^2 @inferred skewness(d) ≈ moments[3] / (√(moments[2] - moments[1]^2))^3 @inferred kurtosis(d) ≈ (moments[4] / ((moments[2] - moments[1]^2))^2 - 3) + test_distr(d, 10^6) end end diff --git a/test/univariates.jl b/test/univariates.jl index 66fcbc6b3a..675ef6909f 100644 --- a/test/univariates.jl +++ b/test/univariates.jl @@ -72,12 +72,6 @@ function verify_and_test(D::Union{Type,Function}, d::UnivariateDistribution, dct @test typeof(D(mixed_pars...)) == typeof(d) end - # promote integer arguments to floats, where applicable - if sum(float_pars) >= 1 && !any(map(isinf, pars)) && !isa(d, Geometric) && !isa(D, typeof(truncated)) - int_pars = map(x -> ceil(Int, x), pars) - @test typeof(D(int_pars...)) == typeof(d) - end - # conversions if D isa Type && !isconcretetype(D) @test convert(D{partype(d)}, d) === d From e761040500b769cfdbdbcd9bf2bac49202558f6b Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 23 Feb 2023 13:35:14 +0100 Subject: [PATCH 66/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0621e87158..8578256d20 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.81" +version = "0.25.82" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 610bc12b6a27844c6c1cd4acc1374fb48f08deaa 
Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 23 Feb 2023 22:00:41 +0100 Subject: [PATCH 67/93] Fix inconsistency of `pdf`/`cdf` of `Uniform` (#1682) * Fix inconsistency of `pdf`/`cdf` of `Uniform` * Fix existing tests * Fixes * Fix --- src/univariate/continuous/uniform.jl | 10 +++++--- test/univariate/continuous/uniform.jl | 34 +++++++++++++++++++++++++++ test/univariates.jl | 24 ------------------- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/src/univariate/continuous/uniform.jl b/src/univariate/continuous/uniform.jl index dd3fd59ed6..6386628e8c 100644 --- a/src/univariate/continuous/uniform.jl +++ b/src/univariate/continuous/uniform.jl @@ -72,12 +72,16 @@ entropy(d::Uniform) = log(d.b - d.a) #### Evaluation function pdf(d::Uniform, x::Real) - val = inv(d.b - d.a) + # include dependency on `x` for return type to be consistent with `cdf` + a, b, _ = promote(d.a, d.b, x) + val = inv(b - a) return insupport(d, x) ? val : zero(val) end function logpdf(d::Uniform, x::Real) - diff = d.b - d.a - return insupport(d, x) ? -log(diff) : log(zero(diff)) + # include dependency on `x` for return type to be consistent with `logcdf` + a, b, _ = promote(d.a, d.b, x) + val = - log(b - a) + return insupport(d, x) ? 
val : oftype(val, -Inf) end gradlogpdf(d::Uniform, x::Real) = zero(partype(d)) / oneunit(x) diff --git a/test/univariate/continuous/uniform.jl b/test/univariate/continuous/uniform.jl index 5936c3ea8a..e3a5d729ed 100644 --- a/test/univariate/continuous/uniform.jl +++ b/test/univariate/continuous/uniform.jl @@ -1,6 +1,7 @@ using Distributions using ChainRulesTestUtils using OffsetArrays +using StatsFuns using Random using Test @@ -80,4 +81,37 @@ using Test end end end + # issue #1677 + @testset "consistency of pdf and cdf" begin + for T in (Int, Float32, Float64) + d = Uniform{T}(T(2), T(4)) + for S in (Int, Float32, Float64) + TS = float(promote_type(T, S)) + + @test @inferred(pdf(d, S(1))) === TS(0) + @test @inferred(pdf(d, S(3))) === TS(1//2) + @test @inferred(pdf(d, S(5))) === TS(0) + + @test @inferred(logpdf(d, S(1))) === TS(-Inf) + @test @inferred(logpdf(d, S(3))) === -TS(logtwo) + @test @inferred(logpdf(d, S(5))) === TS(-Inf) + + @test @inferred(cdf(d, S(1))) === TS(0) + @test @inferred(cdf(d, S(3))) === TS(1//2) + @test @inferred(cdf(d, S(5))) === TS(1) + + @test @inferred(logcdf(d, S(1))) === TS(-Inf) + @test @inferred(logcdf(d, S(3))) === -TS(logtwo) + @test @inferred(logcdf(d, S(5))) === TS(0) + + @test @inferred(ccdf(d, S(1))) === TS(1) + @test @inferred(ccdf(d, S(3))) === TS(1//2) + @test @inferred(ccdf(d, S(5))) === TS(0) + + @test @inferred(logccdf(d, S(1))) === TS(0) + @test @inferred(logccdf(d, S(3))) === -TS(logtwo) + @test @inferred(logccdf(d, S(5))) === TS(-Inf) + end + end + end end diff --git a/test/univariates.jl b/test/univariates.jl index 675ef6909f..e16c52f59b 100644 --- a/test/univariates.jl +++ b/test/univariates.jl @@ -177,30 +177,6 @@ end @test invlogccdf(d, log(0.6)) isa Int end -@testset "Uniform type inference" begin - for T in (Int, Float32) - d = Uniform{T}(T(2), T(3)) - FT = float(T) - XFT = promote_type(FT, Float64) - - @test @inferred(pdf(d, 1.5)) === zero(FT) - @test @inferred(pdf(d, 2.5)) === one(FT) - @test @inferred(pdf(d, 
3.5)) === zero(FT) - - @test @inferred(logpdf(d, 1.5)) === FT(-Inf) - @test @inferred(logpdf(d, 2.5)) === -zero(FT) # negative zero - @test @inferred(logpdf(d, 3.5)) === FT(-Inf) - - @test @inferred(cdf(d, 1.5)) === zero(XFT) - @test @inferred(cdf(d, 2.5)) === XFT(1//2) - @test @inferred(cdf(d, 3.5)) === one(XFT) - - @test @inferred(ccdf(d, 1.5)) === one(XFT) - @test @inferred(ccdf(d, 2.5)) === XFT(1//2) - @test @inferred(ccdf(d, 3.5)) === zero(XFT) - end -end - # #1471 @testset "InverseGamma constructor (#1471)" begin @test_throws DomainError InverseGamma(-1, 2) From 20654317057533d81dda23d2a558d171167e65df Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 23 Feb 2023 22:01:26 +0100 Subject: [PATCH 68/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8578256d20..7113c2b15c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.82" +version = "0.25.83" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 5b99fe025f57dc8cbc17cb84f6484e06cfb06135 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Sat, 25 Feb 2023 13:28:04 -0800 Subject: [PATCH 69/93] Add the one-parameter Lindley distribution (#1678) --- docs/src/univariate.md | 7 + src/Distributions.jl | 3 +- src/univariate/continuous/lindley.jl | 177 ++++++++++++++++++++++++++ src/univariates.jl | 1 + test/ref/continuous/lindley.R | 12 ++ test/ref/continuous_test.lst | 5 + test/ref/continuous_test.ref.json | 104 +++++++++++++++ test/ref/rdistributions.R | 1 + test/ref/readme.md | 1 + test/runtests.jl | 1 + test/univariate/continuous/lindley.jl | 81 ++++++++++++ 11 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 src/univariate/continuous/lindley.jl create mode 100644 test/ref/continuous/lindley.R create mode 100644 test/univariate/continuous/lindley.jl diff --git 
a/docs/src/univariate.md b/docs/src/univariate.md index caa1e22e0b..d46592248a 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -298,6 +298,13 @@ Levy plotdensity((0, 20), Levy, (0, 1)) # hide ``` +```@docs +Lindley +``` +```@example plotdensity +plotdensity((0, 20), Lindley, (1.5,)) # hide +``` + ```@docs Logistic ``` diff --git a/src/Distributions.jl b/src/Distributions.jl index 3b9397736c..7e3085a78d 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -114,6 +114,7 @@ export KSOneSided, Laplace, Levy, + Lindley, LKJ, LKJCholesky, LocationScale, @@ -345,7 +346,7 @@ Supported distributions: Frechet, FullNormal, FullNormalCanon, Gamma, GeneralizedPareto, GeneralizedExtremeValue, Geometric, Gumbel, Hypergeometric, InverseWishart, InverseGamma, InverseGaussian, IsoNormal, - IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, LKJ, LKJCholesky, + IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, Lindley, LKJ, LKJCholesky, Logistic, LogNormal, MatrixBeta, MatrixFDist, MatrixNormal, MatrixTDist, MixtureModel, Multinomial, MultivariateNormal, MvLogNormal, MvNormal, MvNormalCanon, diff --git a/src/univariate/continuous/lindley.jl b/src/univariate/continuous/lindley.jl new file mode 100644 index 0000000000..9e6c3ee797 --- /dev/null +++ b/src/univariate/continuous/lindley.jl @@ -0,0 +1,177 @@ +""" + Lindley(θ) + +The one-parameter *Lindley distribution* with shape `θ > 0` has probability density +function + +```math +f(x; \\theta) = \\frac{\\theta^2}{1 + \\theta} (1 + x) e^{-\\theta x}, \\quad x > 0 +``` + +It was first described by Lindley[^1] and was studied in greater detail by Ghitany +et al.[^2] +Note that `Lindley(θ)` is a mixture of an `Exponential(1/θ)` and a `Gamma(2, 1/θ)` (i.e., both with rate `θ`) with +respective mixing weights `p = θ/(1 + θ)` and `1 - p`. + +[^1]: Lindley, D. V. (1958). Fiducial Distributions and Bayes' Theorem. Journal of the + Royal Statistical Society: Series B (Methodological), 20(1), 102–107. +[^2]: Ghitany, M. 
E., Atieh, B., & Nadarajah, S. (2008). Lindley distribution and its + application. Mathematics and Computers in Simulation, 78(4), 493–506. +""" +struct Lindley{T<:Real} <: ContinuousUnivariateDistribution + θ::T + + Lindley{T}(θ::T) where {T} = new{T}(θ) +end + +function Lindley(θ::Real; check_args::Bool=true) + @check_args Lindley (θ, θ > zero(θ)) + return Lindley{typeof(θ)}(θ) +end + +Lindley(θ::Integer; check_args::Bool=true) = Lindley(float(θ); check_args=check_args) + +Lindley() = Lindley{Float64}(1.0) + +Base.convert(::Type{Lindley{T}}, d::Lindley) where {T} = Lindley{T}(T(shape(d))) +Base.convert(::Type{Lindley{T}}, d::Lindley{T}) where {T} = d + +@distr_support Lindley 0.0 Inf + +### Parameters + +shape(d::Lindley) = d.θ +params(d::Lindley) = (shape(d),) +partype(::Lindley{T}) where {T} = T + +### Statistics + +mean(d::Lindley) = (2 + d.θ) / d.θ / (1 + d.θ) + +var(d::Lindley) = 2 / d.θ^2 - 1 / (1 + d.θ)^2 + +skewness(d::Lindley) = 2 * @evalpoly(d.θ, 2, 6, 6, 1) / @evalpoly(d.θ, 2, 4, 1)^(3//2) + +kurtosis(d::Lindley) = 3 * @evalpoly(d.θ, 8, 32, 44, 24, 3) / @evalpoly(d.θ, 2, 4, 1)^2 - 3 + +mode(d::Lindley) = max(0, (1 - d.θ) / d.θ) + +# Derived with Mathematica: +# KLDivergence := ResourceFunction["KullbackLeiblerDivergence"] +# KLDivergence[LindleyDistribution[θp], LindleyDistribution[θq]] +function kldivergence(p::Lindley, q::Lindley) + θp = shape(p) + θq = shape(q) + a = (θp + 2) * (θp - θq) / θp / (1 + θp) + b = 2 * log(θp) + log1p(θq) - 2 * log(θq) - log1p(θp) + return b - a +end + +# Derived with Mathematica based on https://mathematica.stackexchange.com/a/275765: +# ShannonEntropy[dist_?DistributionParameterQ] := +# Expectation[-LogLikelihood[dist, {x}], Distributed[x, dist]] +# Simplify[ShannonEntropy[LindleyDistribution[θ]]] +function entropy(d::Lindley) + θ = shape(d) + return 1 + exp(θ) * expinti(-θ) / (1 + θ) - 2 * log(θ) + log1p(θ) +end + +### Evaluation + +_lindley_mgf(θ, t) = θ^2 * (1 + θ - t) / (1 + θ) / (θ - t)^2 + +mgf(d::Lindley, 
t::Real) = _lindley_mgf(shape(d), t) + +cf(d::Lindley, t::Real) = _lindley_mgf(shape(d), t * im) + +cgf(d::Lindley, t::Real) = log1p(-t / (1 + d.θ)) - 2 * log1p(-t / d.θ) + +function pdf(d::Lindley, y::Real) + θ = shape(d) + res = θ^2 / (1 + θ) * (1 + y) * exp(-θ * y) + return y < 0 ? zero(res) : res +end + +function logpdf(d::Lindley, y::Real) + θ = shape(d) + _y = y < 0 ? zero(y) : y + res = 2 * log(θ) - log1p(θ) + log1p(_y) - θ * _y + return y < 0 ? oftype(res, -Inf) : res +end + +function gradlogpdf(d::Lindley, y::Real) + res = inv(1 + y) - shape(d) + return y < 0 ? zero(res) : res +end + +function ccdf(d::Lindley, y::Real) + θ = shape(d) + θy = θ * y + res = xexpy(1 + θy / (1 + θ), -θy) + return y < 0 ? oftype(res, 1) : res +end + +function logccdf(d::Lindley, y::Real) + θ = shape(d) + _y = y < 0 ? zero(y) : y + θy = θ * _y + res = log1p(θy / (1 + θ)) - θy + return y < 0 ? zero(res) : (y == Inf ? oftype(res, -Inf) : res) +end + +cdf(d::Lindley, y::Real) = 1 - ccdf(d, y) + +logcdf(d::Lindley, y::Real) = log1mexp(logccdf(d, y)) + +# Jodrá, P. (2010). Computer generation of random variables with Lindley or +# Poisson–Lindley distribution via the Lambert W function. Mathematics and Computers +# in Simulation, 81(4), 851–859. +# +# Only the -1 branch of the Lambert W functions is required since the argument is +# in (-1/e, 0) for all θ > 0 and 0 < q < 1. +function quantile(d::Lindley, q::Real) + θ = shape(d) + return -(1 + (1 + _lambertwm1((1 + θ) * (q - 1) / exp(1 + θ))) / θ) +end + +# Lóczi, L. (2022). Guaranteed- and high-precision evaluation of the Lambert W function. +# Applied Mathematics and Computation, 433, 127406. +# +# Compute W₋₁(x) for x ∈ (-1/e, 0) using formula (27) in Lóczi. By Theorem 2.23, the +# upper bound on the error for this algorithm is (1/2)^(2^n), where n is the number of +# recursion steps. The default here is set such that this error is less than `eps()`. 
+function _lambertwm1(x, n=6) + if -exp(-one(x)) < x <= -1//4 + β = -1 - sqrt2 * sqrt(1 + ℯ * x) + elseif x < 0 + lnmx = log(-x) + β = lnmx - log(-lnmx) + else + throw(DomainError(x)) + end + for i in 1:n + β = β / (1 + β) * (1 + log(x / β)) + end + return β +end + +### Sampling + +# Ghitany, M. E., Atieh, B., & Nadarajah, S. (2008). Lindley distribution and its +# application. Mathematics and Computers in Simulation, 78(4), 493–506. +function rand(rng::AbstractRNG, d::Lindley) + θ = shape(d) + λ = inv(θ) + T = typeof(λ) + u = rand(rng) + p = θ / (1 + θ) + return oftype(u, rand(rng, u <= p ? Exponential{T}(λ) : Gamma{T}(2, λ))) +end + +### Fitting + +# Ghitany et al. (2008) +function fit_mle(::Type{<:Lindley}, x::AbstractArray{<:Real}) + x̄ = mean(x) + return Lindley((1 - x̄ + sqrt((x̄ - 1)^2 + 8x̄)) / 2x̄) +end diff --git a/src/univariates.jl b/src/univariates.jl index 177940a44b..f9fe9c4b68 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -692,6 +692,7 @@ const continuous_distributions = [ "ksonesided", "laplace", "levy", + "lindley", "logistic", "noncentralbeta", "noncentralchisq", diff --git a/test/ref/continuous/lindley.R b/test/ref/continuous/lindley.R new file mode 100644 index 0000000000..cf112567fd --- /dev/null +++ b/test/ref/continuous/lindley.R @@ -0,0 +1,12 @@ +library("LindleyR") + +Lindley <- R6Class("Lindley", + inherit=ContinuousDistribution, + public=list(names=c("theta"), + theta=NA, + initialize=function(theta=1) { self$theta <- theta }, + supp=function() { c(0, Inf) }, + properties=function() { list() }, + pdf=function(x, log=FALSE) { dlindley(x, self$theta, log=log) }, + cdf=function(x) { plindley(x, self$theta) }, + quan=function(x) { qlindley(x, self$theta) })) diff --git a/test/ref/continuous_test.lst b/test/ref/continuous_test.lst index 38c894dff8..27e5b63367 100644 --- a/test/ref/continuous_test.lst +++ b/test/ref/continuous_test.lst @@ -101,6 +101,11 @@ Levy(2) Levy(2, 8) Levy(3.0, 3) +Lindley() +Lindley(0.5) +Lindley(1.5) 
+Lindley(3.0) + Logistic() Logistic(2.0) Logistic(0.0, 1.0) diff --git a/test/ref/continuous_test.ref.json b/test/ref/continuous_test.ref.json index faadbeff20..e40c121fe0 100644 --- a/test/ref/continuous_test.ref.json +++ b/test/ref/continuous_test.ref.json @@ -2876,6 +2876,110 @@ { "q": 0.90, "x": 2.19722457733622 } ] }, +{ + "expr": "Lindley()", + "dtype": "Lindley", + "minimum": 0, + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": 0.201229322235454, "pdf": 0.491137556224262, "logpdf": -0.711031035180873, "cdf": 0.1 }, + { "x": 0.409355765160304, "pdf": 0.467961032750689, "logpdf": -0.759370249884304, "cdf": 0.2 }, + { "x": 0.630825401024496, "pdf": 0.433923809718651, "logpdf": -0.834886313936738, "cdf": 0.3 }, + { "x": 0.873054028699824, "pdf": 0.391162994497697, "logpdf": -0.938630940138138, "cdf": 0.4 }, + { "x": 1.14619322062058, "pdf": 0.341077783550314, "logpdf": -1.07564472345732, "cdf": 0.5 }, + { "x": 1.4662034354858, "pdf": 0.284599964357274, "logpdf": -1.25667071856436, "cdf": 0.6 }, + { "x": 1.86201458406329, "pdf": 0.222320334770884, "logpdf": -1.5036359877426, "cdf": 0.7 }, + { "x": 2.39727598728087, "pdf": 0.154517296485711, "logpdf": -1.8674492375466, "cdf": 0.8 }, + { "x": 3.27181206035629, "pdf": 0.0810311902520209, "logpdf": -2.51292113358837, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.201229322235454 }, + { "q": 0.25, "x": 0.517999713886834 }, + { "q": 0.50, "x": 1.14619322062058 }, + { "q": 0.75, "x": 2.10546657787674 }, + { "q": 0.90, "x": 3.27181206035629 } + ] +}, +{ + "expr": "Lindley(0.5)", + "dtype": "Lindley", + "minimum": 0, + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": 0.544019554971776, "pdf": 0.196051062631011, "logpdf": -1.6293801300544, "cdf": 0.0999999999999999 }, + { "x": 1.04307224421496, "pdf": 0.202130669036478, "logpdf": -1.59884091429709, "cdf": 0.2 }, + { "x": 1.5435367816863, "pdf": 0.195934998738978, "logpdf": -1.62997231384287, "cdf": 0.3 }, + { "x": 2.07183011333304, 
"pdf": 0.181699507555922, "logpdf": -1.70540101378774, "cdf": 0.4 }, + { "x": 2.65368480453801, "pdf": 0.161562102012007, "logpdf": -1.82286567765105, "cdf": 0.5 }, + { "x": 3.32408881228643, "pdf": 0.136749781372001, "logpdf": -1.98960243642355, "cdf": 0.6 }, + { "x": 4.14298158855248, "pdf": 0.10800073172794, "logpdf": -2.22561727662217, "cdf": 0.7 }, + { "x": 5.23954037209116, "pdf": 0.0757268013786986, "logpdf": -2.58062313392393, "cdf": 0.8 }, + { "x": 7.01639138849524, "pdf": 0.0400163645647015, "logpdf": -3.21846679441503, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.544019554971776 }, + { "q": 0.25, "x": 1.29133558854642 }, + { "q": 0.50, "x": 2.65368480453801 }, + { "q": 0.75, "x": 4.64292488017037 }, + { "q": 0.90, "x": 7.01639138849524 } + ] +}, +{ + "expr": "Lindley(1.5)", + "dtype": "Lindley", + "minimum": 0, + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": 0.114556971437579, "pdf": 0.844729364271255, "logpdf": -0.168738981894155, "cdf": 0.0999999999999998 }, + { "x": 0.237615294552685, "pdf": 0.77989414577674, "logpdf": -0.248597079050649, "cdf": 0.2 }, + { "x": 0.372143951935997, "pdf": 0.706662569042456, "logpdf": -0.347201998525827, "cdf": 0.3 }, + { "x": 0.522280435214869, "pdf": 0.625895614615692, "logpdf": -0.468571671601523, "cdf": 0.4 }, + { "x": 0.694246765710091, "pdf": 0.538217563107918, "logpdf": -0.619492408171021, "cdf": 0.5 }, + { "x": 0.89826450068756, "pdf": 0.444050395936119, "logpdf": -0.811817218630417, "cdf": 0.6 }, + { "x": 1.15323211442359, "pdf": 0.343613202710769, "logpdf": -1.06823866495725, "cdf": 0.7 }, + { "x": 1.50109080206919, "pdf": 0.236863853380854, "logpdf": -1.44026976122003, "cdf": 0.8 }, + { "x": 2.07401891659104, "pdf": 0.12326693255173, "logpdf": -2.09340309170877, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.114556971437579 }, + { "q": 0.25, "x": 0.30322247911811 }, + { "q": 0.50, "x": 0.694246765710091 }, + { "q": 0.75, "x": 1.31109339979007 }, + { "q": 0.90, "x": 
2.07401891659104 } + ] +}, +{ + "expr": "Lindley(3.0)", + "dtype": "Lindley", + "minimum": 0, + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": 0.0465620413518296, "pdf": 2.04777663835177, "logpdf": 0.716754637924627, "cdf": 0.1 }, + { "x": 0.0980294998228117, "pdf": 1.84109209665868, "logpdf": 0.610358926343917, "cdf": 0.2 }, + { "x": 0.155708594360887, "pdf": 1.62989906665426, "logpdf": 0.488518090603404, "cdf": 0.3 }, + { "x": 0.221505053392774, "pdf": 1.41410780366481, "logpdf": 0.34649880478381, "cdf": 0.4 }, + { "x": 0.298361440425077, "pdf": 1.19357014066527, "logpdf": 0.176948933955783, "cdf": 0.5 }, + { "x": 0.391185596605387, "pdf": 0.968051255886061, "logpdf": -0.0324702428118596, "cdf": 0.6 }, + { "x": 0.509131699567543, "pdf": 0.737174657514306, "logpdf": -0.304930430453559, "cdf": 0.7 }, + { "x": 0.672626102659318, "pdf": 0.500297086565447, "logpdf": -0.692553183880015, "cdf": 0.8 }, + { "x": 0.94630688386014, "pdf": 0.256133428755212, "logpdf": -1.36205676420823, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.0465620413518296 }, + { "q": 0.25, "x": 0.125991234674835 }, + { "q": 0.50, "x": 0.298361440425077 }, + { "q": 0.75, "x": 0.583010462004234 }, + { "q": 0.90, "x": 0.94630688386014 } + ] +}, { "expr": "Logistic(2.0)", "dtype": "Logistic", diff --git a/test/ref/rdistributions.R b/test/ref/rdistributions.R index 1212ddb183..9c69e33eb3 100644 --- a/test/ref/rdistributions.R +++ b/test/ref/rdistributions.R @@ -58,6 +58,7 @@ source("continuous/inversegamma.R") source("continuous/inversegaussian.R") source("continuous/laplace.R") source("continuous/levy.R") +source("continuous/lindley.R") source("continuous/logistic.R") source("continuous/lognormal.R") source("continuous/noncentralbeta.R") diff --git a/test/ref/readme.md b/test/ref/readme.md index ad0cf13c44..502ba012c8 100644 --- a/test/ref/readme.md +++ b/test/ref/readme.md @@ -24,6 +24,7 @@ in addition to the R language itself: | BiasedUrn | For ``NoncentralHypergeometric`` | | 
fBasics | For ``NormalInverseGaussian`` | | gnorm | For ``PGeneralizedGaussian`` | +| LindleyR | For ``Lindley`` | ## Usage diff --git a/test/runtests.jl b/test/runtests.jl index 186004edd6..8d38c0abcc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -75,6 +75,7 @@ const tests = [ "univariate/continuous/exponential", "univariate/continuous/gamma", "univariate/continuous/gumbel", + "univariate/continuous/lindley", "univariate/continuous/logistic", "univariate/continuous/noncentralchisq", "univariate/continuous/weibull", diff --git a/test/univariate/continuous/lindley.jl b/test/univariate/continuous/lindley.jl new file mode 100644 index 0000000000..57d141693f --- /dev/null +++ b/test/univariate/continuous/lindley.jl @@ -0,0 +1,81 @@ +using Distributions +using FiniteDifferences +using ForwardDiff +using Random +using Test + +using Base: Fix1 +using Distributions: expectation + +@testset "Lindley" begin + @testset "NaNs" begin + D = Lindley() + for f in (pdf, logpdf, gradlogpdf, cdf, ccdf, logcdf, logccdf, mgf, cgf, cf) + @test isnan(f(D, NaN)) + end + end + @testset "MLE" begin + rng = MersenneTwister(420) + samples = rand(rng, Lindley(1.5f0), 10_000) + mle = fit_mle(Lindley, samples) + @test mle isa Lindley + @test shape(mle) ≈ 1.5 atol=0.1 + end + @testset "$T" for T in (Float16, Float32, Float64, Rational{Int}) + D = Lindley(one(T)) + @test partype(D) === T + @test typeof(@inferred rand(D)) === typeof(rand()) + @test @inferred(mean(D)) == T(3/2) + tol = sqrt(eps(float(T))) + @testset "Gradient of log PDF" begin + for x in T(0):T(0.5):T(20) + fd = ForwardDiff.derivative(Fix1(logpdf, D), x) + gl = @inferred gradlogpdf(D, x) + @test gl isa T + @test fd ≈ gl atol=tol + end + end + @testset "Entropy" begin + shannon = @inferred entropy(D) + expect = T(expectation(x -> -logpdf(D, x), D)) + if T <: AbstractFloat + @test shannon isa T + end + @test shannon ≈ expect atol=tol + end + @testset "K-L divergence" begin + S = supertype(typeof(D)) + D₂ = Lindley(T(2)) + d₁ 
= kldivergence(D, D₂) + d₂ = invoke(kldivergence, Tuple{S,S}, D, D₂) + if T <: AbstractFloat + @test d₁ isa T + end + @test d₁ ≈ d₂ atol=tol + end + @testset "Mode" begin + @test iszero(@inferred mode(D)) + @test isone(mode(Lindley(T(0.5)))) + m = mode(Lindley(T(0.1))) + @test m isa T + @test m ≈ T(9) atol=tol + end + @testset "Skewness" begin + μ = mean(D) + σ = std(D) + s₁ = @inferred skewness(D) + s₂ = T(expectation(x -> ((x - μ) / σ)^3, D)) + if T <: AbstractFloat + @test s₁ isa T + end + @test s₁ ≈ s₂ atol=sqrt(tol) + end + @testset "MGF, CGF, CF" begin + @test @inferred(mgf(D, 0)) === one(T) + @test iszero(mgf(D, shape(D) + 1)) + @test ForwardDiff.derivative(Fix1(mgf, D), 0) ≈ mean(D) + @test central_fdm(5, 1)(Fix1(cf, D), 0) ≈ mean(D) * im + test_cgf(D, (-1e6, -100f0, Float16(-1), 1//10, 0.9)) + end + end +end From 0d25150d043d10229ddd31bcbd9ac7c1ea3efe05 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Sat, 25 Feb 2023 13:29:55 -0800 Subject: [PATCH 70/93] Bump patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7113c2b15c..6c77a1cde1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.83" +version = "0.25.84" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 2bda4c037b9ac696c82f97b230080f895df4d14e Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 28 Feb 2023 13:54:03 +0100 Subject: [PATCH 71/93] Fix comparisons of `DiscreteNonParametric` (#1683) * Fix comparisons of `DiscreteNonParametric` * More general comparisons --- src/univariate/discrete/categorical.jl | 9 ++++ .../discrete/discretenonparametric.jl | 14 +++--- test/univariate/discrete/categorical.jl | 28 ++++++++++++ .../discrete/discretenonparametric.jl | 43 +++++++++++++++++++ 4 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/univariate/discrete/categorical.jl 
b/src/univariate/discrete/categorical.jl index 1dc08960cc..9572acb17e 100644 --- a/src/univariate/discrete/categorical.jl +++ b/src/univariate/discrete/categorical.jl @@ -52,6 +52,15 @@ ncategories(d::Categorical) = support(d).stop params(d::Categorical{P,Ps}) where {P<:Real, Ps<:AbstractVector{P}} = (probs(d),) partype(::Categorical{T}) where {T<:Real} = T +function Base.isapprox(c1::Categorical, c2::Categorical; kwargs...) + # support are of type Base.OneTo, so comparing the cardinality of the support + # is sufficient + # we explicitly redefine the method for `DiscreteNonParametric` which also compares + # the support since `isapprox(::OneTo, ::OneTo)` is broken on Julia 1.6 (issue #1675) + return length(support(c1)) == length(support(c2)) && + isapprox(probs(c1), probs(c2); kwargs...) +end + ### Statistics function median(d::Categorical{T}) where {T<:Real} diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index d94c63aa1e..8e1eefab6e 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -63,13 +63,13 @@ Get the vector of probabilities associated with the support of `d`. """ probs(d::DiscreteNonParametric) = d.p -==(c1::D, c2::D) where D<:DiscreteNonParametric = - (support(c1) == support(c2) || all(support(c1) .== support(c2))) && - (probs(c1) == probs(c2) || all(probs(c1) .== probs(c2))) - -Base.isapprox(c1::D, c2::D) where D<:DiscreteNonParametric = - (support(c1) ≈ support(c2) || all(support(c1) .≈ support(c2))) && - (probs(c1) ≈ probs(c2) || all(probs(c1) .≈ probs(c2))) +function Base.isapprox(c1::DiscreteNonParametric, c2::DiscreteNonParametric; kwargs...) + support_c1 = support(c1) + support_c2 = support(c2) + return length(support_c1) == length(support_c2) && + isapprox(support_c1, support_c2; kwargs...) && + isapprox(probs(c1), probs(c2); kwargs...) 
+end # Sampling diff --git a/test/univariate/discrete/categorical.jl b/test/univariate/discrete/categorical.jl index 28da036157..45d2c84f7d 100644 --- a/test/univariate/discrete/categorical.jl +++ b/test/univariate/discrete/categorical.jl @@ -21,6 +21,8 @@ for p in Any[ @test maximum(d) == k @test extrema(d) == (1, k) @test ncategories(d) == k + @test d == d + @test d ≈ d c = 0.0 for i = 1:k @@ -96,4 +98,30 @@ end @test rand(rng, d, 10) == [2, 1, 3, 3, 2, 3, 3, 3, 3, 3] end +@testset "comparisons" begin + d1 = Categorical([0.4, 0.6]) + d2 = Categorical([0.6, 0.4]) + d3 = Categorical([0.2, 0.7, 0.1]) + + # Same distribution + for d in (d1, d2, d3) + @test d == d + @test d ≈ d + end + + # Same support, different probabilities + @test d2 != d1 + @test !isapprox(d2, d1) + @test d2 ≈ d1 atol=0.4 + + # Different support + @test d3 != d1 + @test !isapprox(d3, d1) + + # issue #1675 + @test Categorical([0.5, 0.5]) ≈ Categorical([0.5, 0.5]) + @test Categorical([0.5, 0.5]) == Categorical([0.5f0, 0.5f0]) + @test Categorical([0.5, 0.5]) ≈ Categorical([0.5f0, 0.5f0]) +end + end diff --git a/test/univariate/discrete/discretenonparametric.jl b/test/univariate/discrete/discretenonparametric.jl index c769b0a479..68354a064a 100644 --- a/test/univariate/discrete/discretenonparametric.jl +++ b/test/univariate/discrete/discretenonparametric.jl @@ -171,3 +171,46 @@ d = DiscreteNonParametric([2, 1], [1, 0]) @inferred(skewness(d)) end end + +@testset "comparisons" begin + d1 = DiscreteNonParametric([1, 2], [0.4, 0.6]) + d2 = DiscreteNonParametric([1, 2], [0.6, 0.4]) + d3 = DiscreteNonParametric([1 + 1e-9, 2], [0.4, 0.6]) + d4 = DiscreteNonParametric([1 + 1e-9, 2], [0.6, 0.4]) + d5 = DiscreteNonParametric([9, 2, 4], [0.2, 0.7, 0.1]) + + # Same distribution + for d in (d1, d2, d3, d4, d5) + @test d == d + @test d ≈ d + end + + # Comparison with categorical distribution + @test Categorical([0.4, 0.6]) == d1 + @test Categorical([0.4, 0.6]) ≈ d1 + + # Same support, different probabilities + 
@test d2 != d1 + @test !isapprox(d2, d1) + @test d2 ≈ d1 atol=0.4 + + # Different support, same probabilities + @test d3 != d1 + @test d3 ≈ d1 + + # Different support, different probabilities, same dimension + @test d4 != d1 + @test !isapprox(d4, d1) + @test d4 ≈ d1 atol=0.4 + + # Different cardinality of the support + @test d5 != d1 + @test !isapprox(d5, d1) + + # issue #1140 + @test DiscreteNonParametric(1:2, [0.5, 0.5]) != DiscreteNonParametric(1:3, [0.2, 0.4, 0.4]) + + # Different types + @test DiscreteNonParametric(1:2, [0.5, 0.5]) == DiscreteNonParametric([1, 2], [0.5f0, 0.5f0]) + @test DiscreteNonParametric(1:2, [0.5, 0.5]) ≈ DiscreteNonParametric([1, 2], [0.5f0, 0.5f0]) +end \ No newline at end of file From eecfd3c499bd32981c01bc85e1fda5f801cd33c6 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 28 Feb 2023 13:54:27 +0100 Subject: [PATCH 72/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6c77a1cde1..3686abafda 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.84" +version = "0.25.85" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From b9d063facdf65d46540ef7d652825b171e7a7a9a Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 3 Mar 2023 12:40:09 +0100 Subject: [PATCH 73/93] Make ChainRulesCore and DensityInterface weak dependencies (#1686) * Make ChainRulesCore and DensityInterface weak dependencies * Fixes * More fixes * Another fix --- Project.toml | 12 +++- .../DistributionsChainRulesCoreExt.jl | 16 +++++ .../eachvariate.jl | 10 ++++ .../multivariate/dirichlet.jl | 57 ++++++++++++++++++ .../univariate/continuous/uniform.jl | 34 +++++++++++ .../univariate/discrete/negativebinomial.jl | 36 +++++++++++ .../univariate/discrete/poissonbinomial.jl | 24 ++++++++ ext/DistributionsChainRulesCoreExt/utils.jl | 1 + 
.../DistributionsDensityInterfaceExt.jl | 7 +++ src/Distributions.jl | 11 ++-- src/eachvariate.jl | 11 ---- src/multivariate/dirichlet.jl | 59 ------------------- src/univariate/continuous/uniform.jl | 37 ------------ src/univariate/discrete/negativebinomial.jl | 39 ------------ src/univariate/discrete/poissonbinomial.jl | 27 --------- src/utils.jl | 2 - 16 files changed, 201 insertions(+), 182 deletions(-) create mode 100644 ext/DistributionsChainRulesCoreExt/DistributionsChainRulesCoreExt.jl create mode 100644 ext/DistributionsChainRulesCoreExt/eachvariate.jl create mode 100644 ext/DistributionsChainRulesCoreExt/multivariate/dirichlet.jl create mode 100644 ext/DistributionsChainRulesCoreExt/univariate/continuous/uniform.jl create mode 100644 ext/DistributionsChainRulesCoreExt/univariate/discrete/negativebinomial.jl create mode 100644 ext/DistributionsChainRulesCoreExt/univariate/discrete/poissonbinomial.jl create mode 100644 ext/DistributionsChainRulesCoreExt/utils.jl rename src/density_interface.jl => ext/DistributionsDensityInterfaceExt.jl (90%) diff --git a/Project.toml b/Project.toml index 3686abafda..f9ad8e060b 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,14 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +[weakdeps] +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" + +[extensions] +DistributionsChainRulesCoreExt = "ChainRulesCore" +DistributionsDensityInterfaceExt = "DensityInterface" + [compat] ChainRulesCore = "1" DensityInterface = "0.4" @@ -32,7 +40,9 @@ julia = "1.3" [extras] Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" +DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" FiniteDifferences = 
"26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" @@ -43,4 +53,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test", "OffsetArrays"] +test = ["StableRNGs", "Calculus", "ChainRulesCore", "ChainRulesTestUtils", "DensityInterface", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test", "OffsetArrays"] diff --git a/ext/DistributionsChainRulesCoreExt/DistributionsChainRulesCoreExt.jl b/ext/DistributionsChainRulesCoreExt/DistributionsChainRulesCoreExt.jl new file mode 100644 index 0000000000..9dde406954 --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/DistributionsChainRulesCoreExt.jl @@ -0,0 +1,16 @@ +module DistributionsChainRulesCoreExt + +using Distributions +using Distributions: LinearAlgebra, SpecialFunctions, StatsFuns +import ChainRulesCore + +include("eachvariate.jl") +include("utils.jl") + +include("univariate/continuous/uniform.jl") +include("univariate/discrete/negativebinomial.jl") +include("univariate/discrete/poissonbinomial.jl") + +include("multivariate/dirichlet.jl") + +end # module diff --git a/ext/DistributionsChainRulesCoreExt/eachvariate.jl b/ext/DistributionsChainRulesCoreExt/eachvariate.jl new file mode 100644 index 0000000000..359a6703eb --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/eachvariate.jl @@ -0,0 +1,10 @@ +function ChainRulesCore.rrule(::Type{Distributions.EachVariate{V}}, x::AbstractArray{<:Real}) where {V} + y = Distributions.EachVariate{V}(x) + size_x = size(x) + function EachVariate_pullback(Δ) + # TODO: Should we also handle `Tangent{<:EachVariate}`? 
+ Δ_out = reshape(mapreduce(vec, vcat, ChainRulesCore.unthunk(Δ)), size_x) + return (ChainRulesCore.NoTangent(), Δ_out) + end + return y, EachVariate_pullback +end diff --git a/ext/DistributionsChainRulesCoreExt/multivariate/dirichlet.jl b/ext/DistributionsChainRulesCoreExt/multivariate/dirichlet.jl new file mode 100644 index 0000000000..5aa0d727d5 --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/multivariate/dirichlet.jl @@ -0,0 +1,57 @@ +function ChainRulesCore.frule((_, Δalpha)::Tuple{Any,Any}, ::Type{DT}, alpha::AbstractVector{T}; check_args::Bool = true) where {T <: Real, DT <: Union{Dirichlet{T}, Dirichlet}} + d = DT(alpha; check_args=check_args) + ∂alpha0 = sum(Δalpha) + digamma_alpha0 = SpecialFunctions.digamma(d.alpha0) + ∂lmnB = sum(Broadcast.instantiate(Broadcast.broadcasted(Δalpha, alpha) do Δalphai, alphai + Δalphai * (SpecialFunctions.digamma(alphai) - digamma_alpha0) + end)) + Δd = ChainRulesCore.Tangent{typeof(d)}(; alpha=Δalpha, alpha0=∂alpha0, lmnB=∂lmnB) + return d, Δd +end + +function ChainRulesCore.rrule(::Type{DT}, alpha::AbstractVector{T}; check_args::Bool = true) where {T <: Real, DT <: Union{Dirichlet{T}, Dirichlet}} + d = DT(alpha; check_args=check_args) + digamma_alpha0 = SpecialFunctions.digamma(d.alpha0) + function Dirichlet_pullback(_Δd) + Δd = ChainRulesCore.unthunk(_Δd) + Δalpha = Δd.alpha .+ Δd.alpha0 .+ Δd.lmnB .* (SpecialFunctions.digamma.(alpha) .- digamma_alpha0) + return ChainRulesCore.NoTangent(), Δalpha + end + return d, Dirichlet_pullback +end + +function ChainRulesCore.frule((_, Δd, Δx)::Tuple{Any,Any,Any}, ::typeof(Distributions._logpdf), d::Dirichlet, x::AbstractVector{<:Real}) + Ω = Distributions._logpdf(d, x) + ∂alpha = sum(Broadcast.instantiate(Broadcast.broadcasted(Δd.alpha, Δx, d.alpha, x) do Δalphai, Δxi, alphai, xi + StatsFuns.xlogy(Δalphai, xi) + (alphai - 1) * Δxi / xi + end)) + ∂lmnB = -Δd.lmnB + ΔΩ = ∂alpha + ∂lmnB + if !isfinite(Ω) + ΔΩ = oftype(ΔΩ, NaN) + end + return Ω, ΔΩ +end + +function 
ChainRulesCore.rrule(::typeof(Distributions._logpdf), d::T, x::AbstractVector{<:Real}) where {T<:Dirichlet} + Ω = Distributions._logpdf(d, x) + isfinite_Ω = isfinite(Ω) + alpha = d.alpha + function _logpdf_Dirichlet_pullback(_ΔΩ) + ΔΩ = ChainRulesCore.unthunk(_ΔΩ) + ∂alpha = _logpdf_Dirichlet_∂alphai.(x, ΔΩ, isfinite_Ω) + ∂lmnB = isfinite_Ω ? -float(ΔΩ) : oftype(float(ΔΩ), NaN) + Δd = ChainRulesCore.Tangent{T}(; alpha=∂alpha, lmnB=∂lmnB) + Δx = _logpdf_Dirichlet_Δxi.(ΔΩ, alpha, x, isfinite_Ω) + return ChainRulesCore.NoTangent(), Δd, Δx + end + return Ω, _logpdf_Dirichlet_pullback +end +function _logpdf_Dirichlet_∂alphai(xi, ΔΩi, isfinite::Bool) + ∂alphai = StatsFuns.xlogy.(ΔΩi, xi) + return isfinite ? ∂alphai : oftype(∂alphai, NaN) +end +function _logpdf_Dirichlet_Δxi(ΔΩi, alphai, xi, isfinite::Bool) + Δxi = ΔΩi * (alphai - 1) / xi + return isfinite ? Δxi : oftype(Δxi, NaN) +end diff --git a/ext/DistributionsChainRulesCoreExt/univariate/continuous/uniform.jl b/ext/DistributionsChainRulesCoreExt/univariate/continuous/uniform.jl new file mode 100644 index 0000000000..0461329577 --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/univariate/continuous/uniform.jl @@ -0,0 +1,34 @@ +function ChainRulesCore.frule((_, Δd, _), ::typeof(logpdf), d::Uniform, x::Real) + # Compute log probability + a, b = params(d) + insupport = a <= x <= b + diff = b - a + Ω = insupport ? -log(diff) : log(zero(diff)) + + # Compute tangent + Δdiff = Δd.a - Δd.b + ΔΩ = (insupport ? Δdiff : zero(Δdiff)) / diff + + return Ω, ΔΩ +end + +function ChainRulesCore.rrule(::typeof(logpdf), d::Uniform, x::Real) + # Compute log probability + a, b = params(d) + insupport = a <= x <= b + diff = b - a + Ω = insupport ? 
-log(diff) : log(zero(diff)) + + # Define pullback + function logpdf_Uniform_pullback(Δ) + Δa = Δ / diff + Δd = if insupport + ChainRulesCore.Tangent{typeof(d)}(; a=Δa, b=-Δa) + else + ChainRulesCore.Tangent{typeof(d)}(; a=zero(Δa), b=zero(Δa)) + end + return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.ZeroTangent() + end + + return Ω, logpdf_Uniform_pullback +end diff --git a/ext/DistributionsChainRulesCoreExt/univariate/discrete/negativebinomial.jl b/ext/DistributionsChainRulesCoreExt/univariate/discrete/negativebinomial.jl new file mode 100644 index 0000000000..06ee0294f7 --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/univariate/discrete/negativebinomial.jl @@ -0,0 +1,36 @@ +## Callable struct to fix type inference issues caused by captured values +struct LogPDFNegativeBinomialPullback{D,T<:Real} + ∂r::T + ∂p::T +end + +function (f::LogPDFNegativeBinomialPullback{D})(Δ) where {D} + Δr = Δ * f.∂r + Δp = Δ * f.∂p + Δd = ChainRulesCore.Tangent{D}(; r=Δr, p=Δp) + return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.NoTangent() +end + +function ChainRulesCore.rrule(::typeof(logpdf), d::NegativeBinomial, k::Real) + # Compute log probability (as in the definition of `logpdf(d, k)` above) + r, p = params(d) + z = StatsFuns.xlogy(r, p) + StatsFuns.xlog1py(k, -p) + if iszero(k) + Ω = z + ∂r = oftype(z, log(p)) + ∂p = oftype(z, r/p) + elseif insupport(d, k) + Ω = z - log(k + r) - SpecialFunctions.logbeta(r, k + 1) + ∂r = oftype(z, log(p) - inv(k + r) - SpecialFunctions.digamma(r) + SpecialFunctions.digamma(r + k + 1)) + ∂p = oftype(z, r/p - k / (1 - p)) + else + Ω = oftype(z, -Inf) + ∂r = oftype(z, NaN) + ∂p = oftype(z, NaN) + end + + # Define pullback + logpdf_NegativeBinomial_pullback = LogPDFNegativeBinomialPullback{typeof(d),typeof(z)}(∂r, ∂p) + + return Ω, logpdf_NegativeBinomial_pullback +end diff --git a/ext/DistributionsChainRulesCoreExt/univariate/discrete/poissonbinomial.jl b/ext/DistributionsChainRulesCoreExt/univariate/discrete/poissonbinomial.jl 
new file mode 100644 index 0000000000..aa27a9fd94 --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/univariate/discrete/poissonbinomial.jl @@ -0,0 +1,24 @@ +for f in (:poissonbinomial_pdf, :poissonbinomial_pdf_fft) + pullback = Symbol(f, :_pullback) + @eval begin + function ChainRulesCore.frule( + (_, Δp)::Tuple{<:Any,<:AbstractVector{<:Real}}, ::typeof(Distributions.$f), p::AbstractVector{<:Real} + ) + y = Distributions.$f(p) + A = Distributions.poissonbinomial_pdf_partialderivatives(p) + return y, A' * Δp + end + function ChainRulesCore.rrule(::typeof(Distributions.$f), p::AbstractVector{<:Real}) + y = Distributions.$f(p) + A = Distributions.poissonbinomial_pdf_partialderivatives(p) + function $pullback(Δy) + p̄ = ChainRulesCore.InplaceableThunk( + Δ -> LinearAlgebra.mul!(Δ, A, Δy, true, true), + ChainRulesCore.@thunk(A * Δy), + ) + return ChainRulesCore.NoTangent(), p̄ + end + return y, $pullback + end + end +end diff --git a/ext/DistributionsChainRulesCoreExt/utils.jl b/ext/DistributionsChainRulesCoreExt/utils.jl new file mode 100644 index 0000000000..bc9dd0276c --- /dev/null +++ b/ext/DistributionsChainRulesCoreExt/utils.jl @@ -0,0 +1 @@ +ChainRulesCore.@non_differentiable Distributions.check_args(::Any, ::Bool) diff --git a/src/density_interface.jl b/ext/DistributionsDensityInterfaceExt.jl similarity index 90% rename from src/density_interface.jl rename to ext/DistributionsDensityInterfaceExt.jl index f9a30aeed6..4617c923ec 100644 --- a/src/density_interface.jl +++ b/ext/DistributionsDensityInterfaceExt.jl @@ -1,3 +1,8 @@ +module DistributionsDensityInterfaceExt + +using Distributions +import DensityInterface + @inline DensityInterface.DensityKind(::Distribution) = DensityInterface.HasDensity() for (di_func, d_func) in ((:logdensityof, :logpdf), (:densityof, :pdf)) @@ -17,3 +22,5 @@ for (di_func, d_func) in ((:logdensityof, :logpdf), (:densityof, :pdf)) end end end + +end # module diff --git a/src/Distributions.jl b/src/Distributions.jl index 
7e3085a78d..0421ddbb0f 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -25,10 +25,6 @@ import PDMats: dim, PDMat, invquad using SpecialFunctions -import ChainRulesCore - -import DensityInterface - export # re-export Statistics mean, median, quantile, std, var, cov, cor, @@ -310,8 +306,11 @@ include("pdfnorm.jl") include("mixtures/mixturemodel.jl") include("mixtures/unigmm.jl") -# Implementation of DensityInterface API -include("density_interface.jl") +# Extensions: Implementation of DensityInterface and ChainRulesCore API +if !isdefined(Base, :get_extension) + include("../ext/DistributionsChainRulesCoreExt/DistributionsChainRulesCoreExt.jl") + include("../ext/DistributionsDensityInterfaceExt.jl") +end # Testing utilities for other packages which implement distributions. include("test_utils.jl") diff --git a/src/eachvariate.jl b/src/eachvariate.jl index 36a9ae9e97..701be99faa 100644 --- a/src/eachvariate.jl +++ b/src/eachvariate.jl @@ -11,17 +11,6 @@ function EachVariate{V}(x::AbstractArray{<:Real,M}) where {V,M} return EachVariate{V,typeof(x),typeof(ax),T,M-V}(x, ax) end -function ChainRulesCore.rrule(::Type{EachVariate{V}}, x::AbstractArray{<:Real}) where {V} - y = EachVariate{V}(x) - size_x = size(x) - function EachVariate_pullback(Δ) - # TODO: Should we also handle `Tangent{<:EachVariate}`? 
- Δ_out = reshape(mapreduce(vec, vcat, ChainRulesCore.unthunk(Δ)), size_x) - return (ChainRulesCore.NoTangent(), Δ_out) - end - return y, EachVariate_pullback -end - Base.IteratorSize(::Type{EachVariate{V,P,A,T,N}}) where {V,P,A,T,N} = Base.HasShape{N}() Base.axes(x::EachVariate) = x.axes diff --git a/src/multivariate/dirichlet.jl b/src/multivariate/dirichlet.jl index d77d4f5d0d..b24980ec98 100644 --- a/src/multivariate/dirichlet.jl +++ b/src/multivariate/dirichlet.jl @@ -375,62 +375,3 @@ function fit_mle(::Type{<:Dirichlet}, P::AbstractMatrix{Float64}, elogp = mean_logp(suffstats(Dirichlet, P, w)) fit_dirichlet!(elogp, α; maxiter=maxiter, tol=tol, debug=debug) end - -## Differentiation -function ChainRulesCore.frule((_, Δalpha)::Tuple{Any,Any}, ::Type{DT}, alpha::AbstractVector{T}; check_args::Bool = true) where {T <: Real, DT <: Union{Dirichlet{T}, Dirichlet}} - d = DT(alpha; check_args=check_args) - ∂alpha0 = sum(Δalpha) - digamma_alpha0 = SpecialFunctions.digamma(d.alpha0) - ∂lmnB = sum(Broadcast.instantiate(Broadcast.broadcasted(Δalpha, alpha) do Δalphai, alphai - Δalphai * (SpecialFunctions.digamma(alphai) - digamma_alpha0) - end)) - Δd = ChainRulesCore.Tangent{typeof(d)}(; alpha=Δalpha, alpha0=∂alpha0, lmnB=∂lmnB) - return d, Δd -end - -function ChainRulesCore.rrule(::Type{DT}, alpha::AbstractVector{T}; check_args::Bool = true) where {T <: Real, DT <: Union{Dirichlet{T}, Dirichlet}} - d = DT(alpha; check_args=check_args) - digamma_alpha0 = SpecialFunctions.digamma(d.alpha0) - function Dirichlet_pullback(_Δd) - Δd = ChainRulesCore.unthunk(_Δd) - Δalpha = Δd.alpha .+ Δd.alpha0 .+ Δd.lmnB .* (SpecialFunctions.digamma.(alpha) .- digamma_alpha0) - return ChainRulesCore.NoTangent(), Δalpha - end - return d, Dirichlet_pullback -end - -function ChainRulesCore.frule((_, Δd, Δx)::Tuple{Any,Any,Any}, ::typeof(_logpdf), d::Dirichlet, x::AbstractVector{<:Real}) - Ω = _logpdf(d, x) - ∂alpha = sum(Broadcast.instantiate(Broadcast.broadcasted(Δd.alpha, Δx, d.alpha, x) do 
Δalphai, Δxi, alphai, xi - xlogy(Δalphai, xi) + (alphai - 1) * Δxi / xi - end)) - ∂lmnB = -Δd.lmnB - ΔΩ = ∂alpha + ∂lmnB - if !isfinite(Ω) - ΔΩ = oftype(ΔΩ, NaN) - end - return Ω, ΔΩ -end - -function ChainRulesCore.rrule(::typeof(_logpdf), d::T, x::AbstractVector{<:Real}) where {T<:Dirichlet} - Ω = _logpdf(d, x) - isfinite_Ω = isfinite(Ω) - alpha = d.alpha - function _logpdf_Dirichlet_pullback(_ΔΩ) - ΔΩ = ChainRulesCore.unthunk(_ΔΩ) - ∂alpha = _logpdf_Dirichlet_∂alphai.(x, ΔΩ, isfinite_Ω) - ∂lmnB = isfinite_Ω ? -float(ΔΩ) : oftype(float(ΔΩ), NaN) - Δd = ChainRulesCore.Tangent{T}(; alpha=∂alpha, lmnB=∂lmnB) - Δx = _logpdf_Dirichlet_Δxi.(ΔΩ, alpha, x, isfinite_Ω) - return ChainRulesCore.NoTangent(), Δd, Δx - end - return Ω, _logpdf_Dirichlet_pullback -end -function _logpdf_Dirichlet_∂alphai(xi, ΔΩi, isfinite::Bool) - ∂alphai = xlogy.(ΔΩi, xi) - return isfinite ? ∂alphai : oftype(∂alphai, NaN) -end -function _logpdf_Dirichlet_Δxi(ΔΩi, alphai, xi, isfinite::Bool) - Δxi = ΔΩi * (alphai - 1) / xi - return isfinite ? Δxi : oftype(Δxi, NaN) -end diff --git a/src/univariate/continuous/uniform.jl b/src/univariate/continuous/uniform.jl index 6386628e8c..1f535159d0 100644 --- a/src/univariate/continuous/uniform.jl +++ b/src/univariate/continuous/uniform.jl @@ -165,40 +165,3 @@ function fit_mle(::Type{T}, x::AbstractArray{<:Real}) where {T<:Uniform} end return T(extrema(x)...) end - -# ChainRules definitions - -function ChainRulesCore.frule((_, Δd, _), ::typeof(logpdf), d::Uniform, x::Real) - # Compute log probability - a, b = params(d) - insupport = a <= x <= b - diff = b - a - Ω = insupport ? -log(diff) : log(zero(diff)) - - # Compute tangent - Δdiff = Δd.a - Δd.b - ΔΩ = (insupport ? Δdiff : zero(Δdiff)) / diff - - return Ω, ΔΩ -end - -function ChainRulesCore.rrule(::typeof(logpdf), d::Uniform, x::Real) - # Compute log probability - a, b = params(d) - insupport = a <= x <= b - diff = b - a - Ω = insupport ? 
-log(diff) : log(zero(diff)) - - # Define pullback - function logpdf_Uniform_pullback(Δ) - Δa = Δ / diff - Δd = if insupport - ChainRulesCore.Tangent{typeof(d)}(; a=Δa, b=-Δa) - else - ChainRulesCore.Tangent{typeof(d)}(; a=zero(Δa), b=zero(Δa)) - end - return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.ZeroTangent() - end - - return Ω, logpdf_Uniform_pullback -end diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index 8a0a079db8..a6667b44c8 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -139,42 +139,3 @@ function cgf(d::NegativeBinomial, t) r * cgf(Geometric{typeof(p)}(p), t) end cf(d::NegativeBinomial, t::Real) = laplace_transform(d, -t*im) - -# ChainRules definitions - -## Callable struct to fix type inference issues caused by captured values -struct LogPDFNegativeBinomialPullback{D,T<:Real} - ∂r::T - ∂p::T -end - -function (f::LogPDFNegativeBinomialPullback{D})(Δ) where {D} - Δr = Δ * f.∂r - Δp = Δ * f.∂p - Δd = ChainRulesCore.Tangent{D}(; r=Δr, p=Δp) - return ChainRulesCore.NoTangent(), Δd, ChainRulesCore.NoTangent() -end - -function ChainRulesCore.rrule(::typeof(logpdf), d::NegativeBinomial, k::Real) - # Compute log probability (as in the definition of `logpdf(d, k)` above) - r, p = params(d) - z = xlogy(r, p) + xlog1py(k, -p) - if iszero(k) - Ω = z - ∂r = oftype(z, log(p)) - ∂p = oftype(z, r/p) - elseif insupport(d, k) - Ω = z - log(k + r) - logbeta(r, k + 1) - ∂r = oftype(z, log(p) - inv(k + r) - digamma(r) + digamma(r + k + 1)) - ∂p = oftype(z, r/p - k / (1 - p)) - else - Ω = oftype(z, -Inf) - ∂r = oftype(z, NaN) - ∂p = oftype(z, NaN) - end - - # Define pullback - logpdf_NegativeBinomial_pullback = LogPDFNegativeBinomialPullback{typeof(d),typeof(z)}(∂r, ∂p) - - return Ω, logpdf_NegativeBinomial_pullback -end diff --git a/src/univariate/discrete/poissonbinomial.jl b/src/univariate/discrete/poissonbinomial.jl index f96163e646..0db82f07bd 
100644 --- a/src/univariate/discrete/poissonbinomial.jl +++ b/src/univariate/discrete/poissonbinomial.jl @@ -209,8 +209,6 @@ end sampler(d::PoissonBinomial) = PoissBinAliasSampler(d) -## ChainRules definitions - # Compute matrix of partial derivatives [∂P(X=j-1)/∂pᵢ]_{i=1,…,n; j=1,…,n+1} # # This implementation uses the same dynamic programming "trick" as for the computation of @@ -250,28 +248,3 @@ function poissonbinomial_pdf_partialderivatives(p::AbstractVector{<:Real}) end return A end - -for f in (:poissonbinomial_pdf, :poissonbinomial_pdf_fft) - pullback = Symbol(f, :_pullback) - @eval begin - function ChainRulesCore.frule( - (_, Δp)::Tuple{<:Any,<:AbstractVector{<:Real}}, ::typeof($f), p::AbstractVector{<:Real} - ) - y = $f(p) - A = poissonbinomial_pdf_partialderivatives(p) - return y, A' * Δp - end - function ChainRulesCore.rrule(::typeof($f), p::AbstractVector{<:Real}) - y = $f(p) - A = poissonbinomial_pdf_partialderivatives(p) - function $pullback(Δy) - p̄ = ChainRulesCore.InplaceableThunk( - Δ -> LinearAlgebra.mul!(Δ, A, Δy, true, true), - ChainRulesCore.@thunk(A * Δy), - ) - return ChainRulesCore.NoTangent(), p̄ - end - return y, $pullback - end - end -end diff --git a/src/utils.jl b/src/utils.jl index fa75875a4b..a2c9aaffa9 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -90,8 +90,6 @@ function check_args(f::F, check::Bool) where {F} nothing end -ChainRulesCore.@non_differentiable check_args(::Any, ::Bool) - ##### Utility functions isunitvec(v::AbstractVector) = (norm(v) - 1.0) < 1.0e-12 From ec68da3a8d4a4776367f2d7ca5ec2d4666e29c78 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 3 Mar 2023 12:40:33 +0100 Subject: [PATCH 74/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f9ad8e060b..8c2ac59478 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version 
= "0.25.85" +version = "0.25.86" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 974103e33bc3c25630f8199716d45513fbcaec9f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 3 Apr 2023 15:24:02 -0700 Subject: [PATCH 75/93] CompatHelper: bump compat for FillArrays to 1, (keep existing compat) (#1704) Co-authored-by: CompatHelper Julia --- Project.toml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Project.toml b/Project.toml index 8c2ac59478..6cb8d95903 100644 --- a/Project.toml +++ b/Project.toml @@ -19,18 +19,10 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[weakdeps] -ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" - -[extensions] -DistributionsChainRulesCoreExt = "ChainRulesCore" -DistributionsDensityInterfaceExt = "DensityInterface" - [compat] ChainRulesCore = "1" DensityInterface = "0.4" -FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13" +FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1" PDMats = "0.10, 0.11" QuadGK = "2" SpecialFunctions = "1.2, 2" @@ -38,6 +30,10 @@ StatsBase = "0.32, 0.33" StatsFuns = "0.9.15, 1" julia = "1.3" +[extensions] +DistributionsChainRulesCoreExt = "ChainRulesCore" +DistributionsDensityInterfaceExt = "DensityInterface" + [extras] Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" @@ -54,3 +50,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["StableRNGs", "Calculus", "ChainRulesCore", "ChainRulesTestUtils", "DensityInterface", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test", "OffsetArrays"] + +[weakdeps] +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" From 
630e1c908b82bc2702cc7f29616cd22e5f94ab04 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Mon, 3 Apr 2023 15:25:15 -0700 Subject: [PATCH 76/93] Bump patch version for FillArrays compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6cb8d95903..5f4356ad51 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.86" +version = "0.25.87" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From b11806bedd9a0416d873c1e8da3d6a7969135584 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Mon, 1 May 2023 16:14:26 -0700 Subject: [PATCH 77/93] Support StatsBase 0.34 (#1714) --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 5f4356ad51..01eaa736e6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.87" +version = "0.25.88" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" @@ -26,7 +26,7 @@ FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1" PDMats = "0.10, 0.11" QuadGK = "2" SpecialFunctions = "1.2, 2" -StatsBase = "0.32, 0.33" +StatsBase = "0.32, 0.33, 0.34" StatsFuns = "0.9.15, 1" julia = "1.3" From 946478e9c5848590fa2aab51ff5f560699c3a2c2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 4 May 2023 14:35:27 +0200 Subject: [PATCH 78/93] CompatHelper: bump compat for GR to 0.72 for package docs, (keep existing compat) (#1707) * CompatHelper: bump compat for GR to 0.72 for package docs, (keep existing compat) * Update Project.toml --------- Co-authored-by: CompatHelper Julia Co-authored-by: David Widmann --- docs/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Project.toml 
b/docs/Project.toml index 3d6a9ee4ea..540725c6d6 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" [compat] Documenter = "0.26, 0.27" -GR = "0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70, 0.71" +GR = "0.72.1" From 730ab3c50bc142618499bbc791b574f3a8be748b Mon Sep 17 00:00:00 2001 From: Andreas Scheidegger Date: Fri, 5 May 2023 12:56:30 +0200 Subject: [PATCH 79/93] Make sure `qqbuild` does not drop min and max value (#1708) * `qqbuild` uses the correct number of probability points * pass length not vector * Update docstring Co-authored-by: David Widmann * simplify `ppoints` Co-authored-by: David Widmann * remove ppoints * Remove argument `a` from `ppoints` - add reference to Matlab docs - reverts commit 6e6f25be94b78bb6c06ac5d24f0f36e8610f509f * use latex syntax for doc string * Fix docstring signature --------- Co-authored-by: David Widmann --- src/qq.jl | 24 ++++++++++++++++++++++-- test/qq.jl | 19 +++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/qq.jl b/src/qq.jl index f6f7fa7f62..fe80e31a68 100644 --- a/src/qq.jl +++ b/src/qq.jl @@ -11,9 +11,29 @@ function qqbuild(x::AbstractVector, y::AbstractVector) return QQPair(qx, qy) end + +""" + ppoints(n::Int) + +Generate a sequence of probability points of length `n`: + +```math +(k − 0.5)/n, \\qquad k \\in \\{1, \\ldots, n\\} +``` + +## References + +https://ch.mathworks.com/help/stats/probplot.html +""" +function ppoints(n::Int) + m = 2 * n + return (1:2:(m - 1)) ./ m +end + + function qqbuild(x::AbstractVector, d::UnivariateDistribution) n = length(x) - grid = [(1 / (n - 1)):(1 / (n - 1)):(1.0 - (1 / (n - 1)));] + grid = ppoints(n) qx = quantile(x, grid) qd = quantile.(Ref(d), grid) return QQPair(qx, qd) @@ -21,7 +41,7 @@ end function qqbuild(d::UnivariateDistribution, x::AbstractVector) n = length(x) - grid = [(1 / (n - 1)):(1 / (n - 1)):(1.0 - (1 / (n - 1)));] + grid = ppoints(n) qd = quantile.(Ref(d), 
grid) qx = quantile(x, grid) return QQPair(qd, qx) diff --git a/test/qq.jl b/test/qq.jl index dcf7775149..c825528555 100644 --- a/test/qq.jl +++ b/test/qq.jl @@ -7,14 +7,25 @@ c = qqbuild(view(collect(1:20), 1:10), view(collect(1:20), 1:10)) @test a.qx ≈ b.qx ≈ c.qx ≈ collect(1.0:10) @test a.qy ≈ b.qy ≈ c.qy ≈ collect(1.0:10) + +pp = Distributions.ppoints(10) +@test length(pp) == 10 +@test minimum(pp) >= 0 +@test maximum(pp) <= 1 + a = qqbuild(collect(1:10), Uniform(1,10)) b = qqbuild(1:10, Uniform(1,10)) c = qqbuild(view(collect(1:20), 1:10), Uniform(1,10)) -@test a.qx ≈ b.qx ≈ c.qx ≈ collect(2.0:9) -@test a.qy ≈ b.qy ≈ c.qy ≈ collect(2.0:9) +@test length(a.qy) == length(a.qx) == 10 +@test a.qx ≈ b.qx ≈ c.qx ≈ a.qy ≈ b.qy ≈ c.qy a = qqbuild(Uniform(1,10), collect(1:10)) b = qqbuild(Uniform(1,10), 1:10) c = qqbuild(Uniform(1,10), view(collect(1:20), 1:10)) -@test a.qx ≈ b.qx ≈ c.qx ≈ collect(2.0:9) -@test a.qy ≈ b.qy ≈ c.qy ≈ collect(2.0:9) +@test length(a.qy) == length(a.qx) == 10 +@test a.qx ≈ b.qx ≈ c.qx ≈ a.qy ≈ b.qy ≈ c.qy + +for n in 0:3 + a = qqbuild(rand(n), Uniform(0,1)) + @test length(a.qy) == length(a.qx) == n +end From d4608469a4416407187295b27de436820b75458f Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 5 May 2023 12:58:50 +0200 Subject: [PATCH 80/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 01eaa736e6..7f1a5c6845 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.88" +version = "0.25.89" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 4fd8be85c7cc8446f8094afc18eac7b209e8fb56 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 10 May 2023 01:15:16 +0200 Subject: [PATCH 81/93] Implement `StatsAPI.pvalue` (#1719) * Implement `StatsAPI.pvalue` * Add tests * Update test/runtests.jl * Update test/statsapi.jl 
Co-authored-by: Alex Arslan --------- Co-authored-by: Alex Arslan --- Project.toml | 4 +++- src/Distributions.jl | 4 ++++ src/statsapi.jl | 29 +++++++++++++++++++++++++++++ test/runtests.jl | 1 + test/statsapi.jl | 26 ++++++++++++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 src/statsapi.jl create mode 100644 test/statsapi.jl diff --git a/Project.toml b/Project.toml index 7f1a5c6845..f2c7ae7228 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.89" +version = "0.25.90" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" @@ -15,6 +15,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -26,6 +27,7 @@ FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1" PDMats = "0.10, 0.11" QuadGK = "2" SpecialFunctions = "1.2, 2" +StatsAPI = "1.6" StatsBase = "0.32, 0.33, 0.34" StatsFuns = "0.9.15, 1" julia = "1.3" diff --git a/src/Distributions.jl b/src/Distributions.jl index 0421ddbb0f..08c174f0ce 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -17,6 +17,7 @@ using Random import Random: default_rng, rand!, SamplerRangeInt import Statistics: mean, median, quantile, std, var, cov, cor +import StatsAPI import StatsBase: kurtosis, skewness, entropy, mode, modes, fit, kldivergence, loglikelihood, dof, span, params, params! 
@@ -306,6 +307,9 @@ include("pdfnorm.jl") include("mixtures/mixturemodel.jl") include("mixtures/unigmm.jl") +# Interface for StatsAPI +include("statsapi.jl") + # Extensions: Implementation of DensityInterface and ChainRulesCore API if !isdefined(Base, :get_extension) include("../ext/DistributionsChainRulesCoreExt/DistributionsChainRulesCoreExt.jl") diff --git a/src/statsapi.jl b/src/statsapi.jl new file mode 100644 index 0000000000..18112b3dc8 --- /dev/null +++ b/src/statsapi.jl @@ -0,0 +1,29 @@ +function _check_tail(tail::Symbol) + if tail !== :both && tail !== :left && tail !== :right + throw(ArgumentError("`tail=$(repr(tail))` is invalid")) + end +end + +function StatsAPI.pvalue(dist::DiscreteUnivariateDistribution, x::Number; tail::Symbol=:both) + _check_tail(tail) + if tail === :both + p = 2 * min(ccdf(dist, x-1), cdf(dist, x)) + min(p, oneunit(p)) # if P(X = x) > 0, then possibly p > 1 + elseif tail === :left + cdf(dist, x) + else # tail === :right + ccdf(dist, x-1) + end +end + +function StatsAPI.pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail::Symbol=:both) + _check_tail(tail) + if tail === :both + p = 2 * min(cdf(dist, x), ccdf(dist, x)) + min(p, oneunit(p)) # if P(X = x) > 0, then possibly p > 1 + elseif tail === :left + cdf(dist, x) + else # tail === :right + ccdf(dist, x) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 8d38c0abcc..7eb9020667 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -90,6 +90,7 @@ const tests = [ "multivariate/product", "eachvariate", "univariate/continuous/triangular", + "statsapi", ### missing files compared to /src: # "common", diff --git a/test/statsapi.jl b/test/statsapi.jl new file mode 100644 index 0000000000..37745c5223 --- /dev/null +++ b/test/statsapi.jl @@ -0,0 +1,26 @@ +using Distributions +using StatsAPI: pvalue + +using Test + +@testset "pvalue" begin + # For two discrete and two continuous distribution + for dist in (Binomial(10, 0.3), Poisson(0.3), Normal(1.4, 2.1), 
Gamma(1.9, 0.8)) + # Draw sample + x = rand(dist) + + # Draw 10^6 additional samples + ys = rand(dist, 1_000_000) + + # Check that empirical frequencies match pvalues of left/right tail approximately + @test pvalue(dist, x; tail=:left) ≈ mean(≤(x), ys) rtol=5e-3 + @test pvalue(dist, x; tail=:right) ≈ mean(≥(x), ys) rtol=5e-3 + + # Check consistency of pvalues of both tails + @test pvalue(dist, x; tail=:both) == + min(1, 2 * min(pvalue(dist, x; tail=:left), pvalue(dist, x; tail=:right))) + + # Incorrect value for keyword argument + @test_throws ArgumentError("`tail=:l` is invalid") pvalue(dist, x; tail=:l) + end +end From fa8c30dedf058cb9891ef141c14cc857576221ac Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Wed, 10 May 2023 08:34:17 -0700 Subject: [PATCH 82/93] Improve type stability for truncated normal moments (#1717) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve type stability for truncated normal moments The internal functions used for computing the mean and variance of the truncated normal distribution were implemented using literals like `√2`, which are always `Float64`. However, some branches of the functions based the return type off of the types of the arguments, which needn't be `Float64`. This led to some instability that can be seen via `@code_warntype`. For example, the inferred result of `_tnmom1` for `Float32` inputs was `Union{Float32,Float64}`. We can instead use the irrational constants defined for square roots of pi and 2 and ratios thereof in order to avoid promoting to `Float64` unnecessarily. This makes the return type concretely inferrable and provides a small performance improvement, about 7 ns for `mean` and 20 ns for `var` on my machine as compared to current master. 
* Apply suggestions from code review Co-authored-by: David Widmann --------- Co-authored-by: David Widmann --- src/truncated/normal.jl | 51 ++++++++++++++++++++-------------------- test/truncated/normal.jl | 10 ++++++++ 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/truncated/normal.jl b/src/truncated/normal.jl index 5dc57a60df..dac92f11a1 100644 --- a/src/truncated/normal.jl +++ b/src/truncated/normal.jl @@ -27,56 +27,57 @@ modes(d::Truncated{Normal{T},Continuous}) where {T <: Real} = [mode(d)] # do not export. Used in mean # computes mean of standard normal distribution truncated to [a, b] function _tnmom1(a, b) + mid = float(middle(a, b)) if !(a ≤ b) - return oftype(middle(a, b), NaN) + return oftype(mid, NaN) elseif a == b - return middle(a, b) + return mid elseif abs(a) > abs(b) return -_tnmom1(-b, -a) elseif isinf(a) && isinf(b) - return zero(middle(a, b)) + return zero(mid) end - Δ = (b - a) * middle(a, b) + Δ = (b - a) * mid + a′ = a * invsqrt2 + b′ = b * invsqrt2 if a ≤ 0 ≤ b - m = √(2/π) * expm1(-Δ) * exp(-a^2 / 2) / (erf(a/√2) - erf(b/√2)) + m = expm1(-Δ) * exp(-a^2 / 2) / erf(b′, a′) elseif 0 < a < b - z = exp(-Δ) * erfcx(b/√2) - erfcx(a/√2) - iszero(z) && return middle(a, b) - m = √(2/π) * expm1(-Δ) / z + z = exp(-Δ) * erfcx(b′) - erfcx(a′) + iszero(z) && return mid + m = expm1(-Δ) / z end - return clamp(m, a, b) + return clamp(m / sqrthalfπ, a, b) end # do not export. 
Used in var # computes 2nd moment of standard normal distribution truncated to [a, b] function _tnmom2(a::Real, b::Real) + mid = float(middle(a, b)) if !(a ≤ b) - return oftype(middle(a, b), NaN) + return oftype(mid, NaN) elseif a == b - return middle(a, b)^2 + return mid^2 elseif abs(a) > abs(b) return _tnmom2(-b, -a) elseif isinf(a) && isinf(b) - return one(middle(a, b)) + return one(mid) elseif isinf(b) - return 1 + √(2 / π) * a / erfcx(a / √2) + return 1 + a / erfcx(a * invsqrt2) / sqrthalfπ end - + a′ = a * invsqrt2 + b′ = b * invsqrt2 if a ≤ 0 ≤ b - ea = √(π/2) * erf(a / √2) - eb = √(π/2) * erf(b / √2) - fa = ea - a * exp(-a^2 / 2) - fb = eb - b * exp(-b^2 / 2) - m2 = (fb - fa) / (eb - ea) - return m2 + eb_ea = sqrthalfπ * erf(a′, b′) + fb_fa = eb_ea + a * exp(-a^2 / 2) - b * exp(-b^2 / 2) + return fb_fa / eb_ea else # 0 ≤ a ≤ b - exΔ = exp((a - b)middle(a, b)) - ea = √(π/2) * erfcx(a / √2) - eb = √(π/2) * erfcx(b / √2) + exΔ = exp((a - b) * mid) + ea = sqrthalfπ * erfcx(a′) + eb = sqrthalfπ * erfcx(b′) fa = ea + a fb = eb + b - m2 = (fa - fb * exΔ) / (ea - eb * exΔ) - return m2 + return (fa - fb * exΔ) / (ea - eb * exΔ) end end diff --git a/test/truncated/normal.jl b/test/truncated/normal.jl index f0c18340d7..7ed4b8f900 100644 --- a/test/truncated/normal.jl +++ b/test/truncated/normal.jl @@ -28,6 +28,16 @@ rng = MersenneTwister(123) # https://github.com/JuliaStats/Distributions.jl/issues/624 @test rand(truncated(Normal(+Inf, 1), 0, 1)) ≈ 1 @test rand(truncated(Normal(-Inf, 1), 0, 1)) ≈ 0 + # Type stability + for T in (Float32, Float64) + t = truncated(Normal(T(1.5), T(4.1)), 0, 1) + μ = @inferred mean(t) + σ = @inferred std(t) + @test μ ≈ 0.50494725270783081889610661619986770485973643194141 + @test μ isa T + @test σ ≈ 0.28836356398830993140576947440881738258157196701554 + @test σ isa T + end end @testset "Truncated normal $trunc" begin trunc = truncated(Normal(0, 1), -2, 2) From f2cceb56bc9582931af3a99a413d2d5c6b513339 Mon Sep 17 00:00:00 2001 From: Yuan-Ru 
Lin Date: Mon, 15 May 2023 05:26:44 +0800 Subject: [PATCH 83/93] Add the Johnson's S_U-distribution (continued) (#1690) * Resolve conflicts produced by adding Lindley * Retab * Add type information of parameters in constructor * Refactor out transformation functions * Remove conversions to float * Avoid float literals * Fix typo in the doc * Point out how X is transformed from Z * In the doc, change the parameters in the constructor from float to integer * Add R cross-check * Rename Johnson to JohnsonSU * Fix typo Co-authored-by: David Widmann * Apply suggestions from code review Co-authored-by: David Widmann * Remove a excessive right parenthesis * Add JohnsonSU's reference data * Add tests * Cover tests for Base.convert and promotion ctor * Fix mismatch pdf, cdf and logpdf of JohnsonSU * Test JohnsonSU with a more reasonable set of params * Retab * Uncomment StudentizedRange * Properly test Base.convert() * Remove skewness and kurtosis * Update src/Distributions.jl Co-authored-by: Alex Arslan * Update src/univariate/continuous/johnsonsu.jl Co-authored-by: Alex Arslan --------- Co-authored-by: David Widmann Co-authored-by: Alex Arslan --- docs/src/univariate.md | 7 ++ src/Distributions.jl | 3 +- src/univariate/continuous/johnsonsu.jl | 101 +++++++++++++++++++ src/univariates.jl | 1 + test/ref/continuous/johnsonsu.R | 34 +++++++ test/ref/continuous_test.lst | 4 + test/ref/continuous_test.ref.json | 78 +++++++++++++++ test/ref/rdistributions.R | 1 + test/ref/readme.md | 1 + test/runtests.jl | 1 + test/univariate/continuous/johnsonsu.jl | 126 ++++++++++++++++++++++++ 11 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 src/univariate/continuous/johnsonsu.jl create mode 100644 test/ref/continuous/johnsonsu.R create mode 100644 test/univariate/continuous/johnsonsu.jl diff --git a/docs/src/univariate.md b/docs/src/univariate.md index d46592248a..633a8952bc 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -272,6 +272,13 @@ 
InverseGaussian plotdensity((0, 5), InverseGaussian, (1, 1)) # hide ``` +```@docs +JohnsonSU +``` +```@example plotdensity +plotdensity((-20, 20), JohnsonSU, (0.0, 1.0, 0.0, 1.0)) # hide +``` + ```@docs Kolmogorov ``` diff --git a/src/Distributions.jl b/src/Distributions.jl index 08c174f0ce..443e776c37 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -106,6 +106,7 @@ export InverseGaussian, IsoNormal, IsoNormalCanon, + JohnsonSU, Kolmogorov, KSDist, KSOneSided, @@ -349,7 +350,7 @@ Supported distributions: Frechet, FullNormal, FullNormalCanon, Gamma, GeneralizedPareto, GeneralizedExtremeValue, Geometric, Gumbel, Hypergeometric, InverseWishart, InverseGamma, InverseGaussian, IsoNormal, - IsoNormalCanon, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, Lindley, LKJ, LKJCholesky, + IsoNormalCanon, JohnsonSU, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, Lindley, LKJ, LKJCholesky, Logistic, LogNormal, MatrixBeta, MatrixFDist, MatrixNormal, MatrixTDist, MixtureModel, Multinomial, MultivariateNormal, MvLogNormal, MvNormal, MvNormalCanon, diff --git a/src/univariate/continuous/johnsonsu.jl b/src/univariate/continuous/johnsonsu.jl new file mode 100644 index 0000000000..65597adc2d --- /dev/null +++ b/src/univariate/continuous/johnsonsu.jl @@ -0,0 +1,101 @@ +""" + JohnsonSU(ξ, λ, γ, δ) + +The Johnson's ``S_U``-distribution with parameters ξ, λ, γ and δ is a transformation of the normal distribution: + +If +```math +X = \\lambda\\sinh\\Bigg( \\frac{Z - \\gamma}{\\delta} \\Bigg) + \\xi +``` +where ``Z \\sim \\mathcal{N}(0,1)``, then ``X \\sim \\operatorname{Johnson}(\\xi, \\lambda, \\gamma, \\delta)``. + +```julia +JohnsonSU() # Equivalent to JohnsonSU(0, 1, 0, 1) +JohnsonSU(ξ, λ, γ, δ) # JohnsonSU's S_U-distribution with parameters ξ, λ, γ and δ + +params(d) # Get the parameters, i.e. (ξ, λ, γ, δ) +shape(d) # Get the shape parameter, i.e. ξ +scale(d) # Get the scale parameter, i.e. 
λ +``` + +External links + +* [Johnson's ``S_U``-distribution on Wikipedia](http://en.wikipedia.org/wiki/Johnson%27s_SU-distribution) +""" +struct JohnsonSU{T<:Real} <: ContinuousUnivariateDistribution + ξ::T + λ::T + γ::T + δ::T + JohnsonSU{T}(ξ::T, λ::T, γ::T, δ::T) where {T<:Real} = new{T}(ξ, λ, γ, δ) +end + +function JohnsonSU(ξ::T, λ::T, γ::T, δ::T; check_args::Bool=true) where {T<:Real} + @check_args JohnsonSU (λ, λ ≥ zero(λ)) (δ, δ ≥ zero(δ)) + return JohnsonSU{T}(ξ, λ, γ, δ) +end + +JohnsonSU() = JohnsonSU{Int}(0, 1, 0, 1) +JohnsonSU(ξ::Real, λ::Real, γ::Real, δ::Real; check_args::Bool=true) = JohnsonSU(promote(ξ, λ, γ, δ)...; check_args=check_args) + +@distr_support JohnsonSU -Inf Inf + +#### Conversions + +Base.convert(::Type{JohnsonSU{T}}, d::JohnsonSU) where {T<:Real} = JohnsonSU{T}(T(d.ξ), T(d.λ), T(d.γ), T(d.δ)) +Base.convert(::Type{JohnsonSU{T}}, d::JohnsonSU{T}) where {T<:Real} = d + +#### Parameters + +shape(d::JohnsonSU) = d.ξ +scale(d::JohnsonSU) = d.λ + +params(d::JohnsonSU) = (d.ξ, d.λ, d.γ, d.δ) +partype(d::JohnsonSU{T}) where {T<:Real} = T + +#### Statistics + +function mean(d::JohnsonSU) + a = exp(1/(2*d.δ^2)) + r = d.γ/d.δ + d.ξ - d.λ * a * sinh(r) +end +function median(d::JohnsonSU) + r = d.γ/d.δ + d.ξ + d.λ * sinh(-r) +end +function var(d::JohnsonSU) + a = d.δ^-2 + r = d.γ/d.δ + d.λ^2/2 * expm1(a) * (exp(a)*cosh(2r)+1) +end + +#### Evaluation + +yval(d::JohnsonSU, x::Real) = (x - d.ξ) / d.λ +zval(d::JohnsonSU, x::Real) = d.γ + d.δ * asinh(yval(d, x)) +xval(d::JohnsonSU, x::Real) = d.λ * sinh((x - d.γ) / d.δ) + d.ξ + +pdf(d::JohnsonSU, x::Real) = d.δ / (d.λ * hypot(1, yval(d, x))) * normpdf(zval(d, x)) +logpdf(d::JohnsonSU, x::Real) = log(d.δ) - log(d.λ) - log1psq(yval(d, x)) / 2 + normlogpdf(zval(d, x)) +cdf(d::JohnsonSU, x::Real) = normcdf(zval(d, x)) +logcdf(d::JohnsonSU, x::Real) = normlogcdf(zval(d, x)) +ccdf(d::JohnsonSU, x::Real) = normccdf(zval(d, x)) +logccdf(d::JohnsonSU, x::Real) = normlogccdf(zval(d, x)) + 
+quantile(d::JohnsonSU, q::Real) = xval(d, norminvcdf(q)) +cquantile(d::JohnsonSU, p::Real) = xval(d, norminvccdf(p)) +invlogcdf(d::JohnsonSU, lp::Real) = xval(d, norminvlogcdf(lp)) +invlogccdf(d::JohnsonSU, lq::Real) = xval(d, norminvlogccdf(lq)) + +# entropy(d::JohnsonSU) +# mgf(d::JohnsonSU) +# cf(d::JohnsonSU) + +#### Sampling + +rand(rng::AbstractRNG, d::JohnsonSU) = xval(d, randn(rng)) + +## Fitting + +# function fit_mle(::Type{<:JohnsonSU}, x::AbstractArray{T}) where T<:Real diff --git a/src/univariates.jl b/src/univariates.jl index f9fe9c4b68..b271434529 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -687,6 +687,7 @@ const continuous_distributions = [ "gumbel", "inversegamma", "inversegaussian", + "johnsonsu", "kolmogorov", "ksdist", "ksonesided", diff --git a/test/ref/continuous/johnsonsu.R b/test/ref/continuous/johnsonsu.R new file mode 100644 index 0000000000..18da14a48c --- /dev/null +++ b/test/ref/continuous/johnsonsu.R @@ -0,0 +1,34 @@ +library("ExtDist") + +JohnsonSU <- R6Class("JohnsonSU", + inherit = ContinuousDistribution, + public = list(names = c("xi", "lambda", "gamma", "delta"), + xi = NA, + lambda = NA, + gamma = NA, + delta = NA, + initialize = function(xi = 0, lambda = 1, gamma = 0, delta = 1) { + self$xi <- xi + self$lambda <- lambda + self$gamma <- gamma + self$delta <- delta + }, + supp = function() { c(-Inf, Inf) }, + properties = function() { list() }, + pdf = function(x, log = FALSE) { + p <- dJohnsonSU(x, xi = self$xi, lambda = self$lambda, gamma = self$gamma, delta = self$delta) + if (log) { + result <- log(p) + } else { + result <- p + } + return(result) + }, + cdf = function(x) { + pJohnsonSU(x, xi = self$xi, lambda = self$lambda, gamma = self$gamma, delta = self$delta) + }, + quan = function(x) { + qJohnsonSU(x, xi = self$xi, lambda = self$lambda, gamma = self$gamma, delta = self$delta) + } + ) +) diff --git a/test/ref/continuous_test.lst b/test/ref/continuous_test.lst index 27e5b63367..c9b64ed0e4 100644 --- 
a/test/ref/continuous_test.lst +++ b/test/ref/continuous_test.lst @@ -90,6 +90,10 @@ InverseGaussian(1.0, 1.0) InverseGaussian(2.0, 1.5) InverseGaussian(2.0, 7.0) +JohnsonSU() +JohnsonSU(2.0, 5.0, 5.0, 5.0) +JohnsonSU(0.0, 2.0, -1.0, 3.0) + Laplace() Laplace(2.0) Laplace(0.0, 1.0) diff --git a/test/ref/continuous_test.ref.json b/test/ref/continuous_test.ref.json index e40c121fe0..dee2b577cf 100644 --- a/test/ref/continuous_test.ref.json +++ b/test/ref/continuous_test.ref.json @@ -2530,6 +2530,84 @@ { "q": 0.90, "x": 3.38940701260244 } ] }, +{ + "expr": "JohnsonSU()", + "dtype": "JohnsonSU", + "minimum": "-inf", + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": -1.66230911871357, "pdf": 0.0904670046436047, "logpdf": -2.40277008432862, "cdf": 0.1 }, + { "x": -0.94455710381124, "pdf": 0.203524552217433, "logpdf": -1.59196863165996, "cdf": 0.2 }, + { "x": -0.548767821437225, "pdf": 0.304812258990856, "logpdf": -1.18805923619217, "cdf": 0.3 }, + { "x": -0.256065984296828, "pdf": 0.374267032822002, "logpdf": -0.982785744835596, "cdf": 0.4 }, + { "x": 0, "pdf": 0.398942280401433, "logpdf": -0.918938533204673, "cdf": 0.5 }, + { "x": 0.256065984296828, "pdf": 0.374267032822002, "logpdf": -0.982785744835596, "cdf": 0.6 }, + { "x": 0.548767821437225, "pdf": 0.304812258990855, "logpdf": -1.18805923619217, "cdf": 0.7 }, + { "x": 0.94455710381124, "pdf": 0.203524552217433, "logpdf": -1.59196863165996, "cdf": 0.8 }, + { "x": 1.66230911871357, "pdf": 0.0904670046436047, "logpdf": -2.40277008432862, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": -1.66230911871357 }, + { "q": 0.25, "x": -0.72680740014749 }, + { "q": 0.50, "x": 0 }, + { "q": 0.75, "x": 0.72680740014749 }, + { "q": 0.90, "x": 1.66230911871357 } + ] +}, +{ + "expr": "JohnsonSU(2.0, 5.0, 5.0, 5.0)", + "dtype": "JohnsonSU", + "minimum": "-inf", + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": -6.06933799845238, "pdf": 0.0924371067198883, "logpdf": -2.38122679307175, "cdf": 
0.0999999999999998 }, + { "x": -5.26427604865487, "pdf": 0.158731725845823, "logpdf": -1.84053976061754, "cdf": 0.2 }, + { "x": -4.71902974300291, "pdf": 0.207570672959045, "logpdf": -1.57228340472616, "cdf": 0.3 }, + { "x": -4.27465289286695, "pdf": 0.240766740113252, "logpdf": -1.42392669754053, "cdf": 0.4 }, + { "x": -3.87600596821901, "pdf": 0.258536249759365, "logpdf": -1.3527193636877, "cdf": 0.5 }, + { "x": -3.49244827154443, "pdf": 0.260077565477476, "logpdf": -1.34677536369826, "cdf": 0.6 }, + { "x": -3.09767656457399, "pdf": 0.243466209388267, "logpdf": -1.4127771165684, "cdf": 0.7 }, + { "x": -2.65461452816574, "pdf": 0.204913844973847, "logpdf": -1.58516565660151, "cdf": 0.8 }, + { "x": -2.07081596606744, "pdf": 0.136095863996024, "logpdf": -1.99439575923524, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": -6.06933799845238 }, + { "q": 0.25, "x": -4.97350284960928 }, + { "q": 0.50, "x": -3.87600596821901 }, + { "q": 0.75, "x": -2.88559970274271 }, + { "q": 0.90, "x": -2.07081596606744 } + ] +}, +{ + "expr": "JohnsonSU(0.0, 2.0, -1.0, 3.0)", + "dtype": "JohnsonSU", + "minimum": "-inf", + "maximum": "inf", + "properties": { + }, + "points": [ + { "x": -0.187976707713316, "pdf": 0.262092405911057, "logpdf": -1.33905814308605, "cdf": 0.1 }, + { "x": 0.105634897374711, "pdf": 0.419358349586406, "logpdf": -0.869029474967994, "cdf": 0.2 }, + { "x": 0.318396120331103, "pdf": 0.515053001600013, "logpdf": -0.663485467885681, "cdf": 0.3 }, + { "x": 0.502923450240603, "pdf": 0.562017137821468, "logpdf": -0.576222935209141, "cdf": 0.4 }, + { "x": 0.6790811145123, "pdf": 0.566640811890761, "logpdf": -0.568029664721968, "cdf": 0.5 }, + { "x": 0.860084619293426, "pdf": 0.532373297576829, "logpdf": -0.630410348533345, "cdf": 0.6 }, + { "x": 1.0605683870342, "pdf": 0.460763464073353, "logpdf": -0.774870460779176, "cdf": 0.7 }, + { "x": 1.30632444824923, "pdf": 0.351589529625484, "logpdf": -1.0452908928367, "cdf": 0.8 }, + { "x": 1.67195778058535, "pdf": 
0.201969302872052, "logpdf": -1.59963955910887, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": -0.187976707713316 }, + { "q": 0.25, "x": 0.217432887185809 }, + { "q": 0.50, "x": 0.6790811145123 }, + { "q": 0.75, "x": 1.17520070778732 }, + { "q": 0.90, "x": 1.67195778058535 } + ] +}, { "expr": "Laplace()", "dtype": "Laplace", diff --git a/test/ref/rdistributions.R b/test/ref/rdistributions.R index 9c69e33eb3..5f0ab123fa 100644 --- a/test/ref/rdistributions.R +++ b/test/ref/rdistributions.R @@ -56,6 +56,7 @@ source("continuous/generalizedpareto.R") source("continuous/gumbel.R") source("continuous/inversegamma.R") source("continuous/inversegaussian.R") +source("continuous/johnsonsu.R") source("continuous/laplace.R") source("continuous/levy.R") source("continuous/lindley.R") diff --git a/test/ref/readme.md b/test/ref/readme.md index 502ba012c8..7ae82d5d2e 100644 --- a/test/ref/readme.md +++ b/test/ref/readme.md @@ -25,6 +25,7 @@ in addition to the R language itself: | fBasics | For ``NormalInverseGaussian`` | | gnorm | For ``PGeneralizedGaussian`` | | LindleyR | For ``Lindley`` | +| ExtDist | For ``JohnsonSU`` | ## Usage diff --git a/test/runtests.jl b/test/runtests.jl index 7eb9020667..9c8f5b3a4b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -77,6 +77,7 @@ const tests = [ "univariate/continuous/gumbel", "univariate/continuous/lindley", "univariate/continuous/logistic", + "univariate/continuous/johnsonsu", "univariate/continuous/noncentralchisq", "univariate/continuous/weibull", "pdfnorm", diff --git a/test/univariate/continuous/johnsonsu.jl b/test/univariate/continuous/johnsonsu.jl new file mode 100644 index 0000000000..5f259167f8 --- /dev/null +++ b/test/univariate/continuous/johnsonsu.jl @@ -0,0 +1,126 @@ +@testset "JohnsonSU" begin + + d1 = JohnsonSU(0.0, 10.0, -2.0, 3.0) + @test d1 isa JohnsonSU{Float64} + @test params(d1) == (0.0, 10.0, -2.0, 3.0) + @test shape(d1) == 0.0 + @test scale(d1) == 10.0 + @test partype(d1) === Float64 + @test eltype(d1) 
=== Float64 + @test rand(d1) isa Float64 + + @test median(d1) == quantile(d1, 0.5) + x = quantile.(d1, [0.25, 0.45, 0.60, 0.80, 0.90]) + @test all(cdf.(d1, x) .≈ [0.25, 0.45, 0.60, 0.80, 0.90]) + y = cquantile.(d1, [0.25, 0.45, 0.60, 0.80, 0.90]) + @test all(ccdf.(d1, y) .≈ [0.25, 0.45, 0.60, 0.80, 0.90]) + + @test mean(d1) ≈ 7.581281 + @test var(d1) ≈ 19.1969485 + + d1 = JohnsonSU(10.0f0, 10.0f0, 1.0f0, 3.0f0) + @test d1 isa JohnsonSU{Float32} + @test params(d1) == (10.0f0, 10.0f0, 1.0f0, 3.0f0) + @test shape(d1) == 10.0f0 + @test scale(d1) == 10.0f0 + @test partype(d1) === Float32 + @test eltype(d1) === Float64 + @test rand(d1) isa Float64 + + d1 = JohnsonSU(1.0, 1, 0, 1) + @test Base.convert(JohnsonSU{Float64}, d1) === d1 + @test Base.convert(JohnsonSU{Int}, d1) isa JohnsonSU{Int} + + d1 = JohnsonSU() + @test d1 isa JohnsonSU{Int} + @test params(d1) == (0, 1, 0, 1) + + @test pdf(d1, -Inf) == 0.0 + @test pdf(d1, Inf) == 0.0 + @test isnan(pdf(d1, NaN)) + + @test logpdf(d1, -Inf) == -Inf + @test logpdf(d1, Inf) == -Inf + @test isnan(logpdf(d1, NaN)) + + @test cdf(d1, -Inf) == 0.0 + @test cdf(d1, Inf) == 1.0 + @test isnan(cdf(d1, NaN)) + + @test logcdf(d1, -Inf) == -Inf + @test logcdf(d1, Inf) == 0.0 + @test isnan(logcdf(d1, NaN)) + + @test ccdf(d1, -Inf) == 1.0 + @test ccdf(d1, Inf) == 0.0 + @test isnan(ccdf(d1, NaN)) + + @test logccdf(d1, -Inf) == 0.0 + @test logccdf(d1, Inf) == -Inf + @test isnan(logccdf(d1, NaN)) + + @test invlogcdf(d1, -Inf) == -Inf + @test isnan(invlogcdf(d1, Inf)) + @test isnan(invlogcdf(d1, NaN)) + + @test invlogccdf(d1, -Inf) == Inf + @test isnan(invlogccdf(d1, Inf)) + @test isnan(invlogccdf(d1, NaN)) + + @inferred pdf(d1, -Inf32) + @inferred pdf(d1, 1.0) + @inferred pdf(d1, 1.0f0) + @inferred pdf(d1, 1) + @inferred pdf(d1, 1//2) + @inferred pdf(d1, Inf) + + @inferred logpdf(d1, -Inf32) + @inferred logpdf(d1, 1.0) + @inferred logpdf(d1, 1.0f0) + @inferred logpdf(d1, 1) + @inferred logpdf(d1, 1//2) + @inferred logpdf(d1, Inf) + + @inferred 
cdf(d1, -Inf32) + @inferred cdf(d1, 1.0) + @inferred cdf(d1, 1.0f0) + @inferred cdf(d1, 1) + @inferred cdf(d1, 1//2) + @inferred cdf(d1, Inf) + + @inferred logcdf(d1, -Inf32) + @inferred logcdf(d1, 1.0) + @inferred logcdf(d1, 1.0f0) + @inferred logcdf(d1, 1) + @inferred logcdf(d1, 1//2) + @inferred logcdf(d1, Inf) + + @inferred ccdf(d1, -Inf32) + @inferred ccdf(d1, 1.0) + @inferred ccdf(d1, 1.0f0) + @inferred ccdf(d1, 1) + @inferred ccdf(d1, 1//2) + @inferred ccdf(d1, Inf) + + @inferred logccdf(d1, -Inf32) + @inferred logccdf(d1, 1.0) + @inferred logccdf(d1, 1.0f0) + @inferred logccdf(d1, 1) + @inferred logccdf(d1, 1//2) + @inferred logccdf(d1, Inf) + + @inferred invlogcdf(d1, -Inf32) + @inferred invlogcdf(d1, 1.0) + @inferred invlogcdf(d1, 1.0f0) + @inferred invlogcdf(d1, 1) + @inferred invlogcdf(d1, 1//2) + @inferred invlogcdf(d1, Inf) + + @inferred invlogccdf(d1, -Inf32) + @inferred invlogccdf(d1, 1.0) + @inferred invlogccdf(d1, 1.0f0) + @inferred invlogccdf(d1, 1) + @inferred invlogccdf(d1, 1//2) + @inferred invlogccdf(d1, Inf) + +end From 9cf6a7459e71b133359ba9390d913d26deef5642 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 14 May 2023 23:27:38 +0200 Subject: [PATCH 84/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f2c7ae7228..0c3476bba5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.90" +version = "0.25.91" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From 32cea9b82195a4425204a789cbdc3a6d808ea526 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 15 May 2023 18:53:08 +0200 Subject: [PATCH 85/93] Store `nothing` bounds in `Truncated` (#1720) --- src/truncate.jl | 103 ++++++++++++++++++++++++---------------- src/truncated/normal.jl | 41 ++++++++-------- test/truncate.jl | 12 +++-- 3 files changed, 92 
insertions(+), 64 deletions(-) diff --git a/src/truncate.jl b/src/truncate.jl index 2f0ce87cab..804c709fad 100644 --- a/src/truncate.jl +++ b/src/truncate.jl @@ -45,33 +45,26 @@ function truncated(d::UnivariateDistribution, ::Nothing, u::Real) logucdf = logtp = logcdf(d, u) ucdf = tp = exp(logucdf) - Truncated(d, promote(oftype(float(u), -Inf), u, oftype(ucdf, -Inf), zero(ucdf), ucdf, tp, logtp)...) + Truncated(d, nothing, promote(u, oftype(ucdf, -Inf), zero(ucdf), ucdf, tp, logtp)...) end function truncated(d::UnivariateDistribution, l::Real, ::Nothing) # (log)lcdf = (log) P(X < l) where X ~ d - loglcdf = if value_support(typeof(d)) === Discrete - logsubexp(logcdf(d, l), logpdf(d, l)) - else - logcdf(d, l) - end + loglcdf = _logcdf_noninclusive(d, l) lcdf = exp(loglcdf) # (log)tp = (log) P(l ≤ X) where X ∼ d logtp = log1mexp(loglcdf) tp = exp(logtp) - Truncated(d, promote(l, oftype(float(l), Inf), loglcdf, lcdf, one(lcdf), tp, logtp)...) + l, loglcdf, lcdf, ucdf, tp, logtp = promote(l, loglcdf, lcdf, one(lcdf), tp, logtp) + Truncated(d, l, nothing, loglcdf, lcdf, ucdf, tp, logtp) end truncated(d::UnivariateDistribution, ::Nothing, ::Nothing) = d function truncated(d::UnivariateDistribution, l::T, u::T) where {T <: Real} l <= u || error("the lower bound must be less or equal than the upper bound") # (log)lcdf = (log) P(X < l) where X ~ d - loglcdf = if value_support(typeof(d)) === Discrete - logsubexp(logcdf(d, l), logpdf(d, l)) - else - logcdf(d, l) - end + loglcdf = _logcdf_noninclusive(d, l) lcdf = exp(loglcdf) # (log)ucdf = (log) P(X ≤ u) where X ~ d @@ -90,10 +83,10 @@ end Generic wrapper for a truncated distribution. 
""" -struct Truncated{D<:UnivariateDistribution, S<:ValueSupport, T <: Real} <: UnivariateDistribution{S} +struct Truncated{D<:UnivariateDistribution, S<:ValueSupport, T<: Real, TL<:Union{T,Nothing}, TU<:Union{T,Nothing}} <: UnivariateDistribution{S} untruncated::D # the original distribution (untruncated) - lower::T # lower bound - upper::T # upper bound + lower::TL # lower bound + upper::TU # upper bound loglcdf::T # log-cdf of lower bound (exclusive): log P(X < lower) lcdf::T # cdf of lower bound (exclusive): P(X < lower) ucdf::T # cdf of upper bound (inclusive): P(X ≤ upper) @@ -101,29 +94,54 @@ struct Truncated{D<:UnivariateDistribution, S<:ValueSupport, T <: Real} <: Univa tp::T # the probability of the truncated part, i.e. ucdf - lcdf logtp::T # log(tp), i.e. log(ucdf - lcdf) - function Truncated(d::UnivariateDistribution, l::T, u::T, loglcdf::T, lcdf::T, ucdf::T, tp::T, logtp::T) where {T <: Real} - new{typeof(d), value_support(typeof(d)), T}(d, l, u, loglcdf, lcdf, ucdf, tp, logtp) + function Truncated(d::UnivariateDistribution, l::TL, u::TU, loglcdf::T, lcdf::T, ucdf::T, tp::T, logtp::T) where {T <: Real, TL <: Union{T,Nothing}, TU <: Union{T,Nothing}} + new{typeof(d), value_support(typeof(d)), T, TL, TU}(d, l, u, loglcdf, lcdf, ucdf, tp, logtp) end end +const LeftTruncated{D<:UnivariateDistribution,S<:ValueSupport,T<:Real} = Truncated{D,S,T,T,Nothing} +const RightTruncated{D<:UnivariateDistribution,S<:ValueSupport,T<:Real} = Truncated{D,S,T,Nothing,T} + ### Constructors of `Truncated` are deprecated - users should call `truncated` @deprecate Truncated(d::UnivariateDistribution, l::Real, u::Real) truncated(d, l, u) @deprecate Truncated(d::UnivariateDistribution, l::T, u::T, lcdf::T, ucdf::T, tp::T, logtp::T) where {T <: Real} Truncated(d, l, u, log(lcdf), lcdf, ucdf, tp, logtp) +function truncated(d::Truncated, l::T, u::T) where {T<:Real} + return truncated( + d.untruncated, + d.lower === nothing ? l : max(l, d.lower), + d.upper === nothing ? 
u : min(u, d.upper), + ) +end +function truncated(d::Truncated, ::Nothing, u::Real) + return truncated(d.untruncated, d.lower, d.upper === nothing ? u : min(u, d.upper)) +end +function truncated(d::Truncated, l::Real, ::Nothing) + return truncated(d.untruncated, d.lower === nothing ? l : max(l, d.lower), d.upper) +end + params(d::Truncated) = tuple(params(d.untruncated)..., d.lower, d.upper) -partype(d::Truncated) = partype(d.untruncated) -Base.eltype(::Type{Truncated{D, S, T} } ) where {D, S, T} = T +partype(d::Truncated{<:UnivariateDistribution,<:ValueSupport,T}) where {T<:Real} = promote_type(partype(d.untruncated), T) + +Base.eltype(::Type{<:Truncated{D}}) where {D<:UnivariateDistribution} = eltype(D) +Base.eltype(d::Truncated) = eltype(d.untruncated) ### range and support +islowerbounded(d::RightTruncated) = islowerbounded(d.untruncated) islowerbounded(d::Truncated) = islowerbounded(d.untruncated) || isfinite(d.lower) + +isupperbounded(d::LeftTruncated) = isupperbounded(d.untruncated) isupperbounded(d::Truncated) = isupperbounded(d.untruncated) || isfinite(d.upper) +minimum(d::RightTruncated) = minimum(d.untruncated) minimum(d::Truncated) = max(minimum(d.untruncated), d.lower) + +maximum(d::LeftTruncated) = maximum(d.untruncated) maximum(d::Truncated) = min(maximum(d.untruncated), d.upper) -function insupport(d::Truncated{D,<:Union{Discrete,Continuous}}, x::Real) where {D<:UnivariateDistribution} - return d.lower <= x <= d.upper && insupport(d.untruncated, x) +function insupport(d::Truncated{<:UnivariateDistribution,<:Union{Discrete,Continuous}}, x::Real) + return _in_closed_interval(x, d.lower, d.upper) && insupport(d.untruncated, x) end ### evaluation @@ -132,19 +150,19 @@ quantile(d::Truncated, p::Real) = quantile(d.untruncated, d.lcdf + p * d.tp) function pdf(d::Truncated, x::Real) result = pdf(d.untruncated, x) / d.tp - return d.lower <= x <= d.upper ? result : zero(result) + return _in_closed_interval(x, d.lower, d.upper) ? 
result : zero(result) end function logpdf(d::Truncated, x::Real) result = logpdf(d.untruncated, x) - d.logtp - return d.lower <= x <= d.upper ? result : oftype(result, -Inf) + return _in_closed_interval(x, d.lower, d.upper) ? result : oftype(result, -Inf) end function cdf(d::Truncated, x::Real) result = (cdf(d.untruncated, x) - d.lcdf) / d.tp - return if x < d.lower + return if d.lower !== nothing && x < d.lower zero(result) - elseif x >= d.upper + elseif d.upper !== nothing && x >= d.upper one(result) else result @@ -153,9 +171,9 @@ end function logcdf(d::Truncated, x::Real) result = logsubexp(logcdf(d.untruncated, x), d.loglcdf) - d.logtp - return if x < d.lower + return if d.lower !== nothing && x < d.lower oftype(result, -Inf) - elseif x >= d.upper + elseif d.upper !== nothing && x >= d.upper zero(result) else result @@ -164,9 +182,9 @@ end function ccdf(d::Truncated, x::Real) result = (d.ucdf - cdf(d.untruncated, x)) / d.tp - return if x <= d.lower + return if d.lower !== nothing && x <= d.lower one(result) - elseif x > d.upper + elseif d.upper !== nothing && x > d.upper zero(result) else result @@ -175,9 +193,9 @@ end function logccdf(d::Truncated, x::Real) result = logsubexp(logccdf(d.untruncated, x), log1p(-d.ucdf)) - d.logtp - return if x <= d.lower + return if d.lower !== nothing && x <= d.lower zero(result) - elseif x > d.upper + elseif d.upper !== nothing && x > d.upper oftype(result, -Inf) else result @@ -189,10 +207,12 @@ end function rand(rng::AbstractRNG, d::Truncated) d0 = d.untruncated tp = d.tp + lower = d.lower + upper = d.upper if tp > 0.25 while true r = rand(rng, d0) - if d.lower <= r <= d.upper + if _in_closed_interval(r, lower, upper) return r end end @@ -212,16 +232,12 @@ function show(io::IO, d::Truncated) uml, namevals = _use_multline_show(d0) uml ? 
show_multline(io, d0, namevals) : show_oneline(io, d0, namevals) - if d.lower > -Inf - if d.upper < Inf - print(io, "; lower=$(d.lower), upper=$(d.upper))") - else - print(io, "; lower=$(d.lower))") - end - elseif d.upper < Inf + if d.lower === nothing print(io, "; upper=$(d.upper))") + elseif d.upper === nothing + print(io, "; lower=$(d.lower))") else - print(io, ")") + print(io, "; lower=$(d.lower), upper=$(d.upper))") end uml && println(io) end @@ -236,3 +252,10 @@ include(joinpath("truncated", "exponential.jl")) include(joinpath("truncated", "uniform.jl")) include(joinpath("truncated", "loguniform.jl")) include(joinpath("truncated", "discrete_uniform.jl")) + +#### Utilities + +# utilities to handle closed intervals represented with possibly `nothing` bounds +_in_closed_interval(x::Real, l::Real, u::Real) = l ≤ x ≤ u +_in_closed_interval(x::Real, ::Nothing, u::Real) = x ≤ u +_in_closed_interval(x::Real, l::Real, ::Nothing) = x ≥ l diff --git a/src/truncated/normal.jl b/src/truncated/normal.jl index dac92f11a1..6b79b5dae3 100644 --- a/src/truncated/normal.jl +++ b/src/truncated/normal.jl @@ -12,17 +12,12 @@ TruncatedNormal ### statistics -minimum(d::Truncated{Normal{T},Continuous}) where {T <: Real} = d.lower -maximum(d::Truncated{Normal{T},Continuous}) where {T <: Real} = d.upper - - -function mode(d::Truncated{Normal{T},Continuous}) where T <: Real +function mode(d::Truncated{<:Normal{<:Real},Continuous}) μ = mean(d.untruncated) - d.upper < μ ? d.upper : - d.lower > μ ? d.lower : μ + return clamp(μ, extrema(d)...) end -modes(d::Truncated{Normal{T},Continuous}) where {T <: Real} = [mode(d)] +modes(d::Truncated{<:Normal{<:Real},Continuous}) = [mode(d)] # do not export. 
Used in mean # computes mean of standard normal distribution truncated to [a, b] @@ -94,39 +89,42 @@ function _tnvar(a::Real, b::Real) end end -function mean(d::Truncated{Normal{T},Continuous}) where T <: Real +function mean(d::Truncated{<:Normal{<:Real},Continuous}) d0 = d.untruncated μ = mean(d0) σ = std(d0) if iszero(σ) return mode(d) else - a = (d.lower - μ) / σ - b = (d.upper - μ) / σ + lower, upper = extrema(d) + a = (lower - μ) / σ + b = (upper - μ) / σ return μ + _tnmom1(a, b) * σ end end -function var(d::Truncated{Normal{T},Continuous}) where T <: Real +function var(d::Truncated{<:Normal{<:Real},Continuous}) d0 = d.untruncated μ = mean(d0) σ = std(d0) if iszero(σ) return σ else - a = (d.lower - μ) / σ - b = (d.upper - μ) / σ + lower, upper = extrema(d) + a = (lower - μ) / σ + b = (upper - μ) / σ return _tnvar(a, b) * σ^2 end end -function entropy(d::Truncated{Normal{T},Continuous}) where T <: Real +function entropy(d::Truncated{<:Normal{<:Real},Continuous}) d0 = d.untruncated z = d.tp μ = mean(d0) σ = std(d0) - a = (d.lower - μ) / σ - b = (d.upper - μ) / σ + lower, upper = extrema(d) + a = (lower - μ) / σ + b = (upper - μ) / σ aφa = isinf(a) ? 0.0 : a * normpdf(a) bφb = isinf(b) ? 0.0 : b * normpdf(b) 0.5 * (log2π + 1.) + log(σ * z) + (aφa - bφb) / (2.0 * z) @@ -138,17 +136,18 @@ end ## Use specialized sampler, as quantile-based method is inaccurate in ## tail regions of the Normal, issue #343 -function rand(rng::AbstractRNG, d::Truncated{Normal{T},Continuous}) where T <: Real +function rand(rng::AbstractRNG, d::Truncated{<:Normal{<:Real},Continuous}) d0 = d.untruncated μ = mean(d0) σ = std(d0) if isfinite(μ) - a = (d.lower - μ) / σ - b = (d.upper - μ) / σ + lower, upper = extrema(d) + a = (lower - μ) / σ + b = (upper - μ) / σ z = randnt(rng, a, b, d.tp) return μ + σ * z else - return clamp(μ, d.lower, d.upper) + return clamp(μ, extrema(d)...) 
end end diff --git a/test/truncate.jl b/test/truncate.jl index 1ca48e4c5b..b9c0b42635 100644 --- a/test/truncate.jl +++ b/test/truncate.jl @@ -78,7 +78,7 @@ function verify_and_test(d::UnivariateDistribution, dct::Dict, n_tsamples::Int) end @test cdf(d, x) ≈ cf atol=sqrt(eps()) # NOTE: some distributions use pdf() in StatsFuns.jl which have no generic support yet - if !(typeof(d) in [Distributions.Truncated{Distributions.NoncentralChisq{Float64},Distributions.Continuous, Float64}, + if !any(T -> d isa T, [Distributions.Truncated{Distributions.NoncentralChisq{Float64},Distributions.Continuous, Float64}, Distributions.Truncated{Distributions.NoncentralF{Float64},Distributions.Continuous, Float64}, Distributions.Truncated{Distributions.NoncentralT{Float64},Distributions.Continuous, Float64}, Distributions.Truncated{Distributions.StudentizedRange{Float64},Distributions.Continuous, Float64}, @@ -133,12 +133,18 @@ for (μ, lower, upper) in [(0, -1, 1), (1, 2, 4)] end for bound in (-2, 1) d = @test_deprecated Distributions.Truncated(Normal(), Float64(bound), Inf) - @test truncated(Normal(); lower=bound) == d @test truncated(Normal(); lower=bound, upper=Inf) == d + d_nothing = truncated(Normal(); lower=bound) + @test truncated(Normal(); lower=bound, upper=nothing) == d_nothing + @test extrema(d_nothing) == promote(bound, Inf) + d = @test_deprecated Distributions.Truncated(Normal(), -Inf, Float64(bound)) - @test truncated(Normal(); upper=bound) == d @test truncated(Normal(); lower=-Inf, upper=bound) == d + + d_nothing = truncated(Normal(); upper=bound) + @test truncated(Normal(); lower=nothing, upper=bound) == d_nothing + @test extrema(d_nothing) == promote(-Inf, bound) end @test truncated(Normal()) === Normal() From f61831d69673454284fb98e00a332260e87d7040 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 15 May 2023 18:55:30 +0200 Subject: [PATCH 86/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml 
b/Project.toml index 0c3476bba5..497eefb4f9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.91" +version = "0.25.92" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From ef42afb215cb08304dd64e309da8002c333caef0 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 17 May 2023 11:35:46 +0200 Subject: [PATCH 87/93] Fix inference failures (#1722) * Fix test failures * Add tests * Bump version --- Project.toml | 2 +- src/truncated/normal.jl | 14 +++++++------- test/truncated/normal.jl | 2 ++ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 497eefb4f9..2d891419ea 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.92" +version = "0.25.93" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" diff --git a/src/truncated/normal.jl b/src/truncated/normal.jl index 6b79b5dae3..a3ff33e1e1 100644 --- a/src/truncated/normal.jl +++ b/src/truncated/normal.jl @@ -12,9 +12,9 @@ TruncatedNormal ### statistics -function mode(d::Truncated{<:Normal{<:Real},Continuous}) +function mode(d::Truncated{<:Normal{<:Real},Continuous,T}) where {T<:Real} μ = mean(d.untruncated) - return clamp(μ, extrema(d)...) 
+ return T(clamp(μ, extrema(d)...)) end modes(d::Truncated{<:Normal{<:Real},Continuous}) = [mode(d)] @@ -89,7 +89,7 @@ function _tnvar(a::Real, b::Real) end end -function mean(d::Truncated{<:Normal{<:Real},Continuous}) +function mean(d::Truncated{<:Normal{<:Real},Continuous,T}) where {T<:Real} d0 = d.untruncated μ = mean(d0) σ = std(d0) @@ -99,21 +99,21 @@ function mean(d::Truncated{<:Normal{<:Real},Continuous}) lower, upper = extrema(d) a = (lower - μ) / σ b = (upper - μ) / σ - return μ + _tnmom1(a, b) * σ + return T(μ + _tnmom1(a, b) * σ) end end -function var(d::Truncated{<:Normal{<:Real},Continuous}) +function var(d::Truncated{<:Normal{<:Real},Continuous,T}) where {T<:Real} d0 = d.untruncated μ = mean(d0) σ = std(d0) if iszero(σ) - return σ + return T(σ) else lower, upper = extrema(d) a = (lower - μ) / σ b = (upper - μ) / σ - return _tnvar(a, b) * σ^2 + return T(_tnvar(a, b) * σ^2) end end diff --git a/test/truncated/normal.jl b/test/truncated/normal.jl index 7ed4b8f900..9d287c0262 100644 --- a/test/truncated/normal.jl +++ b/test/truncated/normal.jl @@ -31,8 +31,10 @@ rng = MersenneTwister(123) # Type stability for T in (Float32, Float64) t = truncated(Normal(T(1.5), T(4.1)), 0, 1) + m = @inferred mode(t) μ = @inferred mean(t) σ = @inferred std(t) + @test m === T(1) @test μ ≈ 0.50494725270783081889610661619986770485973643194141 @test μ isa T @test σ ≈ 0.28836356398830993140576947440881738258157196701554 From 05dc0671e367f0453cd050521ef45be6f8d03d01 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Thu, 18 May 2023 09:47:15 -0700 Subject: [PATCH 88/93] Add the Kumaraswamy distribution (#1699) * Add the Kumaraswamy distribution * Incorporate suggestions from code review Some minor modifications made Co-Authored-By: David Widmann * Fix median whoops * Adjust checks for input within support Co-Authored-By: David Widmann --------- Co-authored-by: David Widmann --- docs/src/univariate.md | 7 + src/Distributions.jl | 5 +- src/univariate/continuous/kumaraswamy.jl | 138 
+++++++++++++++++++ src/univariates.jl | 1 + test/ref/continuous/kumaraswamy.R | 14 ++ test/ref/continuous_test.lst | 7 + test/ref/continuous_test.ref.json | 156 ++++++++++++++++++++++ test/ref/rdistributions.R | 1 + test/univariate/continuous/kumaraswamy.jl | 69 ++++++++++ 9 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 src/univariate/continuous/kumaraswamy.jl create mode 100644 test/ref/continuous/kumaraswamy.R create mode 100644 test/univariate/continuous/kumaraswamy.jl diff --git a/docs/src/univariate.md b/docs/src/univariate.md index 633a8952bc..0b2c48c6ea 100644 --- a/docs/src/univariate.md +++ b/docs/src/univariate.md @@ -291,6 +291,13 @@ KSDist KSOneSided ``` +```@docs +Kumaraswamy +``` +```@example plotdensity +plotdensity((0, 1), Kumaraswamy, (2, 5)) # hide +``` + ```@docs Laplace ``` diff --git a/src/Distributions.jl b/src/Distributions.jl index 443e776c37..489f6435ac 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -25,6 +25,7 @@ import StatsBase: kurtosis, skewness, entropy, mode, modes, import PDMats: dim, PDMat, invquad using SpecialFunctions +using Base.MathConstants: eulergamma export # re-export Statistics @@ -110,6 +111,7 @@ export Kolmogorov, KSDist, KSOneSided, + Kumaraswamy, Laplace, Levy, Lindley, @@ -350,7 +352,8 @@ Supported distributions: Frechet, FullNormal, FullNormalCanon, Gamma, GeneralizedPareto, GeneralizedExtremeValue, Geometric, Gumbel, Hypergeometric, InverseWishart, InverseGamma, InverseGaussian, IsoNormal, - IsoNormalCanon, JohnsonSU, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, Lindley, LKJ, LKJCholesky, + IsoNormalCanon, JohnsonSU, Kolmogorov, KSDist, KSOneSided, Kumaraswamy, + Laplace, Levy, Lindley, LKJ, LKJCholesky, Logistic, LogNormal, MatrixBeta, MatrixFDist, MatrixNormal, MatrixTDist, MixtureModel, Multinomial, MultivariateNormal, MvLogNormal, MvNormal, MvNormalCanon, diff --git a/src/univariate/continuous/kumaraswamy.jl b/src/univariate/continuous/kumaraswamy.jl new file mode 100644 
index 0000000000..8a3fecfe71 --- /dev/null +++ b/src/univariate/continuous/kumaraswamy.jl @@ -0,0 +1,138 @@ +""" + Kumaraswamy(a, b) + +The *Kumaraswamy distribution* with shape parameters `a > 0` and `b > 0` has probability +density function + +```math +f(x; a, b) = a b x^{a - 1} (1 - x^a)^{b - 1}, \\quad 0 < x < 1 +``` + +It is related to the [Beta distribution](@ref Beta) by the following identity: +if ``X \\sim \\operatorname{Kumaraswamy}(a, b)`` then ``X^a \\sim \\operatorname{Beta}(1, b)``. +In particular, if ``X \\sim \\operatorname{Kumaraswamy}(1, 1)`` then +``X \\sim \\operatorname{Uniform}(0, 1)``. + +External links + +- [Kumaraswamy distribution on Wikipedia](https://en.wikipedia.org/wiki/Kumaraswamy_distribution) + +References + +- Kumaraswamy, P. (1980). A generalized probability density function for double-bounded + random processes. Journal of Hydrology. 46(1-2), 79-88. +""" +struct Kumaraswamy{T<:Real} <: ContinuousUnivariateDistribution + a::T + b::T +end + +function Kumaraswamy(a::Real, b::Real; check_args::Bool=true) + @check_args Kumaraswamy (a, a > zero(a)) (b, b > zero(b)) + a′, b′ = promote(a, b) + return Kumaraswamy{typeof(a′)}(a′, b′) +end + +Kumaraswamy() = Kumaraswamy{Float64}(1.0, 1.0) + +Base.convert(::Type{Kumaraswamy{T}}, d::Kumaraswamy) where {T} = Kumaraswamy{T}(T(d.a), T(d.b)) +Base.convert(::Type{Kumaraswamy{T}}, d::Kumaraswamy{T}) where {T} = d + +@distr_support Kumaraswamy 0 1 + +### Parameters + +params(d::Kumaraswamy) = (d.a, d.b) +partype(::Kumaraswamy{T}) where {T} = T + +### Evaluation + +# `pdf`: Uses fallback `exp(logpdf(_))` method + +function logpdf(d::Kumaraswamy, x::Real) + a, b = params(d) + _x = clamp(x, 0, 1) # Ensures we can still get a value when outside the support + y = log(a) + log(b) + xlogy(a - 1, _x) + xlog1py(b - 1, -_x^a) + return x < 0 || x > 1 ? oftype(y, -Inf) : y +end + +function ccdf(d::Kumaraswamy, x::Real) + a, b = params(d) + y = (1 - clamp(x, 0, 1)^a)^b + return x < 0 ? one(y) : (x > 1 ? 
zero(y) : y) +end + +cdf(d::Kumaraswamy, x::Real) = 1 - ccdf(d, x) + +function logccdf(d::Kumaraswamy, x::Real) + a, b = params(d) + y = b * log1p(-clamp(x, 0, 1)^a) + return x < 0 ? zero(y) : (x > 1 ? oftype(y, -Inf) : y) +end + +logcdf(d::Kumaraswamy, x::Real) = log1mexp(logccdf(d, x)) + +function quantile(d::Kumaraswamy, q::Real) + a, b = params(d) + return (1 - (1 - q)^inv(b))^inv(a) +end + +function entropy(d::Kumaraswamy) + a, b = params(d) + H = digamma(b + 1) + eulergamma + return (1 - inv(b)) + (1 - inv(a)) * H - log(a) - log(b) +end + +function gradlogpdf(d::Kumaraswamy, x::Real) + a, b = params(d) + _x = clamp(x, 0, 1) + _xᵃ = _x^a + y = (a * (b * _xᵃ - 1) + (1 - _xᵃ)) / (_x * (_xᵃ - 1)) + return x < 0 || x > 1 ? oftype(y, -Inf) : y +end + +### Sampling + +# `rand`: Uses fallback inversion sampling method + +### Statistics + +_kumomentaswamy(a, b, n) = b * beta(1 + n / a, b) + +mean(d::Kumaraswamy) = _kumomentaswamy(params(d)..., 1) + +function var(d::Kumaraswamy) + a, b = params(d) + m₁ = _kumomentaswamy(a, b, 1) + m₂ = _kumomentaswamy(a, b, 2) + return m₂ - m₁^2 +end + +function skewness(d::Kumaraswamy) + a, b = params(d) + μ = mean(d) + σ² = var(d) + m₂ = _kumomentaswamy(a, b, 2) + m₃ = _kumomentaswamy(a, b, 3) + return (m₃ - μ * (3m₂ - 2μ^2)) / (σ² * sqrt(σ²)) +end + +function kurtosis(d::Kumaraswamy) + a, b = params(d) + μ = mean(d) + m₂ = _kumomentaswamy(a, b, 2) + m₃ = _kumomentaswamy(a, b, 3) + m₄ = _kumomentaswamy(a, b, 4) + return (m₄ + μ * (-4m₃ + μ * (6m₂ - 3μ^2))) / var(d)^2 - 3 +end + +function median(d::Kumaraswamy) + a, b = params(d) + return (1 - 2^-inv(b))^inv(a) +end + +function mode(d::Kumaraswamy) + a, b = params(d) + m = ((a - 1) / (a * b - 1))^inv(a) + return a >= 1 && b >= 1 && !(a == b == 1) ? 
m : oftype(m, NaN) +end diff --git a/src/univariates.jl b/src/univariates.jl index b271434529..16d1a2e71e 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -691,6 +691,7 @@ const continuous_distributions = [ "kolmogorov", "ksdist", "ksonesided", + "kumaraswamy", "laplace", "levy", "lindley", diff --git a/test/ref/continuous/kumaraswamy.R b/test/ref/continuous/kumaraswamy.R new file mode 100644 index 0000000000..f323399017 --- /dev/null +++ b/test/ref/continuous/kumaraswamy.R @@ -0,0 +1,14 @@ +Kumaraswamy <- R6Class("Kumaraswamy", + inherit=ContinuousDistribution, + public=list(names=c("a", "b"), + a=NA, + b=NA, + initialize=function(a=1, b=1) { + self$a <- a + self$b <- b + }, + supp=function() { c(0, 1) }, + properties=function() { list() }, + pdf=function(x, log=FALSE) { dkumar(x, self$a, self$b, log=log) }, + cdf=function(x) { pkumar(x, self$a, self$b) }, + quan=function(x) { qkumar(x, self$a, self$b) })) diff --git a/test/ref/continuous_test.lst b/test/ref/continuous_test.lst index c9b64ed0e4..a939d47dd8 100644 --- a/test/ref/continuous_test.lst +++ b/test/ref/continuous_test.lst @@ -94,6 +94,13 @@ JohnsonSU() JohnsonSU(2.0, 5.0, 5.0, 5.0) JohnsonSU(0.0, 2.0, -1.0, 3.0) +Kumaraswamy() +Kumaraswamy(0.5, 0.5) +Kumaraswamy(5, 1.0) +Kumaraswamy(1.0, 3) +Kumaraswamy(2, 2) +Kumaraswamy(2, 5) + Laplace() Laplace(2.0) Laplace(0.0, 1.0) diff --git a/test/ref/continuous_test.ref.json b/test/ref/continuous_test.ref.json index dee2b577cf..f8fbb6f5d3 100644 --- a/test/ref/continuous_test.ref.json +++ b/test/ref/continuous_test.ref.json @@ -2608,6 +2608,162 @@ { "q": 0.90, "x": 1.67195778058535 } ] }, +{ + "expr": "Kumaraswamy()", + "dtype": "Kumaraswamy", + "minimum": 0, + "maximum": 1, + "properties": { + }, + "points": [ + { "x": 0.1, "pdf": 1, "logpdf": 0, "cdf": 0.1 }, + { "x": 0.2, "pdf": 1, "logpdf": 0, "cdf": 0.2 }, + { "x": 0.3, "pdf": 1, "logpdf": 0, "cdf": 0.3 }, + { "x": 0.4, "pdf": 1, "logpdf": 0, "cdf": 0.4 }, + { "x": 0.5, "pdf": 1, "logpdf": 0, 
"cdf": 0.5 }, + { "x": 0.6, "pdf": 1, "logpdf": 0, "cdf": 0.6 }, + { "x": 0.7, "pdf": 1, "logpdf": 0, "cdf": 0.7 }, + { "x": 0.8, "pdf": 1, "logpdf": 0, "cdf": 0.8 }, + { "x": 0.9, "pdf": 1, "logpdf": 0, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.1 }, + { "q": 0.25, "x": 0.25 }, + { "q": 0.50, "x": 0.5 }, + { "q": 0.75, "x": 0.75 }, + { "q": 0.90, "x": 0.9 } + ] +}, +{ + "expr": "Kumaraswamy(0.5, 0.5)", + "dtype": "Kumaraswamy", + "minimum": 0, + "maximum": 1, + "properties": { + }, + "points": [ + { "x": 0.0361, "pdf": 1.46198830409357, "logpdf": 0.379797361359587, "cdf": 0.1 }, + { "x": 0.1296, "pdf": 0.868055555555556, "logpdf": -0.141499562273699, "cdf": 0.2 }, + { "x": 0.2601, "pdf": 0.700280112044818, "logpdf": -0.356274863917393, "cdf": 0.3 }, + { "x": 0.4096, "pdf": 0.651041666666667, "logpdf": -0.42918163472548, "cdf": 0.4 }, + { "x": 0.5625, "pdf": 0.666666666666667, "logpdf": -0.405465108108164, "cdf": 0.5 }, + { "x": 0.7056, "pdf": 0.744047619047619, "logpdf": -0.295650242100958, "cdf": 0.6 }, + { "x": 0.8281, "pdf": 0.915750915750916, "logpdf": -0.0880108773227132, "cdf": 0.7 }, + { "x": 0.9216, "pdf": 1.30208333333333, "logpdf": 0.263965545834465, "cdf": 0.8 }, + { "x": 0.9801, "pdf": 2.52525252525252, "logpdf": 0.926341067727656, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.0361 }, + { "q": 0.25, "x": 0.19140625 }, + { "q": 0.50, "x": 0.5625 }, + { "q": 0.75, "x": 0.87890625 }, + { "q": 0.90, "x": 0.9801 } + ] +}, +{ + "expr": "Kumaraswamy(5, 1.0)", + "dtype": "Kumaraswamy", + "minimum": 0, + "maximum": 1, + "properties": { + }, + "points": [ + { "x": 0.630957344480193, "pdf": 0.792446596230557, "logpdf": -0.232630161961136, "cdf": 0.1 }, + { "x": 0.724779663677696, "pdf": 1.37972966146121, "logpdf": 0.32188758248682, "cdf": 0.2 }, + { "x": 0.786003085596623, "pdf": 1.90838945480909, "logpdf": 0.646259668973352, "cdf": 0.3 }, + { "x": 0.832553207401873, "pdf": 2.40224886796286, "logpdf": 0.876405326934776, "cdf": 0.4 }, + { "x": 
0.870550563296124, "pdf": 2.87174588749259, "logpdf": 1.05492016798614, "cdf": 0.5 }, + { "x": 0.902880451447434, "pdf": 3.32269902974487, "logpdf": 1.20077741342131, "cdf": 0.6 }, + { "x": 0.931149915094838, "pdf": 3.75879323325023, "logpdf": 1.32409795728311, "cdf": 0.7 }, + { "x": 0.956352499790037, "pdf": 4.18255821036509, "logpdf": 1.43092307138273, "cdf": 0.8 }, + { "x": 0.979148362360977, "pdf": 4.59583059420061, "logpdf": 1.52514949990784, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.630957344480193 }, + { "q": 0.25, "x": 0.757858283255199 }, + { "q": 0.50, "x": 0.870550563296124 }, + { "q": 0.75, "x": 0.944087511294902 }, + { "q": 0.90, "x": 0.979148362360977 } + ] +}, +{ + "expr": "Kumaraswamy(1.0, 3)", + "dtype": "Kumaraswamy", + "minimum": 0, + "maximum": 1, + "properties": { + }, + "points": [ + { "x": 0.0345106153943703, "pdf": 2.79650925535847, "logpdf": 1.02837194489623, "cdf": 0.1 }, + { "x": 0.0716822332774442, "pdf": 2.58532162803826, "logpdf": 0.949849921125303, "cdf": 0.2 }, + { "x": 0.112095998257399, "pdf": 2.36512054893157, "logpdf": 0.860828992708955, "cdf": 0.3 }, + { "x": 0.156567334698251, "pdf": 2.13413598269404, "logpdf": 0.758061872824116, "cdf": 0.4 }, + { "x": 0.2062994740159, "pdf": 1.88988157484231, "logpdf": 0.636514168294813, "cdf": 0.5 }, + { "x": 0.263193700271923, "pdf": 1.62865056995694, "logpdf": 0.487751800752006, "cdf": 0.6 }, + { "x": 0.33056704991783, "pdf": 1.34442142396715, "logpdf": 0.295963752450819, "cdf": 0.7 }, + { "x": 0.415196452357427, "pdf": 1.02598556800602, "logpdf": 0.0256536803787092, "cdf": 0.8 }, + { "x": 0.535841116638722, "pdf": 0.646330407009565, "logpdf": -0.436444439994588, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.0345106153943703 }, + { "q": 0.25, "x": 0.0914397035839302 }, + { "q": 0.50, "x": 0.2062994740159 }, + { "q": 0.75, "x": 0.370039475052563 }, + { "q": 0.90, "x": 0.535841116638722 } + ] +}, +{ + "expr": "Kumaraswamy(2, 2)", + "dtype": "Kumaraswamy", + "minimum": 0, + 
"maximum": 1, + "properties": { + }, + "points": [ + { "x": 0.226531900511796, "pdf": 0.859628121964726, "logpdf": -0.151255399573567, "cdf": 0.1 }, + { "x": 0.324919696232906, "pdf": 1.16246804480858, "logpdf": 0.150545369764855, "cdf": 0.2 }, + { "x": 0.40415340338283, "pdf": 1.35255598879246, "logpdf": 0.301996127401067, "cdf": 0.3 }, + { "x": 0.47476660661689, "pdf": 1.4710105286101, "logpdf": 0.385949599044466, "cdf": 0.4 }, + { "x": 0.541196100146197, "pdf": 1.53073372946036, "logpdf": 0.42574718219016, "cdf": 0.5 }, + { "x": 0.606254458100165, "pdf": 1.53371594338211, "logpdf": 0.427693511992613, "cdf": 0.6 }, + { "x": 0.672515756317154, "pdf": 1.47340820005021, "logpdf": 0.387578220644201, "cdf": 0.7 }, + { "x": 0.743496068920369, "pdf": 1.33000620088785, "logpdf": 0.285183604544486, "cdf": 0.8 }, + { "x": 0.82690521463053, "pdf": 1.04596155492114, "logpdf": 0.0449366105897816, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.226531900511796 }, + { "q": 0.25, "x": 0.366025403784439 }, + { "q": 0.50, "x": 0.541196100146197 }, + { "q": 0.75, "x": 0.707106781186548 }, + { "q": 0.90, "x": 0.82690521463053 } + ] +}, +{ + "expr": "Kumaraswamy(2, 5)", + "dtype": "Kumaraswamy", + "minimum": 0, + "maximum": 1, + "properties": { + }, + "points": [ + { "x": 0.144400961350758, "pdf": 1.32728471201559, "logpdf": 0.28313528547594, "cdf": 0.1 }, + { "x": 0.208919841589934, "pdf": 1.7476387975003, "logpdf": 0.558265618295218, "cdf": 0.2 }, + { "x": 0.262392997058158, "pdf": 1.9725620435889, "logpdf": 0.679333227534167, "cdf": 0.3 }, + { "x": 0.311640094584387, "pdf": 2.07097247981028, "logpdf": 0.728018293967013, "cdf": 0.4 }, + { "x": 0.359790823540395, "pdf": 2.0664556357194, "logpdf": 0.725834886420501, "cdf": 0.5 }, + { "x": 0.409202630243412, "pdf": 1.96601311053933, "logpdf": 0.676007690339223, "cdf": 0.6 }, + { "x": 0.462598005187417, "pdf": 1.76563430983077, "logpdf": 0.568510008163275, "cdf": 0.7 }, + { "x": 0.524614464461574, "pdf": 1.44765227489845, 
"logpdf": 0.369943123480185, "cdf": 0.8 }, + { "x": 0.607488811024373, "pdf": 0.962804881088825, "logpdf": -0.0379045034051143, "cdf": 0.9 } + ], + "quans": [ + { "q": 0.10, "x": 0.144400961350758 }, + { "q": 0.25, "x": 0.236458217673013 }, + { "q": 0.50, "x": 0.359790823540395 }, + { "q": 0.75, "x": 0.492078974093388 }, + { "q": 0.90, "x": 0.607488811024373 } + ] +}, { "expr": "Laplace()", "dtype": "Laplace", diff --git a/test/ref/rdistributions.R b/test/ref/rdistributions.R index 5f0ab123fa..53623544ba 100644 --- a/test/ref/rdistributions.R +++ b/test/ref/rdistributions.R @@ -57,6 +57,7 @@ source("continuous/gumbel.R") source("continuous/inversegamma.R") source("continuous/inversegaussian.R") source("continuous/johnsonsu.R") +source("continuous/kumaraswamy.R") source("continuous/laplace.R") source("continuous/levy.R") source("continuous/lindley.R") diff --git a/test/univariate/continuous/kumaraswamy.jl b/test/univariate/continuous/kumaraswamy.jl new file mode 100644 index 0000000000..37cfbd429b --- /dev/null +++ b/test/univariate/continuous/kumaraswamy.jl @@ -0,0 +1,69 @@ +using Distributions +using ForwardDiff +using Test + +using Distributions: expectation + +@testset "Kumaraswamy" begin + @testset "NaNs" begin + D = Kumaraswamy(420, 69) + for f in (pdf, logpdf, cdf, ccdf, logcdf, logccdf) + @test isnan(f(D, NaN)) + end + end + @testset "$T" for T in (Float16, Float32, Float64, Int32, Int64, Rational{Int}) + D = Kumaraswamy(T(2), T(3)) + @test partype(D) === T + @test typeof(@inferred rand(D)) === typeof(rand()) + tol = sqrt(eps(float(T))) + @testset "gradlogpdf" begin + for x in T(0):(T <: Integer ? 
one(T) : T(0.5)):T(20) + fd = ForwardDiff.derivative(Base.Fix1(logpdf, D), x) + gl = @inferred gradlogpdf(D, x) + @test fd ≈ gl atol=tol + if T <: AbstractFloat + @test gl isa T + end + end + end + @testset "median" begin + m = @inferred median(D) + @test m ≈ sqrt(1 - T(2)^(-1//3)) atol=tol + if T <: AbstractFloat + @test m isa T + end + end + @testset "entropy" begin + shannon = @inferred entropy(D) + @test shannon ≈ (19//12 - log(T(6))) atol=tol + if T <: AbstractFloat + @test shannon isa T + end + end + @testset "mode" begin + m = @inferred mode(D) + @test m ≈ inv(sqrt(T(5))) atol=tol + if T <: AbstractFloat + @test m isa T + end + @test isnan(mode(Kumaraswamy(1, 1))) + end + @testset "$f" for (f, n) in [(skewness, 3), (kurtosis, 4)] + μ = mean(D) + σ = std(D) + y₁ = @inferred f(D) + y₂ = expectation(x -> ((x - μ) / σ)^n, D) - 3 * (f === kurtosis) + if T <: AbstractFloat + @test y₁ isa T + end + @test y₁ ≈ y₂ atol=sqrt(tol) + end + end + @testset "limits" begin + bathtub = Kumaraswamy(0.5, 0.5) + @test logpdf(bathtub, 0) == logpdf(bathtub, 1) == Inf + explike = Kumaraswamy(5, 1) + @test logpdf(explike, 0) == -Inf + @test logpdf(explike, 1) ≈ log(5) + end +end From 87f323a83f640d6c2fa7546809960640c251ae72 Mon Sep 17 00:00:00 2001 From: ynhame <107622956+ynhame@users.noreply.github.com> Date: Sun, 21 May 2023 17:34:44 -0300 Subject: [PATCH 89/93] Fix wrong parameter on chisq.jl documentation (#1725) there were a "k" left from the old documentation of the chisq. this commit changes the "k" to a \\nu, making it consistent with the rest of the distribution. 
--- src/univariate/continuous/chisq.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/univariate/continuous/chisq.jl b/src/univariate/continuous/chisq.jl index 3ca0ac54c4..8604742ce5 100644 --- a/src/univariate/continuous/chisq.jl +++ b/src/univariate/continuous/chisq.jl @@ -4,7 +4,7 @@ The *Chi squared distribution* (typically written χ²) with `ν` degrees of fre probability density function ```math -f(x; \\nu) = \\frac{x^{\\nu/2 - 1} e^{-x/2}}{2^{\\nu/2} \\Gamma(k/2)}, \\quad x > 0. +f(x; \\nu) = \\frac{x^{\\nu/2 - 1} e^{-x/2}}{2^{\\nu/2} \\Gamma(\\nu/2)}, \\quad x > 0. ``` If `ν` is an integer, then it is the distribution of the sum of squares of `ν` independent standard [`Normal`](@ref) variates. From a7fb967495ab68dff11562277e01ad3e7f0f855b Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 22 May 2023 22:56:03 +0200 Subject: [PATCH 90/93] Add OrderStatistic and JointOrderStatistics distributions (#1668) * Add OrderStatistic distribution * Add JointOrderStatistics distribution * Simplify implementation of logpdf * Bug fix * Add complementary methods * Avoid code duplication * Repair cquantile * Add OrderStatistic tests * Fix cquantile and add comments * Actually repair cquantile function * Add more tests * Remove suffix * Remove scrU * Use correction for multiple tests * Simplify code * Use correct distribution name * Use larger sample size * Ensure return type is constant * Swap i and j * Ensure eltype is returned * Add JointOrderStatistics tests * Add reference * Link between docstrings * Unify OrderStatistic tests * More stringently compute number of checks * Update testset name * Increase number of draws * Set seed in testset * Add an order statistics docs page * Rename rank variables to `rank` and `ranks` * Make arg checking more efficient * Support tuple of ranks * Apply suggestions from code review Co-authored-by: David Widmann * Use Fill * Reduce chances of type-instability * Ensure type-stability and use GammaMTSampler * Update 
docstrings * Apply suggestions from code review Co-authored-by: David Widmann * Add tests that check for empty ranks * Add comment explaining choice of gamma sampler * Apply suggestions from code review Co-authored-by: David Widmann * Test for 0 density out of support * Return -Inf for logpdf if out of support --------- Co-authored-by: David Widmann --- docs/make.jl | 1 + docs/src/order_statistics.md | 16 ++ src/Distributions.jl | 2 + src/multivariate/jointorderstatistics.jl | 168 ++++++++++++++++ src/multivariates.jl | 1 + src/univariate/orderstatistic.jl | 108 ++++++++++ src/univariates.jl | 2 + test/multivariate/jointorderstatistics.jl | 232 ++++++++++++++++++++++ test/runtests.jl | 2 + test/univariate/orderstatistic.jl | 232 ++++++++++++++++++++++ 10 files changed, 764 insertions(+) create mode 100644 docs/src/order_statistics.md create mode 100644 src/multivariate/jointorderstatistics.jl create mode 100644 src/univariate/orderstatistic.jl create mode 100644 test/multivariate/jointorderstatistics.jl create mode 100644 test/univariate/orderstatistic.jl diff --git a/docs/make.jl b/docs/make.jl index 39e51d4db6..f95b3f60b3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -17,6 +17,7 @@ makedocs( "reshape.md", "cholesky.md", "mixture.md", + "order_statistics.md", "convolution.md", "fit.md", "extends.md", diff --git a/docs/src/order_statistics.md b/docs/src/order_statistics.md new file mode 100644 index 0000000000..cd2bad58bd --- /dev/null +++ b/docs/src/order_statistics.md @@ -0,0 +1,16 @@ +# Order Statistics + +The $i$th [Order Statistic](https://en.wikipedia.org/wiki/Order_statistic) of a random sample of size $n$ from a univariate distribution is the $i$th element after sorting in increasing order. +As a special case, the first and $n$th order statistics are the minimum and maximum of the sample, while for odd $n$, the $\lceil \frac{n}{2} \rceil$th entry is the sample median. 
+ +Given any univariate distribution and the sample size $n$, we can construct the distribution of its $i$th order statistic: + +```@docs +OrderStatistic +``` + +If we are interested in more than one order statistic, for continuous univariate distributions we can also construct the joint distribution of order statistics: + +```@docs +JointOrderStatistics +``` diff --git a/src/Distributions.jl b/src/Distributions.jl index 489f6435ac..1e2d580c55 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -108,6 +108,7 @@ export IsoNormal, IsoNormalCanon, JohnsonSU, + JointOrderStatistics, Kolmogorov, KSDist, KSOneSided, @@ -144,6 +145,7 @@ export Normal, NormalCanon, NormalInverseGaussian, + OrderStatistic, Pareto, PGeneralizedGaussian, SkewedExponentialPower, diff --git a/src/multivariate/jointorderstatistics.jl b/src/multivariate/jointorderstatistics.jl new file mode 100644 index 0000000000..71c3f93bf3 --- /dev/null +++ b/src/multivariate/jointorderstatistics.jl @@ -0,0 +1,168 @@ +# Implementation based on chapters 2-4 of +# Arnold, Barry C., Narayanaswamy Balakrishnan, and Haikady Navada Nagaraja. +# A first course in order statistics. Society for Industrial and Applied Mathematics, 2008. + +""" + JointOrderStatistics <: ContinuousMultivariateDistribution + +The joint distribution of a subset of order statistics from a sample from a continuous +univariate distribution. + + JointOrderStatistics( + dist::ContinuousUnivariateDistribution, + n::Int, + ranks=Base.OneTo(n); + check_args::Bool=true, + ) + +Construct the joint distribution of order statistics for the specified `ranks` from an IID +sample of size `n` from `dist`. + +The ``i``th order statistic of a sample is the ``i``th element of the sorted sample. +For example, the 1st order statistic is the sample minimum, while the ``n``th order +statistic is the sample maximum. + +`ranks` must be a sorted vector or tuple of unique `Int`s between 1 and `n`. 
+ +For a single order statistic, use [`OrderStatistic`](@ref) instead. + +## Examples + +```julia +JointOrderStatistics(Normal(), 10) # Product(fill(Normal(), 10)) restricted to ordered vectors +JointOrderStatistics(Cauchy(), 10, 2:9) # joint distribution of all but the extrema +JointOrderStatistics(Cauchy(), 10, (1, 10)) # joint distribution of only the extrema +``` +""" +struct JointOrderStatistics{ + D<:ContinuousUnivariateDistribution,R<:Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}} +} <: ContinuousMultivariateDistribution + dist::D + n::Int + ranks::R + function JointOrderStatistics( + dist::ContinuousUnivariateDistribution, + n::Int, + ranks::Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}}=Base.OneTo(n); + check_args::Bool=true, + ) + @check_args( + JointOrderStatistics, + (n, n ≥ 1, "`n` must be a positive integer."), + ( + ranks, + _are_ranks_valid(ranks, n), + "`ranks` must be a sorted vector or tuple of unique integers between 1 and `n`.", + ), + ) + return new{typeof(dist),typeof(ranks)}(dist, n, ranks) + end +end + +_islesseq(x, y) = isless(x, y) || isequal(x, y) + +function _are_ranks_valid(ranks, n) + # this is equivalent to but faster than + # issorted(ranks) && allunique(ranks) + !isempty(ranks) && first(ranks) ≥ 1 && last(ranks) ≤ n && issorted(ranks; lt=_islesseq) +end +function _are_ranks_valid(ranks::AbstractRange, n) + !isempty(ranks) && first(ranks) ≥ 1 && last(ranks) ≤ n && step(ranks) > 0 +end + +length(d::JointOrderStatistics) = length(d.ranks) +function insupport(d::JointOrderStatistics, x::AbstractVector) + length(d) == length(x) || return false + xi, state = iterate(x) # at least one element! 
+ dist = d.dist + insupport(dist, xi) || return false + while (xj_state = iterate(x, state)) !== nothing + xj, state = xj_state + xj ≥ xi && insupport(dist, xj) || return false + xi = xj + end + return true +end +minimum(d::JointOrderStatistics) = Fill(minimum(d.dist), length(d)) +maximum(d::JointOrderStatistics) = Fill(maximum(d.dist), length(d)) + +params(d::JointOrderStatistics) = tuple(params(d.dist)..., d.n, d.ranks) +partype(d::JointOrderStatistics) = partype(d.dist) +Base.eltype(::Type{<:JointOrderStatistics{D}}) where {D} = Base.eltype(D) +Base.eltype(d::JointOrderStatistics) = eltype(d.dist) + +function logpdf(d::JointOrderStatistics, x::AbstractVector{<:Real}) + n = d.n + ranks = d.ranks + lp = loglikelihood(d.dist, x) + T = typeof(lp) + lp += loggamma(T(n + 1)) + if length(ranks) == n + issorted(x) && return lp + return oftype(lp, -Inf) + end + i = first(ranks) + xᵢ = first(x) + if i > 1 # _marginalize_range(d.dist, 0, i, -Inf, xᵢ, T) + lp += (i - 1) * logcdf(d.dist, xᵢ) - loggamma(T(i)) + end + for (j, xⱼ) in Iterators.drop(zip(ranks, x), 1) + xⱼ < xᵢ && return oftype(lp, -Inf) + lp += _marginalize_range(d.dist, i, j, xᵢ, xⱼ, T) + i = j + xᵢ = xⱼ + end + if i < n # _marginalize_range(d.dist, i, n + 1, xᵢ, Inf, T) + lp += (n - i) * logccdf(d.dist, xᵢ) - loggamma(T(n - i + 1)) + end + return lp +end + +# given ∏ₖf(xₖ), marginalize all xₖ for i < k < j +function _marginalize_range(dist, i, j, xᵢ, xⱼ, T) + k = j - i - 1 + k == 0 && return zero(T) + return k * T(logdiffcdf(dist, xⱼ, xᵢ)) - loggamma(T(k + 1)) +end + +function _rand!(rng::AbstractRNG, d::JointOrderStatistics, x::AbstractVector{<:Real}) + n = d.n + if n == length(d.ranks) # ranks == 1:n + # direct method, slower than inversion method for large `n` and distributions with + # fast quantile function or that use inversion sampling + rand!(rng, d.dist, x) + sort!(x) + else + # use exponential generation method with inversion, where for gaps in the ranks, we + # use the fact that the sum Y of k IID 
variables xₘ ~ Exp(1) is Y ~ Gamma(k, 1). + # Lurie, D., and H. O. Hartley. "Machine-generation of order statistics for Monte + # Carlo computations." The American Statistician 26.1 (1972): 26-27. + # this is slow if length(d.ranks) is close to n and quantile for d.dist is expensive, + # but this branch is probably taken when length(d.ranks) is small or much smaller than n. + T = typeof(one(eltype(x))) + s = zero(eltype(x)) + i = 0 + for (m, j) in zip(eachindex(x), d.ranks) + k = j - i + if k > 1 + # specify GammaMTSampler directly to avoid unnecessarily checking the shape + # parameter again and because it has been benchmarked to be the fastest for + # shape k ≥ 1 and scale 1 + s += T(rand(rng, GammaMTSampler(Gamma{T}(T(k), T(1))))) + else + s += randexp(rng, T) + end + i = j + x[m] = s + end + j = n + 1 + k = j - i + if k > 1 + s += T(rand(rng, GammaMTSampler(Gamma{T}(T(k), T(1))))) + else + s += randexp(rng, T) + end + x .= quantile.(d.dist, x ./ s) + end + return x +end diff --git a/src/multivariates.jl b/src/multivariates.jl index 477c78ba5a..7a6f926a65 100644 --- a/src/multivariates.jl +++ b/src/multivariates.jl @@ -112,6 +112,7 @@ end for fname in ["dirichlet.jl", "multinomial.jl", "dirichletmultinomial.jl", + "jointorderstatistics.jl", "mvnormal.jl", "mvnormalcanon.jl", "mvlognormal.jl", diff --git a/src/univariate/orderstatistic.jl b/src/univariate/orderstatistic.jl new file mode 100644 index 0000000000..1a7055ef91 --- /dev/null +++ b/src/univariate/orderstatistic.jl @@ -0,0 +1,108 @@ +# Implementation based on chapters 2-4 of +# Arnold, Barry C., Narayanaswamy Balakrishnan, and Haikady Navada Nagaraja. +# A first course in order statistics. Society for Industrial and Applied Mathematics, 2008. + +""" + OrderStatistic{D<:UnivariateDistribution,S<:ValueSupport} <: UnivariateDistribution{S} + +The distribution of an order statistic from IID samples from a univariate distribution. 
+ + OrderStatistic(dist::UnivariateDistribution, n::Int, rank::Int; check_args::Bool=true) + +Construct the distribution of the `rank` ``=i``th order statistic from `n` independent +samples from `dist`. + +The ``i``th order statistic of a sample is the ``i``th element of the sorted sample. +For example, the 1st order statistic is the sample minimum, while the ``n``th order +statistic is the sample maximum. + +If ``f`` is the probability density (mass) function of `dist` with distribution function +``F``, then the probability density function ``g`` of the order statistic for continuous +`dist` is +```math +g(x; n, i) = i {n \\choose i} [F(x)]^{i-1} [1 - F(x)]^{n-i} f(x), +``` +and the probability mass function ``g`` of the order statistic for discrete `dist` is +```math +g(x; n, i) = \\sum_{k=i}^n {n \\choose k} \\left( [F(x)]^k [1 - F(x)]^{n-k} - [F(x_-)]^k [1 - F(x_-)]^{n-k} \\right), +``` +where ``x_-`` is the largest element in the support of `dist` less than ``x``. + +For the joint distribution of a subset of order statistics, use +[`JointOrderStatistics`](@ref) instead. 
+ +## Examples + +```julia +OrderStatistic(Cauchy(), 10, 1) # distribution of the sample minimum +OrderStatistic(DiscreteUniform(10), 10, 10) # distribution of the sample maximum +OrderStatistic(Gamma(1, 1), 11, 6) # distribution of the sample median +``` +""" +struct OrderStatistic{D<:UnivariateDistribution,S<:ValueSupport} <: + UnivariateDistribution{S} + dist::D + n::Int + rank::Int + function OrderStatistic( + dist::UnivariateDistribution, n::Int, rank::Int; check_args::Bool=true + ) + @check_args(OrderStatistic, 1 ≤ rank ≤ n) + return new{typeof(dist),value_support(typeof(dist))}(dist, n, rank) + end +end + +minimum(d::OrderStatistic) = minimum(d.dist) +maximum(d::OrderStatistic) = maximum(d.dist) +insupport(d::OrderStatistic, x::Real) = insupport(d.dist, x) + +params(d::OrderStatistic) = tuple(params(d.dist)..., d.n, d.rank) +partype(d::OrderStatistic) = partype(d.dist) +Base.eltype(::Type{<:OrderStatistic{D}}) where {D} = Base.eltype(D) +Base.eltype(d::OrderStatistic) = eltype(d.dist) + +# distribution of the ith order statistic from an IID uniform distribution, with CDF Uᵢₙ(x) +function _uniform_orderstatistic(d::OrderStatistic) + n = d.n + rank = d.rank + return Beta{Int}(rank, n - rank + 1) +end + +function logpdf(d::OrderStatistic, x::Real) + b = _uniform_orderstatistic(d) + p = cdf(d.dist, x) + if value_support(typeof(d)) === Continuous + return logpdf(b, p) + logpdf(d.dist, x) + else + return logdiffcdf(b, p, p - pdf(d.dist, x)) + end +end + +for f in (:logcdf, :logccdf, :cdf, :ccdf) + @eval begin + function $f(d::OrderStatistic, x::Real) + b = _uniform_orderstatistic(d) + return $f(b, cdf(d.dist, x)) + end + end +end + +for f in (:quantile, :cquantile) + @eval begin + function $f(d::OrderStatistic, p::Real) + # since cdf is Fᵢₙ(x) = Uᵢₙ(Fₓ(x)), and Uᵢₙ is invertible and increasing, we + # have Fₓ(x) = Uᵢₙ⁻¹(Fᵢₙ(x)). 
then quantile function is + # Qᵢₙ(p) = inf{x: p ≤ Fᵢₙ(x)} = inf{x: Uᵢₙ⁻¹(p) ≤ Fₓ(x)} = Qₓ(Uᵢₙ⁻¹(p)) + b = _uniform_orderstatistic(d) + return quantile(d.dist, $f(b, p)) + end + end +end + +function rand(rng::AbstractRNG, d::OrderStatistic) + # inverse transform sampling. Since quantile function is Qₓ(Uᵢₙ⁻¹(p)), we draw a random + # variable from Uᵢₙ and pass it through the quantile function of `d.dist` + T = eltype(d.dist) + b = _uniform_orderstatistic(d) + return T(quantile(d.dist, rand(rng, b))) +end diff --git a/src/univariates.jl b/src/univariates.jl index 16d1a2e71e..726fd8e429 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -731,3 +731,5 @@ end for dname in continuous_distributions include(joinpath("univariate", "continuous", "$(dname).jl")) end + +include(joinpath("univariate", "orderstatistic.jl")) diff --git a/test/multivariate/jointorderstatistics.jl b/test/multivariate/jointorderstatistics.jl new file mode 100644 index 0000000000..d5d65a752e --- /dev/null +++ b/test/multivariate/jointorderstatistics.jl @@ -0,0 +1,232 @@ +using Distributions, LinearAlgebra, Random, SpecialFunctions, Statistics, Test + +@testset "JointOrderStatistics" begin + Random.seed!(123) + + @testset "check_args" begin + dist = Normal() + JointOrderStatistics(dist, 2, 1:2) + JointOrderStatistics(dist, 3, 2:3) + JointOrderStatistics(dist, 5, [2, 3]) + @test_throws DomainError JointOrderStatistics(dist, 0, 1:2) + @test_throws DomainError JointOrderStatistics(dist, 2, 2:3) + @test_throws DomainError JointOrderStatistics(dist, 3, 0:3) + @test_throws DomainError JointOrderStatistics(dist, 5, 3:-1:2) + @test_throws DomainError JointOrderStatistics(dist, 5, 2:1:1) + @test_throws DomainError JointOrderStatistics(dist, 0, [1, 2]) + @test_throws DomainError JointOrderStatistics(dist, 2, [2, 3]) + @test_throws DomainError JointOrderStatistics(dist, 3, [0, 1, 2, 3]) + @test_throws DomainError JointOrderStatistics(dist, 5, Int[]) + @test_throws DomainError JointOrderStatistics(dist, 
5, (3, 2)) + @test_throws DomainError JointOrderStatistics(dist, 5, (3, 3)) + JointOrderStatistics(dist, 0, 1:2; check_args=false) + JointOrderStatistics(dist, 2, 2:3; check_args=false) + JointOrderStatistics(dist, 3, 0:3; check_args=false) + JointOrderStatistics(dist, 5, 3:-1:2; check_args=false) + JointOrderStatistics(dist, 5, 2:1:1; check_args=false) + JointOrderStatistics(dist, 0, [1, 2]; check_args=false) + JointOrderStatistics(dist, 2, [2, 3]; check_args=false) + JointOrderStatistics(dist, 3, [0, 1, 2, 3]; check_args=false) + JointOrderStatistics(dist, 5, Int[]; check_args=false) + JointOrderStatistics(dist, 5, (3, 2); check_args=false) + JointOrderStatistics(dist, 5, (3, 3); check_args=false) + end + + @testset for T in [Float32, Float64], + dist in [Uniform(T(2), T(10)), Exponential(T(10)), Normal(T(100), T(10))], + n in [16, 40], + r in [ + 1:n, + ([i, j] for j in 2:n for i in 1:min(10, j - 1))..., + vcat(2:4, (n - 10):(n - 5)), + (2, n ÷ 2, n - 5), + ] + + d = JointOrderStatistics(dist, n, r) + + @testset "basic" begin + @test d isa JointOrderStatistics + @test d.dist === dist + @test d.n === n + @test d.ranks === r + @test length(d) == length(r) + @test params(d) == (params(dist)..., d.n, d.ranks) + @test partype(d) === partype(dist) + @test eltype(d) === eltype(dist) + + length(r) == n && @test JointOrderStatistics(dist, n) == d + end + + @testset "support" begin + @test minimum(d) == fill(minimum(dist), length(r)) + @test maximum(d) == fill(maximum(dist), length(r)) + x = sort(rand(dist, length(r))) + x2 = sort(rand(dist, length(r) + 1)) + @test insupport(d, x) + if length(x) > 1 + @test !insupport(d, reverse(x)) + @test !insupport(d, x[1:(end - 1)]) + end + @test !insupport(d, x2) + @test !insupport(d, fill(NaN, length(x))) + end + + @testset "pdf/logpdf" begin + x = convert(Vector{T}, sort(rand(dist, length(r)))) + @test @inferred(logpdf(d, x)) isa T + @test @inferred(pdf(d, x)) isa T + + if length(r) == 1 + @test logpdf(d, x) ≈ 
logpdf(OrderStatistic(dist, n, r[1]), x[1]) + @test pdf(d, x) ≈ pdf(OrderStatistic(dist, n, r[1]), x[1]) + elseif length(r) == 2 + i, j = r + xi, xj = x + lc = T( + logfactorial(n) - logfactorial(i - 1) - logfactorial(n - j) - + logfactorial(j - i - 1), + ) + lp = ( + lc + + (i - 1) * logcdf(dist, xi) + + (n - j) * logccdf(dist, xj) + + (j - i - 1) * logdiffcdf(dist, xj, xi) + + logpdf(dist, xi) + + logpdf(dist, xj) + ) + @test logpdf(d, x) ≈ lp + @test pdf(d, x) ≈ exp(lp) + elseif collect(r) == 1:n + @test logpdf(d, x) ≈ sum(Base.Fix1(logpdf, d.dist), x) + loggamma(T(n + 1)) + @test pdf(d, x) ≈ exp(logpdf(d, x)) + end + + @testset "no density for vectors out of support" begin + # check unsorted vectors have 0 density + x2 = copy(x) + x2[1], x2[2] = x2[2], x2[1] + @test logpdf(d, x2) == T(-Inf) + @test pdf(d, x2) == zero(T) + + x3 = copy(x) + x3[end-1], x3[end] = x3[end], x3[end-1] + @test logpdf(d, x3) == T(-Inf) + @test pdf(d, x3) == zero(T) + + # check out of support of original distribution + if islowerbounded(dist) + x4 = copy(x) + x4[1] = minimum(dist) - 1 + @test logpdf(d, x4) == T(-Inf) + @test pdf(d, x4) == zero(T) + end + end + end + end + + @testset "rand" begin + @testset for T in [Float32, Float64] + dist = Uniform(T(-2), T(1)) + d = JointOrderStatistics(dist, 10, 1:10) + S = typeof(rand(dist)) + + v = rand(d) + @test v isa Vector{S} + @test insupport(d, v) + @test size(v) == (10,) + + rng = Random.default_rng() + Random.seed!(rng, 42) + x = @inferred(rand(rng, d, 20)) + @test x isa Matrix{S} + @test size(x) == (10, 20) + @test all(xi -> insupport(d, xi), eachcol(x)) + + Random.seed!(rng, 42) + x2 = rand(rng, d, 20) + @test x2 == x + end + + ndraws = 300_000 + dists = [Uniform(), Exponential()] + + @testset "marginal mean and standard deviation" begin + n = 20 + rs = [1:n, [1, n], vcat(1:7, 12:17)] + @testset for dist in dists, r in rs + d = JointOrderStatistics(dist, n, r) + x = rand(d, ndraws) + @test all(xi -> insupport(d, xi), eachcol(x)) + + m = 
mean(x; dims=2) + v = var(x; mean=m, dims=2) + if dist isa Uniform + # Arnold (2008). A first course in order statistics. eq 2.2.20-21 + m_exact = r ./ (n + 1) + v_exact = @. (m_exact * (1 - m_exact) / (n + 2)) + elseif dist isa Exponential + # Arnold (2008). A first course in order statistics. eq 4.6.6-7 + m_exact = [sum(k -> inv(n - k + 1), 1:i) for i in r] + v_exact = [sum(k -> inv((n - k + 1)^2), 1:i) for i in r] + end + # compute asymptotic sample standard deviation + mean_std = @. sqrt(v_exact / ndraws) + m4 = dropdims(mapslices(xi -> moment(xi, 4), x; dims=2); dims=2) + var_std = @. sqrt((m4 - v_exact^2) / ndraws) + + nchecks = length(r) + α = (0.01 / nchecks) / 2 # multiple correction + tol = quantile(Normal(), 1 - α) + for k in eachindex(m, m_exact, v, v_exact, mean_std, var_std) + @test m[k] ≈ m_exact[k] atol = (tol * mean_std[k]) + @test v[k] ≈ v_exact[k] atol = (tol * var_std[k]) + end + end + end + + @testset "pairwise correlations" begin + n = 100 + rs = [ # good mixture of r values with gaps and no gaps + 1:n, + vcat(1:10, (div(n, 2) - 5):(div(n, 2) + 5), (n - 9):n), + vcat(10:20, (n - 19):(n - 10)), + (1, n), + ] + + nchecks = length(dists) * sum(rs) do r + m = length(r) + return div(m * (m - 1), 2) + end + α = (0.01 / nchecks) / 2 # multiple correction + tol = quantile(Normal(), 1 - α) / sqrt(ndraws) + + @testset for dist in dists, r in rs + d = JointOrderStatistics(dist, n, r) + x = rand(d, ndraws) + @test all(xi -> insupport(d, xi), eachcol(x)) + + m = length(r) + + xcor = cor(x; dims=2) + if dist isa Uniform + # Arnold (2008). A first course in order statistics. Eq 2.3.16 + s = @. n - r + 1 + xcor_exact = Symmetric(sqrt.((r .* collect(s)') ./ (collect(r)' .* s))) + elseif dist isa Exponential + # Arnold (2008). A first course in order statistics. 
Eq 4.6.8 + v = [sum(k -> inv((n - k + 1)^2), 1:i) for i in r] + xcor_exact = Symmetric(sqrt.(v ./ v')) + end + for ii in 1:m, ji in (ii + 1):m + i = r[ii] + j = r[ji] + ρ = xcor[ii, ji] + ρ_exact = xcor_exact[ii, ji] + # use variance-stabilizing transformation, recommended in §3.6 of + # Van der Vaart, A. W. (2000). Asymptotic statistics (Vol. 3). + @test atanh(ρ) ≈ atanh(ρ_exact) atol = tol + end + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 9c8f5b3a4b..cb724278b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -88,6 +88,8 @@ const tests = [ "univariate/continuous/skewedexponentialpower", "univariate/discrete/discreteuniform", "univariate/continuous/tdist", + "univariate/orderstatistic", + "multivariate/jointorderstatistics", "multivariate/product", "eachvariate", "univariate/continuous/triangular", diff --git a/test/univariate/orderstatistic.jl b/test/univariate/orderstatistic.jl new file mode 100644 index 0000000000..5cd65cb856 --- /dev/null +++ b/test/univariate/orderstatistic.jl @@ -0,0 +1,232 @@ +using Test, Distributions +using Random +using StatsBase + +@testset "OrderStatistic" begin + @testset "basic" begin + for dist in [Uniform(), Normal(), DiscreteUniform(10)], n in [1, 2, 10], i in 1:n + d = OrderStatistic(dist, n, i) + @test d isa OrderStatistic + if dist isa DiscreteUnivariateDistribution + @test d isa DiscreteUnivariateDistribution + else + @test d isa ContinuousUnivariateDistribution + end + @test d.dist === dist + @test d.n == n + @test d.rank == i + end + @test_throws ArgumentError OrderStatistic(Normal(), 0, 1) + OrderStatistic(Normal(), 0, 1; check_args=false) + @test_throws ArgumentError OrderStatistic(Normal(), 10, 11) + OrderStatistic(Normal(), 10, 11; check_args=false) + @test_throws ArgumentError OrderStatistic(Normal(), 10, 0) + OrderStatistic(Normal(), 10, 0; check_args=false) + end + + @testset "params" begin + for dist in [Uniform(), Normal(), DiscreteUniform(10)], n in [1, 2, 10], i in 1:n + d 
= OrderStatistic(dist, n, i) + @test params(d) == (params(dist)..., n, i) + @test partype(d) === partype(dist) + end + end + + @testset "support" begin + n = 10 + for i in 1:10 + d1 = OrderStatistic(Uniform(), n, i) + @test minimum(d1) == 0 + @test maximum(d1) == 1 + @test insupport(d1, 0) + @test insupport(d1, 0.5) + @test insupport(d1, 1) + @test !insupport(d1, -eps()) + @test !insupport(d1, 1 + eps()) + @test !hasfinitesupport(d1) + @test islowerbounded(d1) + @test isupperbounded(d1) + + d2 = OrderStatistic(Normal(), n, i) + @test minimum(d2) == -Inf + @test maximum(d2) == Inf + @test insupport(d2, -Inf) + @test insupport(d2, 0) + @test insupport(d2, Inf) + @test !hasfinitesupport(d2) + @test !islowerbounded(d2) + @test !isupperbounded(d2) + + d3 = OrderStatistic(DiscreteUniform(1, 10), n, i) + @test minimum(d3) == 1 + @test maximum(d3) == 10 + @test insupport(d3, 1) + @test insupport(d3, 5) + @test insupport(d3, 10) + @test !insupport(d3, 0) + @test !insupport(d3, 11) + @test hasfinitesupport(d3) + @test islowerbounded(d3) + @test isupperbounded(d3) + end + end + + @testset "pdf/logpdf" begin + @testset "continuous" begin + # test against the exact formula computed using BigFloats + @testset for T in (Float32, Float64) + @testset for dist in + [Uniform(T(-2), T(1)), Normal(T(3), T(2)), Exponential(T(10))], + n in [1, 10, 100], + i in 1:n + + d = OrderStatistic(dist, n, i) + c = factorial(big(n)) / factorial(big(i - 1)) / factorial(big(n - i)) + # since density is concentrated around the i/n quantile, sample a point + # nearby it + x = quantile(dist, clamp(i / n + (rand() - 1//2) / 10, 0, 1)) + p = cdf(dist, big(x)) + pdf_exp = c * p^(i - 1) * (1 - p)^(n - i) * pdf(dist, big(x)) + @test @inferred(T, pdf(d, x)) ≈ T(pdf_exp) + @test @inferred(T, logpdf(d, x)) ≈ T(log(pdf_exp)) + end + end + end + @testset "discrete" begin + # check that the pdf is the difference of the CDF at adjacent points + @testset for dist in + [DiscreteUniform(10, 30), Poisson(100.0),
Binomial(20, 0.3)], + n in [1, 10, 100], + i in 1:n + + d = OrderStatistic(dist, n, i) + xs = quantile(dist, 0.01):quantile(dist, 0.99) + for x in xs + p = @inferred pdf(d, x) + lp = @inferred logpdf(d, x) + @test lp ≈ logdiffcdf(d, x, x - 1) + @test p ≈ exp(lp) + end + end + end + end + + @testset "distribution normalizes to 1" begin + @testset for dist in [ + Uniform(-2, 1), + Normal(2, 3), + Exponential(5), + DiscreteUniform(10, 40), + Poisson(100), + ], + n in [1, 10, 20], + i in 1:n + + d = OrderStatistic(dist, n, i) + @test Distributions.expectation(one, d) ≈ 1 + end + end + + @testset "cdf/logcdf/ccdf/logccdf/quantile/cquantile" begin + # test against the exact formula computed using BigFloats + @testset for T in (Float32, Float64) + dists = [ + (Uniform(T(-2), T(1)), Uniform(big(-2), big(1))), + (Normal(T(3), T(2)), Normal(big(3), big(2))), + (Exponential(T(10)), Exponential(big(10))), + (DiscreteUniform(1, 10), DiscreteUniform(1, 10)), + (Poisson(T(20)), Poisson(big(20))), + ] + @testset for (dist, bigdist) in dists, n in [10, 100], i in 1:n + dist isa DiscreteDistribution && T !== Float64 && continue + d = OrderStatistic(dist, n, i) + # since density is concentrated around the i/n quantile, sample a point + # nearby it + x = quantile(dist, clamp(i / n + (rand() - 1//2) / 10, 1e-4, 1 - 1e-4)) + p = cdf(bigdist, big(x)) + cdf_exp = sum(i:n) do j + c = binomial(big(n), big(j)) + return c * p^j * (1 - p)^(n - j) + end + @test @inferred(T, cdf(d, x)) ≈ T(cdf_exp) + @test cdf(d, maximum(d)) ≈ one(T) + @test cdf(d, minimum(d) - 1) ≈ zero(T) + @test @inferred(T, logcdf(d, x)) ≈ T(log(cdf_exp)) + @test logcdf(d, maximum(d)) ≈ zero(T) + @test logcdf(d, minimum(d) - 1) ≈ -Inf + @test @inferred(T, ccdf(d, x)) ≈ T(1 - cdf_exp) + @test ccdf(d, maximum(d)) ≈ zero(T) + @test ccdf(d, minimum(d) - 1) ≈ one(T) + @test @inferred(T, logccdf(d, x)) ≈ T(log(1 - cdf_exp)) + @test logccdf(d, maximum(d)) ≈ -Inf + @test logccdf(d, minimum(d) - 1) ≈ zero(T) + q = cdf(d, x) + if dist isa
DiscreteDistribution + # for discrete distributions, tiny numerical error can cause the wrong + # integer value to be returned. + q -= sqrt(eps(T)) + end + xq = @inferred(T, quantile(d, q)) + xqc = @inferred(T, cquantile(d, 1 - q)) + @test xq ≈ xqc + @test isapprox(xq, T(x); atol=1e-4) || + (dist isa DiscreteDistribution && xq < x) + end + end + end + + @testset "rand" begin + @testset for T in [Float32, Float64], + dist in [Uniform(T(-2), T(1)), Normal(T(1), T(2))] + + d = OrderStatistic(dist, 10, 5) + rng = Random.default_rng() + Random.seed!(rng, 42) + x = @inferred(rand(rng, d)) + xs = @inferred(rand(rng, d, 10)) + S = eltype(rand(dist)) + @test typeof(x) === S + @test eltype(xs) === S + @test length(xs) == 10 + + Random.seed!(rng, 42) + x2 = rand(rng, d) + xs2 = rand(rng, d, 10) + @test x2 == x + @test xs2 == xs + end + + ndraws = 100_000 + nchecks = 4 * 2 * 111 # NOTE: update if the below number of tests changes + α = (0.01 / nchecks) / 2 # multiple correction + tol = quantile(Normal(), 1 - α) + + @testset for dist in [Uniform(), Exponential(), Poisson(20), Binomial(20, 0.3)] + @testset for n in [1, 10, 100], i in 1:n + d = OrderStatistic(dist, n, i) + x = rand(d, ndraws) + m, v = mean_and_var(x) + if dist isa Uniform + # Arnold (2008). A first course in order statistics. Eqs 2.2.20-21 + m_exact = i / (n + 1) + v_exact = m_exact * (1 - m_exact) / (n + 2) + elseif dist isa Exponential + # Arnold (2008). A first course in order statistics. Eqs 4.6.6-7 + m_exact = sum(k -> inv(n - k + 1), 1:i) + v_exact = sum(k -> inv((n - k + 1)^2), 1:i) + elseif dist isa DiscreteUnivariateDistribution + # estimate mean and variance with explicit sum, Eqs 3.2.6-7 from + # Arnold (2008). A first course in order statistics. 
+ xs = 0:quantile(dist, 0.9999) + m_exact = sum(x -> ccdf(d, x), xs) + v_exact = 2 * sum(x -> x * ccdf(d, x), xs) + m_exact - m_exact^2 + end + # compute asymptotic sample standard deviation + mean_std = sqrt(v_exact / ndraws) + var_std = sqrt((moment(x, 4) - v_exact^2) / ndraws) + @test m ≈ m_exact atol = (tol * mean_std) + @test v ≈ v_exact atol = (tol * var_std) + end + end + end +end From 4ec511b3bc248293d3324e3f7233f349665b2e6f Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 22 May 2023 22:56:39 +0200 Subject: [PATCH 91/93] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2d891419ea..c9b1c1a0a2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.93" +version = "0.25.94" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From fd58ce19ba18e15cd0c766893c2928e91ad61ad6 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 25 May 2023 18:50:48 +0200 Subject: [PATCH 92/93] Improve accuracy of `logdiffcdf(::Normal, x, y)` (#1728) --- Project.toml | 2 +- src/univariate/continuous/normal.jl | 9 +++++++++ test/univariate/continuous/normal.jl | 18 ++++++++++++++---- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index c9b1c1a0a2..84090560ee 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Distributions" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" authors = ["JuliaStats"] -version = "0.25.94" +version = "0.25.95" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" diff --git a/src/univariate/continuous/normal.jl b/src/univariate/continuous/normal.jl index f652a27d77..1dcadadac3 100644 --- a/src/univariate/continuous/normal.jl +++ b/src/univariate/continuous/normal.jl @@ -89,6 +89,15 @@ end # Use Julia implementations in StatsFuns @_delegate_statsfuns Normal norm μ σ +# 
`logerf(...)` is more accurate for arguments in the tails than `logsubexp(logcdf(...), logcdf(...))` +function logdiffcdf(d::Normal, x::Real, y::Real) + x < y && throw(ArgumentError("requires x >= y.")) + μ, σ = params(d) + _x, _y, _μ, _σ = promote(x, y, μ, σ) + s = sqrt2 * _σ + return logerf((_y - _μ) / s, (_x - _μ) / s) - logtwo +end + gradlogpdf(d::Normal, x::Real) = (d.μ - x) / d.σ^2 mgf(d::Normal, t::Real) = exp(t * d.μ + d.σ^2 / 2 * t^2) diff --git a/test/univariate/continuous/normal.jl b/test/univariate/continuous/normal.jl index 6f494d9383..ccc6266dbb 100644 --- a/test/univariate/continuous/normal.jl +++ b/test/univariate/continuous/normal.jl @@ -1,4 +1,4 @@ -using Test, Distributions, ForwardDiff +using Test, Distributions, StatsFuns, ForwardDiff isnan_type(::Type{T}, v) where {T} = isnan(v) && v isa T @@ -17,9 +17,19 @@ isnan_type(::Type{T}, v) where {T} = isnan(v) && v isa T @test -Inf === logpdf(Normal(), Inf) @test iszero(logcdf(Normal(0, 0), 0)) @test iszero(logcdf(Normal(), Inf)) - @test logdiffcdf(Normal(), Float32(5), Float32(3)) ≈ -6.607938594596893 rtol=1e-12 - @test logdiffcdf(Normal(), Float32(5), Float64(3)) ≈ -6.607938594596893 rtol=1e-12 - @test logdiffcdf(Normal(), Float64(5), Float64(3)) ≈ -6.607938594596893 rtol=1e-12 + @test @inferred(logdiffcdf(Normal(), 5f0, 3f0)) ≈ -6.607938594596893 rtol=1e-12 + @test @inferred(logdiffcdf(Normal(), 5f0, 3.0)) ≈ -6.607938594596893 rtol=1e-12 + @test @inferred(logdiffcdf(Normal(), 5.0, 3.0)) ≈ -6.607938594596893 rtol=1e-12 + @test_throws ArgumentError logdiffcdf(Normal(), 3, 5) + + # Arguments in the tails + logdiffcdf_big(d::Normal, x::Real, y::Real) = logsubexp(logcdf(d, big(y)), logcdf(d, big(x))) + for d in (Normal(), Normal(2.1, 0.1)), (a, b) in ((15, 10), (115, 100), (1015, 1000)) + for (x, y) in ((a, b), (-b, -a)) + @test isfinite(@inferred(logdiffcdf(d, x, y))) + @test logdiffcdf(d, x, y) ≈ logdiffcdf_big(d, x, y) + end + end let d = Normal(Float64(0), Float64(1)), x = Float64(-60), y = 
Float64(-60.001) float_res = logdiffcdf(d, x, y) big_x = BigFloat(x; precision=100) From 2dee35e13eacb0909c6b2189f229ce93c04d2560 Mon Sep 17 00:00:00 2001 From: Pietro Monticone <38562595+pitmonticone@users.noreply.github.com> Date: Sat, 27 May 2023 18:09:56 +0200 Subject: [PATCH 93/93] Update order_statistics.md (#1729) --- docs/src/order_statistics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/order_statistics.md b/docs/src/order_statistics.md index cd2bad58bd..731d1099dd 100644 --- a/docs/src/order_statistics.md +++ b/docs/src/order_statistics.md @@ -1,6 +1,6 @@ # Order Statistics -The $i$th [Order Statistic](https://en.wikipedia.org/wiki/Order_statistic) of a random sample of size $n$ from a univeriate distribution is the $i$th element after sorting in increasing order. +The $i$th [Order Statistic](https://en.wikipedia.org/wiki/Order_statistic) of a random sample of size $n$ from a univariate distribution is the $i$th element after sorting in increasing order. As a special case, the first and $n$th order statistics are the minimum and maximum of the sample, while for odd $n$, the $\lceil \frac{n}{2} \rceil$th entry is the sample median. Given any univariate distribution and the sample size $n$, we can construct the distribution of its $i$th order statistic: