Skip to content

Commit

Permalink
Fix sampling from distributions with integer-valued parameters (e.g. …
Browse files Browse the repository at this point in the history
…`MvNormal` and `Dirichlet`) (JuliaStats#1262)

* Fix sampling from `Dirichlet`

* Use containers with floating point numbers for samples from continuous distributions
  • Loading branch information
devmotion authored Jan 22, 2021
1 parent 01ba56b commit 863844c
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 100 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Distributions"
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
authors = ["JuliaStats"]
version = "0.24.11"
version = "0.24.12"

[deps]
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
Expand Down
4 changes: 4 additions & 0 deletions src/matrixvariates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,14 @@ end
# multiple matrix-variates, must allocate array of arrays
rand(rng::AbstractRNG, s::Sampleable{Matrixvariate}, dims::Dims) =
rand!(rng, s, Array{Matrix{eltype(s)}}(undef, dims), true)
rand(rng::AbstractRNG, s::Sampleable{Matrixvariate,Continuous}, dims::Dims) =
rand!(rng, s, Array{Matrix{float(eltype(s))}}(undef, dims), true)

# single matrix-variate, must allocate one matrix
rand(rng::AbstractRNG, s::Sampleable{Matrixvariate}) =
_rand!(rng, s, Matrix{eltype(s)}(undef, size(s)))
rand(rng::AbstractRNG, s::Sampleable{Matrixvariate,Continuous}) =
_rand!(rng, s, Matrix{float(eltype(s))}(undef, size(s)))

# single matrix-variate with pre-allocated matrix
function rand!(rng::AbstractRNG, s::Sampleable{Matrixvariate},
Expand Down
2 changes: 1 addition & 1 deletion src/multivariate/mvnormal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ _rand!(rng::AbstractRNG, d::MvNormal, x::VecOrMat) =
# Workaround: randn! only works for Array, but not generally for AbstractArray
function _rand!(rng::AbstractRNG, d::MvNormal, x::AbstractVector)
for i in eachindex(x)
@inbounds x[i] = randn(rng,eltype(d))
@inbounds x[i] = randn(rng, eltype(x))
end
add!(unwhiten!(d.Σ, x), d.μ)
end
Expand Down
8 changes: 7 additions & 1 deletion src/multivariates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,18 @@ end
rand(s::Sampleable{Multivariate}, n::Int) = rand(GLOBAL_RNG, s, n)
rand(rng::AbstractRNG, s::Sampleable{Multivariate}, n::Int) =
_rand!(rng, s, Matrix{eltype(s)}(undef, length(s), n))
rand(rng::AbstractRNG, s::Sampleable{Multivariate,Continuous}, n::Int) =
_rand!(rng, s, Matrix{float(eltype(s))}(undef, length(s), n))
rand(rng::AbstractRNG, s::Sampleable{Multivariate}, dims::Dims) =
rand(rng, s, Array{Vector{eltype(s)}}(undef, dims), true)
rand!(rng, s, Array{Vector{eltype(s)}}(undef, dims), true)
rand(rng::AbstractRNG, s::Sampleable{Multivariate,Continuous}, dims::Dims) =
rand!(rng, s, Array{Vector{float(eltype(s))}}(undef, dims), true)

# single multivariate, must allocate vector
rand(rng::AbstractRNG, s::Sampleable{Multivariate}) =
_rand!(rng, s, Vector{eltype(s)}(undef, length(s)))
rand(rng::AbstractRNG, s::Sampleable{Multivariate,Continuous}) =
_rand!(rng, s, Vector{float(eltype(s))}(undef, length(s)))

## domain

Expand Down
2 changes: 1 addition & 1 deletion src/univariate/continuous/normal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ cf(d::Normal, t::Real) = exp(im * t * d.μ - d.σ^2 / 2 * t^2)

#### Sampling

rand(rng::AbstractRNG, d::Normal{T}) where {T} = d.μ + d.σ * randn(rng, T)
rand(rng::AbstractRNG, d::Normal{T}) where {T} = d.μ + d.σ * randn(rng, float(T))

#### Fitting

Expand Down
2 changes: 2 additions & 0 deletions src/univariates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ end
# multiple univariate, must allocate array
rand(rng::AbstractRNG, s::Sampleable{Univariate}, dims::Dims) =
rand!(rng, sampler(s), Array{eltype(s)}(undef, dims))
rand(rng::AbstractRNG, s::Sampleable{Univariate,Continuous}, dims::Dims) =
rand!(rng, sampler(s), Array{float(eltype(s))}(undef, dims))

# multiple univariate with pre-allocated array
function rand!(rng::AbstractRNG, s::Sampleable{Univariate}, A::AbstractArray)
Expand Down
196 changes: 100 additions & 96 deletions test/dirichlet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,100 +12,104 @@ rng = MersenneTwister(123)
Dict("rand(...)" => [rand, rand],
"rand(rng, ...)" => [dist -> rand(rng, dist), (dist, n) -> rand(rng, dist, n)])

d = Dirichlet(3, 2.0)

@test length(d) == 3
@test d.alpha == [2.0, 2.0, 2.0]
@test d.alpha0 == 6.0

@test mean(d) fill(1.0/3, 3)
@test cov(d) [8 -4 -4; -4 8 -4; -4 -4 8] / (36 * 7)
@test var(d) diag(cov(d))

@test pdf(Dirichlet([1, 1]), [0, 1]) 1.0
@test pdf(Dirichlet([1f0, 1f0]), [0f0, 1f0]) 1.0f0
@test typeof(pdf(Dirichlet([1f0, 1f0]), [0f0, 1f0])) == Float32

@test pdf(d, [-1, 1, 0]) 0.0
@test pdf(d, [0, 0, 1]) 0.0
@test pdf(d, [0.2, 0.3, 0.5]) 3.6
@test pdf(d, [0.4, 0.5, 0.1]) 2.4
@test logpdf(d, [0.2, 0.3, 0.5]) log(3.6)
@test logpdf(d, [0.4, 0.5, 0.1]) log(2.4)

x = func[2](d, 100)
p = pdf(d, x)
lp = logpdf(d, x)
for i in 1 : size(x, 2)
@test lp[i] logpdf(d, x[:,i])
@test p[i] pdf(d, x[:,i])
end

v = [2.0, 1.0, 3.0]
d = Dirichlet(v)

@test Dirichlet([2, 1, 3]).alpha == d.alpha

@test length(d) == length(v)
@test d.alpha == v
@test d.alpha0 == sum(v)
@test d == Dirichlet{eltype(d)}(params(d)...)
@test d == deepcopy(d)

@test mean(d) v / sum(v)
@test cov(d) [8 -2 -6; -2 5 -3; -6 -3 9] / (36 * 7)
@test var(d) diag(cov(d))

@test pdf(d, [0.2, 0.3, 0.5]) 3.0
@test pdf(d, [0.4, 0.5, 0.1]) 0.24
@test logpdf(d, [0.2, 0.3, 0.5]) log(3.0)
@test logpdf(d, [0.4, 0.5, 0.1]) log(0.24)

x = func[2](d, 100)
p = pdf(d, x)
lp = logpdf(d, x)
for i in 1 : size(x, 2)
@test p[i] pdf(d, x[:,i])
@test lp[i] logpdf(d, x[:,i])
end

# Sampling

x = func[1](d)
@test isa(x, Vector{Float64})
@test length(x) == 3

x = func[2](d, 10)
@test isa(x, Matrix{Float64})
@test size(x) == (3, 10)

v = [2.0, 1.0, 3.0]
d = Dirichlet(Float32.(v))

x = func[1](d)
@test isa(x, Vector{Float32})
@test length(x) == 3

x = func[2](d, 10)
@test isa(x, Matrix{Float32})
@test size(x) == (3, 10)


# Test MLE

v = [2.0, 1.0, 3.0]
d = Dirichlet(v)

n = 10000
x = func[2](d, n)
x = x ./ sum(x, dims=1)

r = fit_mle(Dirichlet, x)
@test isapprox(r.alpha, d.alpha, atol=0.25)
r = fit(Dirichlet{Float32}, x)
@test isapprox(r.alpha, d.alpha, atol=0.25)

# r = fit_mle(Dirichlet, x, fill(2.0, n))
# @test isapprox(r.alpha, d.alpha, atol=0.25)

for T in (Int, Float64)
d = Dirichlet(3, T(2))

@test length(d) == 3
@test eltype(d) === T
@test d.alpha == [2, 2, 2]
@test d.alpha0 == 6

@test mean(d) fill(1/3, 3)
@test cov(d) [8 -4 -4; -4 8 -4; -4 -4 8] / (36 * 7)
@test var(d) diag(cov(d))

@test pdf(Dirichlet([1, 1]), [0, 1]) 1
@test pdf(Dirichlet([1f0, 1f0]), [0f0, 1f0]) 1
@test typeof(pdf(Dirichlet([1f0, 1f0]), [0f0, 1f0])) === Float32

@test iszero(pdf(d, [-1, 1, 0]))
@test iszero(pdf(d, [0, 0, 1]))
@test pdf(d, [0.2, 0.3, 0.5]) 3.6
@test pdf(d, [0.4, 0.5, 0.1]) 2.4
@test logpdf(d, [0.2, 0.3, 0.5]) log(3.6)
@test logpdf(d, [0.4, 0.5, 0.1]) log(2.4)

x = func[2](d, 100)
p = pdf(d, x)
lp = logpdf(d, x)
for i in 1 : size(x, 2)
@test lp[i] logpdf(d, x[:,i])
@test p[i] pdf(d, x[:,i])
end

v = [2, 1, 3]
d = Dirichlet(T.(v))

@test eltype(d) === T
@test Dirichlet([2, 1, 3]).alpha == d.alpha

@test length(d) == length(v)
@test d.alpha == v
@test d.alpha0 == sum(v)
@test d == Dirichlet{T}(params(d)...)
@test d == deepcopy(d)

@test mean(d) v / sum(v)
@test cov(d) [8 -2 -6; -2 5 -3; -6 -3 9] / (36 * 7)
@test var(d) diag(cov(d))

@test pdf(d, [0.2, 0.3, 0.5]) 3
@test pdf(d, [0.4, 0.5, 0.1]) 0.24
@test logpdf(d, [0.2, 0.3, 0.5]) log(3)
@test logpdf(d, [0.4, 0.5, 0.1]) log(0.24)

x = func[2](d, 100)
p = pdf(d, x)
lp = logpdf(d, x)
for i in 1 : size(x, 2)
@test p[i] pdf(d, x[:,i])
@test lp[i] logpdf(d, x[:,i])
end

# Sampling

x = func[1](d)
@test isa(x, Vector{Float64})
@test length(x) == 3

x = func[2](d, 10)
@test isa(x, Matrix{Float64})
@test size(x) == (3, 10)

v = [2, 1, 3]
d = Dirichlet(Float32.(v))
@test eltype(d) === Float32

x = func[1](d)
@test isa(x, Vector{Float32})
@test length(x) == 3

x = func[2](d, 10)
@test isa(x, Matrix{Float32})
@test size(x) == (3, 10)


# Test MLE

v = [2, 1, 3]
d = Dirichlet(v)

n = 10000
x = func[2](d, n)
x = x ./ sum(x, dims=1)

r = fit_mle(Dirichlet, x)
@test r.alpha d.alpha atol=0.25
r = fit(Dirichlet{Float32}, x)
@test r.alpha d.alpha atol=0.25

# r = fit_mle(Dirichlet, x, fill(2.0, n))
# @test r.alpha ≈ d.alpha atol=0.25
end
end
7 changes: 7 additions & 0 deletions test/mvnormal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,10 @@ end
@test_throws DimensionMismatch dot(o3, d4)
end
end

@testset "MvNormal: Sampling with integer-valued parameters (#1004)" begin
d = MvNormal([0, 0], [1, 1])
@test rand(d) isa Vector{Float64}
@test rand(d, 10) isa Matrix{Float64}
@test rand(d, (3, 2)) isa Matrix{Vector{Float64}}
end
7 changes: 7 additions & 0 deletions test/normal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,10 @@ end
@test isnan_type(Float32, @inferred(cquantile(Normal(1.0f0, 0.0f0), NaN32)))
@test @inferred(cquantile(Normal(1//1, 0//1), 1//2)) === 1.0
end

@testset "Normal: Sampling with integer-valued parameters" begin
d = Normal{Int}(0, 1)
@test rand(d) isa Float64
@test rand(d, 10) isa Vector{Float64}
@test rand(d, (3, 2)) isa Matrix{Float64}
end

0 comments on commit 863844c

Please sign in to comment.