Skip to content

Commit

Permalink
Rank-deficient fixed-effects (#112)
Browse files Browse the repository at this point in the history
* Drop columns of zeros (and names) in X

* Bump julia requirement to v0.6.1

* Rewrite to detect rank deficiency in general

* Add pivot and rank to f.e. model matrix [ci skip]

* fixef permuted arg for GLMMs

* Adjust tests

* Fix dof methods and add tests

* Add nobs method for GLMM and tests

* Correct the test

* Correct description and remove vestigial import

* Bump  versions of StatsBase and StatsModels

* Add predict 1-argument methods and tests
  • Loading branch information
dmbates authored Dec 11, 2017
1 parent 37d82e2 commit 9d70f8f
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 27 deletions.
3 changes: 2 additions & 1 deletion src/MixedModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import Base: cor, cond, convert, eltype, full, logdet, std
import Base.LinAlg: A_mul_B!, A_mul_Bc!, Ac_mul_B!, A_ldiv_B!, Ac_ldiv_B!, A_rdiv_B!, A_rdiv_Bc!
import NLopt: Opt
import StatsBase: coef, coeftable, dof, deviance, fit!, fitted, loglikelihood,
model_response, nobs, vcov
model_response, nobs, predict, vcov

export
@formula,
Expand Down Expand Up @@ -64,6 +64,7 @@ export
objective, # the objective function in fitting a model
pwrss, # penalized, weighted residual sum-of-squares
pirls!, # use Penalized Iteratively Reweighted Least Squares to obtain conditional modes of random effects
predict,
ranef, # extract the conditional modes of the random effects
refit!, # install a response and refit the model
remat, # factory for construction of ReMat objects
Expand Down
13 changes: 12 additions & 1 deletion src/PIRLS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@ function StatsBase.dof(m::GeneralizedLinearMixedModel)
length(m.β) + length(m.θ) + GLM.dispersion_parameter(m.resp.d)
end

fixef(m::GeneralizedLinearMixedModel) = m.β
StatsBase.fitted(m::GeneralizedLinearMixedModel) = m.resp.mu

function fixef(m::GeneralizedLinearMixedModel{T}, permuted=true) where T
permuted && return m.β
Xtrm = m.LMM.trms[end - 1]
iperm = invperm(Xtrm.piv)
p = length(iperm)
r = Xtrm.rank
r == p ? m.β[iperm] : copy!(fill(-zero(T), p), m.β)[iperm]
end

"""
glmm(f::Formula, fr::ModelFrame, d::Distribution[, l::GLM.Link])
Expand Down Expand Up @@ -85,6 +94,8 @@ end

StatsBase.nobs(m::GeneralizedLinearMixedModel) = length(m.η)

StatsBase.predict(m::GeneralizedLinearMixedModel) = fitted(m)

"""
updateη!(m::GeneralizedLinearMixedModel)
Expand Down
22 changes: 18 additions & 4 deletions src/mixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ fixefnames(m::MixedModel) = lmm(m).trms[end - 1].cnames
## methods for generics defined in StatsBase

function StatsBase.coeftable(m::MixedModel)
fe = fixef(m)
co = coef(m)
se = stderr(m)
z = fe ./ se
z = co ./ se
pvalue = ccdf.(Chisq(1), abs2.(z))
CoefTable(hcat(fe, se, z, pvalue), ["Estimate", "Std.Error", "z value", "P(>|z|)"],
CoefTable(hcat(co, se, z, pvalue), ["Estimate", "Std.Error", "z value", "P(>|z|)"],
fixefnames(m), 4)
end

Expand Down Expand Up @@ -161,8 +161,22 @@ function ranef(m::MixedModel; uscale=false, named=false)
end

function StatsBase.vcov(m::MixedModel)
Xtrm = lmm(m).trms[end - 1]
iperm = invperm(Xtrm.piv)
p = length(iperm)
r = Xtrm.rank
Linv = inv(feL(m))
varest(m) * (Linv'Linv)
permvcov = varest(m) * (Linv'Linv)
if p == Xtrm.rank
permvcov[iperm, iperm]
else
T = eltype(permvcov)
covmat = fill(zero(T)/zero(T), (p, p))
for j in 1:r, i in 1:r
covmat[i,j] = permvcov[i, j]
end
covmat[iperm, iperm]
end
end

Base.cor(m::MixedModel{T}) where {T} = Matrix{T}[stddevcor(t)[2] for t in reterms(m)]
Expand Down
15 changes: 13 additions & 2 deletions src/modelterms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,24 @@ Term with an explicit, constant matrix representation
mutable struct MatrixTerm{T,S<:AbstractMatrix} <: AbstractTerm{T}
x::S
wtx::S
piv::Vector{Int}
rank::Int
cnames::Vector{String}
end
MatrixTerm(X, cnms) = MatrixTerm{eltype(X),typeof(X)}(X, X, cnms)

function MatrixTerm(X::AbstractMatrix, cnms)
T = eltype(X)
cf = cholfact!(Symmetric(X'X, :U), Val{true}, tol = -one(T))
r = cf.rank
piv = cf.piv
X = X[:, piv[1:r]]
MatrixTerm{T,typeof(X)}(X, X, piv, r, cnms)
end

function MatrixTerm(y::Vector)
T = eltype(y)
m = reshape(y, (length(y), 1))
MatrixTerm{T,Matrix{T}}(m, m, [""])
MatrixTerm{T,Matrix{T}}(m, m, [1], Int(all(iszero, y)), [""])
end

function reweight!(A::MatrixTerm{T}, sqrtwts::Vector{T}) where T
Expand Down
27 changes: 20 additions & 7 deletions src/pls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ function updateL!(m::LinearMixedModel{T}) where T
m
end

StatsBase.coef(m::LinearMixedModel) = fixef(m)
StatsBase.coef(m::MixedModel) = fixef(m, false)

"""
fit!(m::LinearMixedModel[, verbose::Bool=false])
Expand Down Expand Up @@ -219,6 +219,8 @@ end

StatsBase.fitted(m::LinearMixedModel{T}) where {T} = fitted!(Vector{T}(nobs(m)), m)

StatsBase.predict(m::LinearMixedModel) = fitted(m)

StatsBase.residuals(m::LinearMixedModel) = model_response(m) .- fitted(m)

"""
Expand All @@ -243,19 +245,30 @@ end
"""
fixef!(v::Vector{T}, m::LinearMixedModel{T}) where T
Overwrite `v` with the fixed-effects coefficients of model `m`
Overwrite `v` with the pivoted and, possibly, truncated fixed-effects coefficients of model `m`
"""
function fixef!(v::AbstractVector{T}, m::LinearMixedModel{T}) where T
!isfit(m) && throw(ArgumentError("Model m has not been fit"))
Ac_ldiv_B!(feL(m), copy!(v, m.L.data.blocks[end, end - 1]))
L = feL(m)
@argcheck(length(v) == size(L, 1), DimensionMismatch)
Ac_ldiv_B!(L, copy!(v, m.L.data.blocks[end, end - 1]))
end

"""
fixef(m::MixedModel)
fixef(m::MixedModel, permuted=true)
Return the fixed-effects parameter vector estimate of `m`.
Returns the fixed-effects parameter vector estimate.
If `permuted` is `true` the vector elements are permuted according to
`m.trms[end - 1].piv` and truncated to the rank of that term.
"""
fixef(m::LinearMixedModel{T}) where {T} = fixef!(Vector{T}(size(m)[2]), m)
function fixef(m::LinearMixedModel{T}, permuted=true) where T
permuted && return fixef!(Vector{T}(size(m)[2]), m)
Xtrm = m.trms[end - 1]
piv = Xtrm.piv
v = fill(-zero(T), size(piv))
fixef!(view(v, 1:Xtrm.rank), m)
ipermute!(v, piv)
end

StatsBase.dof(m::LinearMixedModel) = size(m)[2] + sum(nθ, m.trms) + 1

Expand Down
6 changes: 4 additions & 2 deletions test/matrixterm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ end

@testset "matrixterm" begin
trm = MatrixTerm(hcat(ones(30), repeat(0:9, outer = 3)), ["(Intercept)", "U"])
piv = trm.piv
ipiv = invperm(piv)
@test size(trm) == (30, 2)
@test trm.x === trm.wtx
prd = trm'trm
@test typeof(prd) == Matrix{Float64}
@test prd == [30.0 135.0; 135.0 855.0]
@test prd == [30.0 135.0; 135.0 855.0][piv, piv]
wts = rand(MersenneTwister(123454321), 30)
MixedModels.reweight!(trm, wts)
@test Ac_mul_B!(prd, trm, trm)[1, 1] sum(abs2, wts)
@test Ac_mul_B!(prd, trm, trm)[ipiv[1], ipiv[1]] sum(abs2, wts)
end
15 changes: 8 additions & 7 deletions test/pirls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ end
Bernoulli()));
@test isapprox(gm1.θ[1], 0.573054, atol=0.005)
@test lowerbd(gm1) == push!(fill(-Inf, 7), 0.)
@test isapprox(LaplaceDeviance(gm1), 2361.57129, rtol=0.00001)
@test isapprox(loglikelihood(gm1), -1180.78565, rtol=0.00001)
@test StatsBase.dof(gm0) == length(gm0.β) + length(gm0.θ)
@test StatsBase.nobs(gm0) == 1934
@test isapprox(LaplaceDeviance(gm1), 2361.5457542860527, rtol=0.00001)
@test isapprox(loglikelihood(gm1), -1180.7729, rtol=0.00001)
# the next three values are not well defined in the optimization
@test isapprox(logdet(gm1), 75.7275, atol=0.1)
@test isapprox(sum(abs2, gm1.u[1]), 48.4747, atol=0.1)
@test isapprox(sum(gm1.resp.devresid), 2237.344, atol=0.1)
#@test isapprox(logdet(gm1), 75.7275, atol=0.1)
#@test isapprox(sum(abs2, gm1.u[1]), 48.4747, atol=0.1)
#@test isapprox(sum(gm1.resp.devresid), 2237.344, atol=0.1)
show(IOBuffer(), gm1)
end

Expand All @@ -48,12 +48,13 @@ end
Bernoulli()));
@test isapprox(LaplaceDeviance(gm3), 8151.39972809092, atol=0.001)
@test lowerbd(gm3) == vcat(fill(-Inf, 6), zeros(2))
@test fitted(gm3) == predict(gm3)
# these two values are not well defined at the optimum
@test isapprox(sum(x -> sum(abs2, x), gm3.u), 273.31563469936697, atol=0.1)
@test isapprox(sum(gm3.resp.devresid), 7156.558983084621, atol=0.1)
end

if false # still missing a method needed here
#=
@testset "grouseticks" begin
gm4 = fit!(glmm(@formula(t ~ 1 + y + ch + (1|i) + (1|b) + (1|l)), dat[:grouseticks],
Poisson()))
Expand All @@ -63,4 +64,4 @@ if false # still missing a method needed here
@test isapprox(sum(x -> sum(abs2, x), gm4.u), 196.8695297987013, atol=0.1)
@test isapprox(sum(gm4.resp.devresid), 220.92685781326136, atol=0.1)
end
end
=#
7 changes: 4 additions & 3 deletions test/pls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ end
@test isapprox(objective(fm1), 237721.7687745563, atol=0.001)
ftd1 = fitted(fm1);
@test size(ftd1) == (73421, )
@test ftd1 == predict(fm1)
@test isapprox(ftd1[1], 3.17876, atol=0.0001)
resid1 = residuals(fm1);
@test size(resid1) == (73421, )
Expand Down Expand Up @@ -156,7 +157,7 @@ end
@test isapprox(logdet(fm), 73.90322021999222, atol=0.001)
@test isapprox(stderr(fm), [6.632257721914501, 1.5022354739749826], atol=0.0001)
@test coef(fm) [251.40510484848477,10.4672859595959]
@test fixef(fm) [251.40510484848477,10.4672859595959]
@test fixef(fm) [10.4672859595959, 251.40510484848477]
@test isapprox(stderr(fm), [6.632246393963571, 1.502190605041084], atol=0.01)
@test isapprox(std(fm)[1], [23.780468100188497, 5.716827903196682], atol=0.01)
@test isapprox(logdet(fm), 73.90337187545992, atol=0.001)
Expand Down Expand Up @@ -188,7 +189,7 @@ end
@test isapprox(deviance(fmnc), 1752.0032551398835, atol=0.001)
@test isapprox(objective(fmnc), 1752.0032551398835, atol=0.001)
@test coef(fmnc) [251.40510484848585, 10.467285959595715]
@test fixef(fmnc) [251.40510484848585, 10.467285959595715]
@test fixef(fmnc) [10.467285959595715, 251.40510484848477]
@test isapprox(stderr(fmnc), [6.707710260366577, 1.5193083237479683], atol=0.001)
@test isapprox(getθ(fmnc), [0.9458106880922268, 0.22692826607677266], atol=0.0001)
@test std(fmnc)[1] [24.171449463289047, 5.799379721123582]
Expand All @@ -210,7 +211,7 @@ end
@test isapprox(objective(fm), 901641.2930413672, rtol = 1e-6)
fit!(fm)
@test isapprox(objective(fm), 884957.5540213, rtol = 1e-6)
@test isapprox(fixef(fm), [0.4991229873, 0.31130780953], atol = 1.e-4)
@test isapprox(coef(fm), [0.4991229873, 0.31130780953], atol = 1.e-4)
end

@testset "simulate!" begin
Expand Down

0 comments on commit 9d70f8f

Please sign in to comment.