Skip to content

Commit

Permalink
Merge dae16e2 into aae37e9
Browse files Browse the repository at this point in the history
  • Loading branch information
palday authored Apr 13, 2021
2 parents aae37e9 + dae16e2 commit cdd1369
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 6 deletions.
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ Run-time formula syntax
* Methods for `Base./(::AbstractTerm, ::AbstractTerm)` are added, allowing
nesting syntax to be used with `Term`s at run-time as well [#470]

MixedModels v3.6.0 Release Notes
========================
* Add `likelihoodratiotest` method for comparing non-mixed (generalized) linear models to (generalized) linear mixed models [#508].

MixedModels v3.5.2 Release Notes
========================
* Explicitly deprecate non-functional `named` kwarg in `ranef` in favor of `raneftables` [#507].

MixedModels v3.5.1 Release Notes
========================
* Fix MIME show methods for models with random-effects not corresponding to a fixed effect [#501].
Expand Down Expand Up @@ -219,3 +227,5 @@ Package dependencies
[#493]: https://github.com/JuliaStats/MixedModels.jl/issues/493
[#495]: https://github.com/JuliaStats/MixedModels.jl/issues/495
[#501]: https://github.com/JuliaStats/MixedModels.jl/issues/501
[#507]: https://github.com/JuliaStats/MixedModels.jl/issues/507
[#508]: https://github.com/JuliaStats/MixedModels.jl/issues/508
1 change: 1 addition & 0 deletions src/MixedModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ using Tables
using LinearAlgebra: BlasFloat, BlasReal, HermOrSym, PosDefException, copytri!
using Base: Ryu
using GLM: Link, canonicallink
using StatsModels: TableRegressionModel

using StatsFuns: log2π, normccdf

Expand Down
134 changes: 131 additions & 3 deletions src/likelihoodratiotest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,24 @@ Base.getindex(lrt::LikelihoodRatioTest, s::Symbol) = getfield(lrt,s)

"""
likelihoodratiotest(m::MixedModel...)
likelihoodratiotest(m0::LinearModel, m::MixedModel...)
likelihoodratiotest(m0::GeneralizedLinearModel, m::MixedModel...)
likelihoodratiotest(m0::TableRegressionModel{LinearModel}, m::MixedModel...)
likelihoodratiotest(m0::TableRegressionModel{GeneralizedLinearModel}, m::MixedModel...)
Likeihood ratio test applied to a set of nested models.
Note that nesting of the models is not checked. It is incumbent on the user
to check this. This differs from [`StatsModels.lrtest`](@ref) as nesting in
mixed models, especially in the random effects specification, may be non obvious.
!!! note
The nesting of the models is not checked. It is incumbent on the user
to check this. This differs from [`StatsModels.lrtest`](@ref) as nesting in
mixed models, especially in the random effects specification, may be non obvious.
!!! note
For comparisons between mixed and non-mixed models, the deviance for the non-mixed
model is taken to be -2 log likelihood, i.e. omitting the additive constant for the
fully saturated model. This is in line with the computation of the deviance for mixed
models.
This functionality may be deprecated in the future in favor of [`StatsModels.lrtest`](@ref).
"""
Expand Down Expand Up @@ -85,6 +97,34 @@ function likelihoodratiotest(m::MixedModel...)
)
end

_formula(::Union{LinearModel, GeneralizedLinearModel}) = "NA"
_formula(x::TableRegressionModel{<:Union{LinearModel, GeneralizedLinearModel}}) = String(Symbol(x.mf.f))

function likelihoodratiotest(m0::Union{TableRegressionModel{<:Union{LinearModel, GeneralizedLinearModel}},
LinearModel, GeneralizedLinearModel},
m::MixedModel...)
_iscomparable(m0, first(m)) ||
throw(ArgumentError("""Models are not comparable: are the objectives, data
and, where appropriate, the link and family the same?
"""))
lrt = likelihoodratiotest(m...)
devs = pushfirst!(lrt.deviance, -2 * loglikelihood(m0))
formulas = pushfirst!(lrt.formulas, _formula(m0))
dofs = pushfirst!(lrt.models.dof, dof(m0))
devdiffs = pushfirst!(lrt.tests.deviancediff, devs[1] - devs[2])
dofdiffs = pushfirst!(lrt.tests.dofdiff, dofs[2] - dofs[1])

df, dev = first(dofdiffs), first(devdiffs)
p = dev > 0 ? ccdf(Chisq(df), dev) : NaN
pvals = pushfirst!(lrt.tests.pvalues, p)

LikelihoodRatioTest(
formulas,
(dof = dofs, deviance = devs),
(dofdiff = dofdiffs, deviancediff = devdiffs, pvalues = pvals)
)
end

function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest)
println(io, "Model Formulae")

Expand Down Expand Up @@ -218,3 +258,91 @@ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0)

true
end


function _iscomparable(m1::TableRegressionModel{<:Union{LinearModel, GeneralizedLinearModel}},
m2::MixedModel)
_iscomparable(m1.model, m2) || return false

# check that the nested fixef are a subset of the outer
all(in.(coefnames(m1), Ref(coefnames(m2)))) || return false

return true
end

# GLM isn't nested with in LMM and LM isn't nested within GLMM
_iscomparable(m1::Union{LinearModel, GeneralizedLinearModel}, m2::MixedModel) = false

function _iscomparable(m1::LinearModel, m2::LinearMixedModel)
nobs(m1) == nobs(m2) || return false

# XXX This reaches into the internal structure of GLM
size(m1.pp.X, 2) <= size(m2.X, 2) || return false

_isnested(m1.pp.X, m2.X) || return false

!m2.optsum.REML ||
throw(ArgumentError("REML-fitted models cannot be compared to linear models"))

return true
end

function _iscomparable(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel)
nobs(m1) == nobs(m2) || return false

# XXX This reaches into the internal structure of GLM
size(m1.pp.X, 2) <= size(m2.X, 2) || return false

_isnested(m1.pp.X, m2.X) || return false

Distribution(m1) == Distribution(m2) ||
throw(ArgumentError("Models must be fit to the same distribution"))

Link(m1) == Link(m2) ||
throw(ArgumentError("Models must have the same link function"))

return true
end

"""
_isnested(x::AbstractMatrix, y::AbstractMatrix; atol::Real=0.0)
Test whether the column span of `x` is a subspace of (nested within)
the column span of y.
The nesting of the column span of the fixed-effects model matrices is a necessary,
but not sufficient condition for a linear model (whether mixed-effects or not)
to be nested within a linear mixed-effects model.
!!! note
The `rtol` argument is an internal threshold and not currently
compatible with the `atol` argument of `StatsModels.isnested`.
"""
function _isnested(x::AbstractMatrix, y::AbstractMatrix; rtol=1e-8, ranktol=1e-8)

# technically this can return false positives if x or y
# are rank deficient, but either they're rank deficient
# in the same way (b/c same data) and we don't care OR
# it's not the same data/fixef specification and we're
# extra conservative
# size(x, 2) <= size(y, 2) || return false

qy = qr(y).Q

qrx = qr(x, Val(true))
dvec = abs.(diag(qrx.R))
fdv = first(dvec)
cmp = fdv * ranktol
r = searchsortedlast(dvec, cmp, rev=true)

p = qy' * x

nested = map(eachcol(p)) do col
# if set Julia 1.6 as the minimum, we can use last(col, r)
top = @view col[firstindex(col):(end-r-1)]
tail = @view col[(end-r):end]
return norm(tail) / norm(top) < rtol
end

return all(nested)
end
46 changes: 43 additions & 3 deletions test/likelihoodratiotest.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using DataFrames
using GLM
using MixedModels
using Test

Expand Down Expand Up @@ -45,8 +47,13 @@ end

fm0 = fit(MixedModel,@formula(reaction ~ 1 + (1+days|subj)),slp);
fm1 = fit(MixedModel,@formula(reaction ~ 1 + days + (1+days|subj)),slp);
lm0 = lm(@formula(reaction ~ 1), slp)
lm1 = lm(@formula(reaction ~ 1 + days), slp)

lrt = likelihoodratiotest(fm0,fm1);
@test MixedModels._iscomparable(lm0, fm1)
@test !MixedModels._iscomparable(lm1, fm0)

lrt = likelihoodratiotest(fm0,fm1)

@test [deviance(fm0), deviance(fm1)] == lrt.deviance
@test deviance(fm0) - deviance(fm1) == only(lrt.tests.deviancediff)
Expand All @@ -58,19 +65,38 @@ end
show(IOBuffer(),lrt);
@test :pvalues in propertynames(lrt)

lrt = likelihoodratiotest(lm1,fm1)
@test lrt.deviance likelihoodratiotest(lm1.model,fm1).deviance
@test lrt.dof == [3, 6]
@test lrt.deviance -2 * loglikelihood.([lm1, fm1])

# non nested FE between non-mixed and mixed
@test_throws ArgumentError likelihoodratiotest(lm1, fm0)

# mix of REML and ML
fm0 = fit(MixedModel,@formula(reaction ~ 1 + (1+days|subj)),slp, REML=true);
@test_throws ArgumentError likelihoodratiotest(fm0,fm1)
@test_throws ArgumentError likelihoodratiotest(lm0,fm0)

# differing FE with REML
fm1 = fit(MixedModel,@formula(reaction ~ 1 + days + (1+days|subj)),slp, REML=true);

@test_throws ArgumentError likelihoodratiotest(fm0,fm1)

contra = MixedModels.dataset(:contra);
# glm doesn't like categorical responses, so we convert it to numeric ourselves
# TODO: upstream fix
cc = DataFrame(contra);
cc.usenum = ifelse.(cc.use .== "Y", 1 , 0)
gmf = glm(@formula(usenum ~ 1+age+urban+livch), cc, Bernoulli());
gmf2 = glm(@formula(usenum ~ 1+age+abs2(age)+urban+livch), cc, Bernoulli());
gm0 = fit(MixedModel, @formula(use ~ 1+age+urban+livch+(1|urban&dist)), contra, Bernoulli(), fast=true);
gm1 = fit(MixedModel, @formula(use ~ 1+age+abs2(age)+urban+livch+(1|urban&dist)), contra, Bernoulli(), fast=true);

lrt = likelihoodratiotest(gmf, gm1)
@test [-2 * loglikelihood(gmf), deviance(gm1)] lrt.deviance
@test -2 * loglikelihood(gmf) - deviance(gm1) only(lrt.tests.deviancediff)

lrt = likelihoodratiotest(gm0,gm1);
@test [deviance(gm0), deviance(gm1)] == lrt.deviance
@test deviance(gm0) - deviance(gm1) == only(lrt.tests.deviancediff)
Expand All @@ -82,9 +108,23 @@ end

# mismatched links
gm_probit = fit(MixedModel, @formula(use ~ 1+age+urban+livch+(1|urban&dist)), contra, Bernoulli(), ProbitLink(), fast=true);
@test_throws ArgumentError likelihoodratiotest(gm0,gm_probit)
@test_throws ArgumentError likelihoodratiotest(gmf, gm_probit)
@test_throws ArgumentError likelihoodratiotest(gm0, gm_probit)

# mismatched families
gm_poisson = fit(MixedModel, @formula(use ~ 1+age+urban+livch+(1|urban&dist)), contra, Poisson(), fast=true);
@test_throws ArgumentError MixedModels.likelihoodratiotest(gm0,gm_poisson)
@test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson)
@test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson)

@test !MixedModels._iscomparable(lm0, gm0)
@test !MixedModels._iscomparable(gmf, fm1)

@test MixedModels._iscomparable(gmf, gm0)
@test !MixedModels._iscomparable(gmf2, gm0)

@test MixedModels._isnested(gmf.mm.m, gm0.X)
@test !MixedModels._isnested(gmf2.mm.m, gm0.X)
# this skips the linear term so that the model matrices
# have the same column rank
@test !MixedModels._isnested(gmf2.mm.m[:,Not(2)], gm0.X)
end

0 comments on commit cdd1369

Please sign in to comment.