JuliaStats · palday · Mar 2, 2021 · Jan 15, 2021 · Jan 16, 2021 · Jan 16, 2021
diff --git a/docs/src/GaussHermite.md b/docs/src/GaussHermite.md
@@ -33,8 +33,10 @@ For a `k`th order normalized Gauss-Hermite rule the tridiagonal matrix has zeros
 using DataFrames, LinearAlgebra, Gadfly
 sym3 = SymTridiagonal(zeros(3), sqrt.(1:2))
 ev = eigen(sym3);
-show(ev.values)
-show(abs2.(ev.vectors[1,:]))
+ev.values
+```
+```@example Main
+abs2.(ev.vectors[1,:])
 ```
 As a function of `k` this can be written as
 ```@example Main

diff --git a/docs/src/bootstrap.md b/docs/src/bootstrap.md
@@ -41,7 +41,7 @@ To bootstrap the model parameters, first initialize a random number generator th
 
 ```@example Main
 const rng = MersenneTwister(1234321);
-samp = parametricbootstrap(rng, 10_000, m1, use_threads=true);
+samp = parametricbootstrap(rng, 10_000, m1);
 df = DataFrame(samp.allpars);
 first(df, 10)
 ```

diff --git a/docs/src/optimization.md b/docs/src/optimization.md
@@ -120,7 +120,7 @@ identity matrix where the multiple is
 t1.λ
 ```
 
-Because there is only one random-effects term in the model, the matrix $\bf Z$ is the indicators matrix shown as the result of `Matrix(t1)`, but stored in a special sparse format.
+Because there is only one random-effects term in the model, the matrix $\bf Z$ is the indicators matrix shown as the result of `Int.(t1)`, but stored in a special sparse format.
 Furthermore, there is only one block in $\Lambda_\theta$.
 
 
@@ -153,19 +153,23 @@ Int.(t31)
 For this model the matrix $\bf Z$ is the same as that of model `fm2` but the diagonal blocks of $\Lambda_\theta$ are themselves diagonal.
 ```@example Main
 t31.λ
+```
+```@example Main
 MixedModels.getθ(t31)
 ```
-Random-effects terms with distinct grouping factors generate distinct elements of the `allterms` field of the `LinearMixedModel` object.
+Random-effects terms with distinct grouping factors generate distinct elements of the `reterms` field of the `LinearMixedModel` object.
 Multiple `ReMat` objects are sorted by decreasing numbers of random effects.
 ```@example Main
 penicillin = MixedModels.dataset(:penicillin)
 fm4 = fit(MixedModel,
-    @formula(diameter ~ 1 + (1|plate) + (1|sample)),
+    @formula(diameter ~ 1 + (1|sample) + (1|plate)),
     penicillin)
 Int.(first(fm4.reterms))
+```
+```@example Main
 Int.(last(fm4.reterms))
 ```
-Note that the first `ReMat` in `fm4.terms` corresponds to grouping factor `G` even though the term `(1|G)` occurs in the formula after `(1|H)`.
+Note that the first `ReMat` in `fm4.reterms` corresponds to grouping factor `plate` even though the term `(1|plate)` occurs in the formula after `(1|sample)`.
 
 ### Progress of the optimization
 
@@ -183,6 +187,7 @@ fm2.optsum
 ## A blocked Cholesky factor
 
 A `LinearMixedModel` object contains two blocked matrices; a symmetric matrix `A` (only the lower triangle is stored) and a lower-triangular `L` which is the lower Cholesky factor of the updated and inflated `A`.
+In versions 4.0.0 and later of `MixedModels` only the blocks in the lower triangle are stored in `A` and `L`, as a `Vector{AbstractMatrix{T}}`
 ```@docs
 BlockDescription
 ```
@@ -191,6 +196,8 @@ shows the structure of the blocks
 BlockDescription(fm2)
 ```
 
+Another change in v4.0.0 and later is that the last row of blocks is constructed from `m.Xymat` which contains the full-rank model matrix `X` with the response `y` concatenated on the right.
+
 The operation of installing a new value of the variance parameters, `θ`, and updating `L`
 ```@docs
 setθ!
@@ -246,7 +253,7 @@ In a [*generalized linear model*](https://en.wikipedia.org/wiki/Generalized_line
 The scalar distributions of individual responses differ only in their means, which are determined by a *linear predictor* expression $\eta=\bf X\beta$, where, as before, $\bf X$ is a model matrix derived from the values of covariates and $\beta$ is a vector of coefficients.
 
 The unconstrained components of $\eta$ are mapped to the, possiby constrained, components of the mean response, $\mu$, via a scalar function, $g^{-1}$, applied to each component of $\eta$.
-For historical reasons, the inverse of this function, taking components of $\mu$ to the corresponding component of $\eta$ is called the *link function* and more frequently used map from $\eta$ to $\mu$ is the *inverse link*.
+For historical reasons, the inverse of this function, taking components of $\mu$ to the corresponding component of $\eta$ is called the *link function* and the more frequently used map from $\eta$ to $\mu$ is the *inverse link*.
 
 A *generalized linear mixed-effects model* (GLMM) is defined, for the purposes of this package, by
 ```math
@@ -298,7 +305,7 @@ In a call to the `pirls!` function the first argument is a `GeneralizedLinearMix
 The second and third arguments are optional logical values indicating if $\beta$ is to be varied and if verbose output is to be printed.
 
 ```@example Main
-pirls!(mdl, true, true)
+pirls!(mdl, true, false);
 ```
 
 ```@example Main
@@ -344,9 +351,3 @@ mdl1 = @btime fit(MixedModel, vaform, verbagg, Bernoulli())
 
 This fit provided slightly better results (Laplace approximation to the deviance of 8151.400 versus 8151.583) but took 6 times as long.
 That is not terribly important when the times involved are a few seconds but can be important when the fit requires many hours or days of computing time.
-
-The comparison of the slow and fast fit is available in the optimization summary after the slow fit.
-
-```@example Main
-mdl1.LMM.optsum
-```
diff --git a/src/MixedModels.jl b/src/MixedModels.jl
@@ -155,7 +155,7 @@ include("pca.jl")
 include("datasets.jl")
 include("arraytypes.jl")
 include("varcorr.jl")
-include("femat.jl")
+include("Xymat.jl")
 include("remat.jl")
 include("optsummary.jl")
 include("schema.jl")

diff --git a/src/Xymat.jl b/src/Xymat.jl
@@ -0,0 +1,130 @@
+"""
+    FeTerm{T,S}
+
+Term with an explicit, constant matrix representation
+
+# Fields
+* `x`: full model matrix
+* `piv`: pivot `Vector{Int}` for moving linearly dependent columns to the right
+* `rank`: computational rank of `x`
+* `cnames`: vector of column names
+"""
+struct FeTerm{T,S<:AbstractMatrix}
+    x::S
+    piv::Vector{Int}
+    rank::Int
+    cnames::Vector{String}
+end
+
+"""
+    FeTerm(X::AbstractMatrix, cnms)
+
+Convenience constructor for [`FeTerm`](@ref) that computes the rank and pivot with unit weights.
+
+See the vignette "[Rank deficiency in mixed-effects models](@ref)" for more information on the 
+computation of the rank and pivot.
+"""
+function FeTerm(X::AbstractMatrix{T}, cnms) where {T}
+    if iszero(size(X, 2))
+        return FeTerm{T,typeof(X)}(X, Int[], 0, cnms)
+    end
+    st = statsqr(X)
+    pivot = st.p
+    rank = findfirst(<=(0), diff(st.p))
+    rank = isnothing(rank) ? length(pivot) : rank
+    Xp = pivot == collect(1:size(X, 2)) ? X : X[:, pivot]
+        # single-column rank deficiency is the result of a constant column vector
+        # this generally happens when constructing a dummy response, so we don't
+        # warn.
+    if rank < length(pivot) && size(X,2) > 1
+        @warn "Fixed-effects matrix is rank deficient"
+    end
+    FeTerm{T,typeof(X)}(Xp, pivot, rank, cnms[pivot])
+end
+
+"""
+    FeTerm(X::SparseMatrixCSC, cnms)
+
+Convenience constructor for a sparse [`FeTerm`](@ref) assuming full rank, identity pivot and unit weights.
+
+Note: automatic rank deficiency handling may be added to this method in the future, as discused in
+the vignette "[Rank deficiency in mixed-effects models](@ref)" for general `FeTerm`.
+"""
+function FeTerm(X::SparseMatrixCSC, cnms::AbstractVector{String})
+    @debug "Full rank is assumed for sparse fixed-effect matrices."
+    rank = size(X,2)
+    FeTerm{eltype(X),typeof(X)}(X, collect(1:rank), rank, collect(cnms))
+end
+
+Base.copyto!(A::FeTerm{T}, src::AbstractVecOrMat{T}) where {T} = copyto!(A.x, src)
+
+Base.eltype(::FeTerm{T}) where {T} = T
+
+function fullrankx(A::FeTerm)
+    x, rnk = A.x, A.rank
+    rnk == size(x, 2) ? x : view(x, :, 1:rnk)  # this handles the zero-columns case
+end
+
+LinearAlgebra.rank(A::FeTerm) = A.rank
+
+"""
+    isfullrank(A::FeTerm)
+
+Does `A` have full column rank?
+"""
+isfullrank(A::FeTerm) = A.rank == length(A.piv)
+
+"""
+    FeMat{T,S}
+
+A matrix and a (possibly) weighted copy of itself.
+
+# Fields
+- `xy`: original matrix, called `xy` b/c in practice this is `hcat(fullrank(X), y)`
+- `wtxy`: (possibly) weighted copy of `xy`
+
+Upon construction the `xy` and `wtxy` fields refer to the same matrix
+"""
+mutable struct FeMat{T,S<:AbstractMatrix} <: AbstractMatrix{T}
+    xy::S
+    wtxy::S
+end
+
+function FeMat(A::FeTerm{T}, y::AbstractVector{T}) where {T}
+    xy = hcat(fullrankx(A), y)
+    FeMat{T,typeof(xy)}(xy, xy)
+end
+
+Base.adjoint(A::FeMat) = Adjoint(A)
+
+Base.eltype(::FeMat{T}) where {T} = T
+
+Base.getindex(A::FeMat, i, j) = getindex(A.xy, i, j)
+
+Base.length(A::FeMat) = length(A.wtxy)
+
+function *(adjA::Adjoint{T,<:FeMat{T}}, B::FeMat{T}) where {T}
+    adjoint(adjA.parent.wtxy) * B.wtxy
+end
+
+function LinearAlgebra.mul!(R::StridedVecOrMat{T}, A::FeMat{T}, B::StridedVecOrMat{T}) where {T}
+    mul!(R, A.wtxy, B)
+end
+
+function LinearAlgebra.mul!(C, adjA::Adjoint{T,<:FeMat{T}}, B::FeMat{T}) where {T}
+    mul!(C, adjoint(adjA.parent.wtxy), B.wtxy)
+end
+
+function reweight!(A::FeMat{T}, sqrtwts::Vector{T}) where {T}
+    if !isempty(sqrtwts)
+        if A.xy === A.wtxy
+            A.wtxy = similar(A.xy)
+        end
+        mul!(A.wtxy, Diagonal(sqrtwts), A.xy)
+    end
+    A
+end
+
+Base.size(A::FeMat) = size(A.xy)
+
+Base.size(A::FeMat, i::Integer) = size(A.xy, i)
diff --git a/src/arraytypes.jl b/src/arraytypes.jl
@@ -111,5 +111,5 @@ function LinearAlgebra.mul!(
     α,
     β
     ) where {T,P,Ti}
-    mul!(C.cscmat, A, adjB.parent.cscmat', α, β)
+    mul!(C.cscmat, A, adjoint(adjB.parent.cscmat), α, β)
 end
diff --git a/src/femat.jl b/src/femat.jl