Minor release with Gramian training for OLS (#151)
* fix a doc typo

* fixes following discussion around #147

* small adjustments + typo fixes

* first pass at Gramian training for OLS (#146)

* proof of concept

* AbstractMatrix -> AVR

* cleaner impl

* endline

* fix error type

* construct kernels if not passed in

* add test case for implicit gram construction

* last endline

* check for isempty instead of iszero

* Prepare minor release with gramian training

---------

Co-authored-by: Anthony D. Blaom <[email protected]>
Co-authored-by: adienes <[email protected]>
3 people authored Oct 2, 2023
1 parent a872d7c commit 0887cfe
Showing 12 changed files with 86 additions and 16 deletions.
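For context on the headline feature: for least-squares losses the smooth part of the objective depends on the data only through the Gram matrix X'X and the vector X'y, so a model can be fitted from those p×p and length-p summaries instead of the full n×p design matrix. A minimal illustration of that equivalence for ridge regression (standalone sketch; none of these names are part of the package API):

using LinearAlgebra

# Solve ridge once from (X, y) and once from the Gramian summaries (X'X, X'y);
# both give the same coefficients because the normal equations
# (X'X + λI) θ = X'y involve the data only through those two quantities.
X, y, λ = randn(200, 5), randn(200), 0.1

θ_full = (X'X + λ * I) \ (X'y)        # uses the n×p design matrix
XX, Xy = X'X, X'y                     # p×p and length-p summaries
θ_gram = (XX + λ * I) \ Xy            # never touches X again

@assert isapprox(θ_full, θ_gram; rtol=1e-10)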
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "MLJLinearModels"
uuid = "6ee0df7b-362f-4a72-a706-9e79364fb692"
authors = ["Thibaut Lienart <[email protected]>"]
version = "0.9.2"
version = "0.10.0"

[deps]
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
19 changes: 14 additions & 5 deletions src/fit/default.jl
@@ -33,13 +33,22 @@ $SIGNATURES
Fit a generalised linear regression model using an appropriate solver based on
the loss and penalty of the model. A method can, in some cases, be specified.
"""
function fit(glr::GLR, X::AbstractMatrix{<:Real}, y::AVR;
function fit(glr::GLR, X::AbstractMatrix{<:Real}, y::AVR; data=nothing,
solver::Solver=_solver(glr, size(X)))
check_nrows(X, y)
n, p = size(X)
c = getc(glr, y)
return _fit(glr, solver, X, y, scratch(n, p, c, i=glr.fit_intercept))
if hasproperty(solver, :gram) && solver.gram
# interpret X,y as X'X, X'y
data = verify_or_construct_gramian(glr, X, y, data)
p = size(data.XX, 2)
return _fit(glr, solver, data.XX, data.Xy, (; dims=(data.n, p, 0)))
else
check_nrows(X, y)
n, p = size(X)
c = getc(glr, y)
return _fit(glr, solver, X, y, scratch(n, p, c, i=glr.fit_intercept))
end
end
fit(glr::GLR; kwargs...) = fit(glr, zeros((0,0)), zeros((0,)); kwargs...)


function scratch(n, p, c=0; i=false)
p_ = p + Int(i)
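The reworked `fit` entry point accepts the Gramian either implicitly, by forming X'X and X'y from the supplied `X` and `y` when the solver has `gram=true`, or explicitly, via the new `data` keyword together with the zero-argument `fit(glr; ...)` convenience method. A sketch of both call patterns, mirroring the test added further down (the data here is a placeholder):

using MLJLinearModels

X, y = randn(100, 5), randn(100)
enr = ElasticNetRegression(0.1, 0.1; fit_intercept=false,
                           scale_penalty_with_samples=false)

# Implicit: X'X and X'y are constructed inside `fit`.
θ_implicit = fit(enr, X, y; solver=FISTA(max_iter=5000, gram=true))

# Explicit: pass precomputed summaries and skip the design matrix entirely.
data = (; XX = X'X, Xy = X'y, n = size(X, 1))
θ_explicit = fit(enr; data=data, solver=FISTA(max_iter=5000, gram=true))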
17 changes: 13 additions & 4 deletions src/fit/proxgrad.jl
@@ -3,7 +3,7 @@
# Assumption: loss has gradient; penalty has prox e.g.: Lasso
# J(θ) = f(θ) + r(θ) where f is smooth
function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
_,p,c = npc(scratch)
n,p,c = npc(scratch)
c > 0 && (p *= c)
# vector caches + eval cache
θ = zeros(p) # θ_k
@@ -19,9 +19,18 @@ function _fit(glr::GLR, solver::ProxGrad, X, y, scratch)
η = 1.0 # stepsize (1/L)
acc = ifelse(solver.accel, 1.0, 0.0) # if 0, no extrapolation (ISTA)
# functions
_f = smooth_objective(glr, X, y; c=c)
_fg! = smooth_fg!(glr, X, y, scratch)
_prox! = prox!(glr, size(X, 1))
_f = if solver.gram
smooth_gram_objective(glr, X, y, n)
else
smooth_objective(glr, X, y; c=c)
end

_fg! = if solver.gram
smooth_gram_fg!(glr, X, y, n)
else
smooth_fg!(glr, X, y, scratch)
end
_prox! = prox!(glr, n)
bt_cond = θ̂ ->
_f(θ̂) > fθ̄ + dot(θ̂ .- θ̄, ∇fθ̄) + sum(abs2.(θ̂ .- θ̄)) / (2η)
# loop-related
1 change: 1 addition & 0 deletions src/fit/solvers.jl
@@ -133,6 +133,7 @@ Proximal Gradient solver for non-smooth objective functions.
tol::Float64 = 1e-4 # tol relative change of θ i.e. norm(θ-θ_)/norm(θ)
max_inner::Int = 100 # β^max_inner should be > 1e-10
beta::Float64 = 0.8 # in (0, 1); shrinkage in the backtracking step
gram::Bool = false # use precomputed Gramian for lsq where possible
end

FISTA(; kwa...) = ProxGrad(;accel = true, kwa...)
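The new `gram` field is opt-in and defaults to `false`, so existing `ProxGrad`-family configurations are unaffected; per the inline comment it is only used for least-squares objectives where the Gramian shortcut applies. Enabling it is a single keyword (sketch):

fista_gram = FISTA(max_iter=5000, gram=true)    # accelerated proximal gradient on X'X / X'y
prox_gram  = ProxGrad(accel=false, gram=true)   # plain (non-accelerated) variant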
9 changes: 9 additions & 0 deletions src/glr/d_l2loss.jl
@@ -72,3 +72,12 @@ function smooth_fg!(glr::GLR{L2Loss,<:ENR}, X, y, scratch)
return glr.loss(r) + get_l2(glr.penalty)(view_θ(glr, θ))
end
end

function smooth_gram_fg!(glr::GLR{L2Loss,<:ENR}, XX, Xy, n)
λ = get_penalty_scale_l2(glr, n)
(g, θ) -> begin
_g = XX * θ .- Xy
g .= _g .+ λ .* θ
return θ'*_g + get_l2(glr.penalty)(view_θ(glr, θ))
end
end
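The Gram-based gradient above uses the identity ∇(‖Xθ − y‖²/2) = X'(Xθ − y) = (X'X)θ − X'y, so once X'X and X'y are cached each gradient evaluation costs O(p²) rather than O(np). A quick numerical check of the identity (illustrative, independent of the package):

X, y, θ = randn(50, 4), randn(50), randn(4)

g_design = X' * (X * θ - y)      # gradient formed from the design matrix
g_gram   = (X'X) * θ - X'y       # same gradient from the cached summaries

@assert isapprox(g_design, g_gram; rtol=1e-8)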
5 changes: 4 additions & 1 deletion src/glr/utils.jl
@@ -1,4 +1,4 @@
export objective, smooth_objective
export objective, smooth_objective, smooth_gram_objective

# NOTE: RobustLoss are not always everywhere smooth but "smooth-enough".
const SmoothLoss = Union{L2Loss, LogisticLoss, MultinomialLoss, RobustLoss}
@@ -37,6 +37,9 @@ Return the smooth part of the objective function of a GLR.
"""
smooth_objective(glr::GLR{<:SmoothLoss,<:ENR}, n) = glr.loss + get_l2(glr.penalty) * ifelse(glr.scale_penalty_with_samples, n, 1.)

smooth_gram_objective(glr::GLR{<:SmoothLoss,<:ENR}, XX, Xy, n) =
    θ -> (θ'*XX*θ)/2 - (θ'*Xy) + (get_l2(glr.penalty) * ifelse(glr.scale_penalty_with_samples, n, 1.))(θ)

smooth_objective(::GLR) = @error "Case not implemented yet."

"""
4 changes: 2 additions & 2 deletions src/mlj/classifiers.jl
@@ -65,7 +65,7 @@ See also [`MultinomialClassifier`](@ref).
"""some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `Newton`,
`NewtonCG`, `ProxGrad`; but subject to the following restrictions:
- If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxyGrad` is the only
- If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxGrad` is the only
option.
- Unless `scitype(y) <: Finite{2}` (binary target) `Newton` is disallowed.
@@ -142,7 +142,7 @@ See also [`LogisticClassifier`](@ref).
"""some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`,
`NewtonCG`, `ProxGrad`; but subject to the following restrictions:
- If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxyGrad` is the only
- If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxGrad` is the only
option.
- Unless `scitype(y) <: Finite{2}` (binary target) `Newton` is disallowed.
23 changes: 23 additions & 0 deletions src/utils.jl
@@ -9,6 +9,29 @@ function check_nrows(X::AbstractMatrix, y::AbstractVecOrMat)::Nothing
throw(DimensionMismatch("`X` and `y` must have the same number of rows."))
end

function verify_or_construct_gramian(glr, X, y, data)
check_nrows(X, y)
isnothing(data) && return (; XX = X'X, Xy = X'y, n = length(y))

!all(hasproperty.(Ref(data), (:XX, :Xy, :n))) && throw(ArgumentError("data must contain XX, Xy, n"))
size(data.XX, 1) != size(data.Xy, 1) && throw(DimensionMismatch("`XX` and Xy` must have the same number of rows."))
!issymmetric(data.XX) && throw(ArgumentError("Input `XX` must be symmetric"))

c = getc(glr, data.Xy)
!iszero(c) && throw(ArgumentError("Categorical loss not supported with Gramian kernel"))
glr.fit_intercept && throw(ArgumentError("Intercept not supported with Gramian kernel"))

if any(!isempty, (X, y))
all((
isapprox(X'X, data.XX; rtol=1e-5),
isapprox(X'y, data.Xy; rtol=1e-5),
length(y) == data.n
)) || throw(ArgumentError("Inputs `X` and `y` do not match inputs `XX` and `Xy`."))
end

return data
end

"""
$SIGNATURES
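The helper above defines the contract for explicit Gramian input: `data` must carry fields `XX` (symmetric), `Xy`, and `n`, the Gram path supports neither an intercept nor categorical losses, and when non-empty `X`/`y` are also passed they are checked for consistency against the summaries. A sketch of a valid input and of a call that should be rejected (hypothetical example built from the constructors used in the tests):

using MLJLinearModels

X, y = randn(100, 5), randn(100)

# Valid explicit input: symmetric XX, matching Xy, and the sample count n.
data = (; XX = X'X, Xy = X'y, n = size(X, 1))

# Should be rejected: the Gramian path does not support an intercept, so
# `verify_or_construct_gramian` is expected to throw an ArgumentError.
enr_intercept = ElasticNetRegression(0.1, 0.1; fit_intercept=true)
fit(enr_intercept; data=data, solver=FISTA(gram=true))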
1 change: 1 addition & 0 deletions test/Project.toml
@@ -2,6 +2,7 @@
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
4 changes: 2 additions & 2 deletions test/benchmarks/robust.jl
@@ -2,9 +2,9 @@

if DO_COMPARISONS
@testset "Comp-QR-147" begin
using CSV, DataFrames
using CSV, DataFrames, Downloads

dataset = CSV.read(download("http://freakonometrics.free.fr/rent98_00.txt"), DataFrame)
dataset = CSV.read(Downloads.download("http://freakonometrics.free.fr/rent98_00.txt"), DataFrame)
tau = 0.3

y = Vector(dataset[!,:rent_euro])
15 changes: 15 additions & 0 deletions test/fit/ols-ridge-lasso-elnet.jl
@@ -146,3 +146,18 @@ end
@test nnz(θ_sk) == 8
end
end

@testset "gramian" begin
λ = 0.1
γ = 0.1
enr = ElasticNetRegression(λ, γ; fit_intercept=false,
scale_penalty_with_samples=false)
XX = X'X
Xy = X'y
n = size(X, 1)
θ_fista = fit(enr, X, y; solver=FISTA(max_iter=5000))
θ_gram_explicit = fit(enr; data=(; XX, Xy, n), solver=FISTA(max_iter=5000, gram=true))
θ_gram_implicit = fit(enr, X, y; solver=FISTA(max_iter=5000, gram=true))
@test isapprox(θ_fista, θ_gram_explicit, rtol=1e-5)
@test isapprox(θ_gram_explicit, θ_gram_implicit; rtol=1e-5)
end
2 changes: 1 addition & 1 deletion test/fit/quantile.jl
@@ -74,7 +74,7 @@ end
## With Sparsity penalty ##
###########################

Jn, p = 500, 100
n, p = 500, 100
((X, y, θ), (X1, y1, θ1)) = generate_continuous(n, p; seed=51112, sparse=0.1)
# pepper with outliers
y1a = outlify(y1, 0.1)
