JuliaTrustworthyAI · pat-alt · Dec 3, 2024 · Nov 26, 2024 · Nov 26, 2024 · Dec 3, 2024
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -20,7 +20,6 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.9'
           - '1.10'
           - '1'
         os:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 
 *Note*: We try to adhere to these practices as of version [v0.2.1].
 
+
+## Version [1.2.0] - 2024-12-03
+
+### Changed
+
+- Largely removed Unicode characters from code base. [#134]
+- Removed legacy v1.9 from CI testing. [#134]
+
+### Added
+
+- Added general support for MLJ. [#126] [#134]
+
 ## Version [1.1.1] - 2024-09-12
 
 ### Changed

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "LaplaceRedux"
 uuid = "c52c1a26-f7c5-402b-80be-ba1e638ad478"
 authors = ["Patrick Altmeyer"]
-version = "1.1.1"
+version = "1.2.0"
 
 [deps]
 CategoricalDistributions = "af321ab8-2d2e-40a6-b165-3d674595d28e"
@@ -32,7 +32,7 @@ MLJBase = "1"
 MLJModelInterface = "1.8.0"
 MLUtils = "0.4"
 Optimisers = "0.2, 0.3"
-Random = "1.9, 1.10"
+Random = "1"
 Statistics = "1"
 Tables = "1.10.1"
 Test = "1"

diff --git a/docs/src/tutorials/regression.qmd b/docs/src/tutorials/regression.qmd
@@ -128,7 +128,7 @@ then we can plot the calibration plot of our neural model
 
 ```{julia}
 #| output: true
-Calibration_Plot(la,y_test,vec(predicted_distributions);n_bins = 20)
+calibration_plot(la,y_test,vec(predicted_distributions);n_bins = 20)
 ``` 
 
 and compute the sharpness of the predictive distribution

diff --git a/src/baselaplace/core_struct.jl b/src/baselaplace/core_struct.jl
@@ -26,21 +26,45 @@
 - `hessian_structure::HessianStructure`: the structure of the Hessian. Possible values are `:full` and `:kron` or a concrete subtype of `HessianStructure`.
 - `backend::Symbol`: the backend to use. Possible values are `:GGN` and `:Fisher`.
 - `curvature::Union{Curvature.CurvatureInterface,Nothing}`: the curvature interface. Possible values are `nothing` or a concrete subtype of `CurvatureInterface`.
-- `σ::Real`: the observation noise
-- `μ₀::Real`: the prior mean
-- `λ::Real`: the prior precision
-- `P₀::Union{Nothing,AbstractMatrix,UniformScaling}`: the prior precision matrix
+- `observational_noise::Real`: the observation noise
+- `σ::Real`: alias for `observational_noise`.
+- `prior_mean::Real`: the prior mean of the network parameters.
+- `μ₀::Real`: alias for `prior_mean`.
+- `prior_precision::Real`: the prior precision for the network parameters.
+- `λ::Real`: alias for `prior_precision`.
+- `prior_precision_matrix::Union{Nothing,AbstractMatrix,UniformScaling}`: the prior precision matrix for the network parameters.
+- `P₀::Union{Nothing,AbstractMatrix,UniformScaling}`: alias for `prior_precision_matrix`.
 """
 Base.@kwdef struct LaplaceParams
     subset_of_weights::Symbol = :all
     subnetwork_indices::Union{Nothing,Vector{Vector{Int}}} = nothing
     hessian_structure::Union{HessianStructure,Symbol,String} = FullHessian()
     backend::Symbol = :GGN
     curvature::Union{Curvature.CurvatureInterface,Nothing} = nothing
-    σ::Real = 1.0
-    μ₀::Real = 0.0
-    λ::Real = 1.0
-    P₀::Union{Nothing,AbstractMatrix,UniformScaling} = nothing
+    observational_noise::Real = 1.0
+    σ::Real = observational_noise
+    prior_mean::Real = 0.0
+    μ₀::Real = prior_mean
+    prior_precision::Real = 1.0
+    λ::Real = prior_precision
+    prior_precision_matrix::Union{Nothing,AbstractMatrix,UniformScaling} = nothing
+    P₀::Union{Nothing,AbstractMatrix,UniformScaling} = prior_precision_matrix
+end
+
+# Property access for `LaplaceParams`: descriptive names and unicode aliases resolve to
+# the same stored value.
+#
+# Each alias field (`σ`, `μ₀`, `λ`, `P₀`) defaults to its descriptive counterpart, so the
+# alias field holds the effective value whether the caller passed the descriptive keyword,
+# the alias, or neither. Lookups are therefore routed to the alias fields; routing them
+# the other way would discard a value passed via the alias (e.g. `λ=0.1`) and silently
+# return the descriptive default. If both keywords are given, the alias value is used.
+function Base.getproperty(ce::LaplaceParams, sym::Symbol)
+    sym = sym === :observational_noise ? :σ : sym
+    sym = sym === :prior_mean ? :μ₀ : sym
+    sym = sym === :prior_precision ? :λ : sym
+    sym = sym === :prior_precision_matrix ? :P₀ : sym
+    return Base.getfield(ce, sym)
+end
+
+# Property assignment for `LaplaceParams`: a unicode alias is translated to its
+# descriptive field name and the value is stored with `setfield!` (no conversion of `val`).
+# NOTE(review): `LaplaceParams` is declared as an immutable `struct`, so `setfield!`
+# cannot succeed here -- confirm whether this method is meant to be reachable.
+function Base.setproperty!(ce::LaplaceParams, sym::Symbol, val)
+    aliases = (
+        σ=:observational_noise,
+        μ₀=:prior_mean,
+        λ=:prior_precision,
+        P₀=:prior_precision_matrix,
+    )
+    return Base.setfield!(ce, get(aliases, sym, sym), val)
 end
 
 include("estimation_params.jl")
@@ -96,7 +120,7 @@
 """
 function Laplace(model::Any; likelihood::Symbol, kwargs...)
     args = LaplaceParams(; kwargs...)
-    @assert !(args.σ != 1.0 && likelihood != :regression) "Observation noise σ ≠ 1 only available for regression."
+    @assert !(args.observational_noise != 1.0 && likelihood != :regression) "Observation noise σ ≠ 1 only available for regression."
 
     # Unpack arguments and wrap in containers:
     est_args = EstimationParams(args, model, likelihood)

diff --git a/src/baselaplace/optimize_prior.jl b/src/baselaplace/optimize_prior.jl
@@ -19,8 +19,8 @@ function optimize_prior!(
 )
 
     # Setup:
-    logP₀ = isnothing(λinit) ? log.(unique(diag(la.prior.P₀))) : log.([λinit])   # prior precision (scalar)
-    logσ = isnothing(σinit) ? log.([la.prior.σ]) : log.([σinit])                 # noise (scalar)
+    logP₀ = isnothing(λinit) ? log.(unique(diag(la.prior.prior_precision_matrix))) : log.([λinit])   # prior precision (scalar)
+    logσ = isnothing(σinit) ? log.([la.prior.observational_noise]) : log.([σinit])                 # noise (scalar)
     opt = Adam(lr)
     show_every = round(n_steps / 10)
     i = 0

diff --git a/src/baselaplace/posterior.jl b/src/baselaplace/posterior.jl
@@ -5,26 +5,38 @@ Container for the results of a Laplace approximation.
 
 # Fields
 
-- `μ::AbstractVector`: the MAP estimate of the parameters
+- `posterior_mean::AbstractVector`: the MAP estimate of the parameters
 - `H::Union{AbstractArray,AbstractDecomposition,Nothing}`: the Hessian matrix
 - `P::Union{AbstractArray,AbstractDecomposition,Nothing}`: the posterior precision matrix
-- `Σ::Union{AbstractArray,Nothing}`: the posterior covariance matrix
+- `posterior_covariance_matrix::Union{AbstractArray,Nothing}`: the posterior covariance matrix
 - `n_data::Union{Int,Nothing}`: the number of data points
 - `n_params::Union{Int,Nothing}`: the number of parameters
 - `n_out::Union{Int,Nothing}`: the number of outputs
 - `loss::Real`: the loss value
 """
 mutable struct Posterior
-    μ::AbstractVector
+    posterior_mean::AbstractVector
     H::Union{AbstractArray,AbstractDecomposition,Nothing}
     P::Union{AbstractArray,AbstractDecomposition,Nothing}
-    Σ::Union{AbstractArray,Nothing}
+    posterior_covariance_matrix::Union{AbstractArray,Nothing}
     n_data::Union{Int,Nothing}
     n_params::Union{Int,Nothing}
     n_out::Union{Int,Nothing}
     loss::Real
 end
 
+# Property access for `Posterior`. The unicode names `μ` and `Σ` are accepted as aliases
+# for the `posterior_mean` and `posterior_covariance_matrix` fields; any other symbol is
+# passed to `getfield` unchanged.
+function Base.getproperty(ce::Posterior, sym::Symbol)
+    if sym === :μ
+        return Base.getfield(ce, :posterior_mean)
+    elseif sym === :Σ
+        return Base.getfield(ce, :posterior_covariance_matrix)
+    end
+    return Base.getfield(ce, sym)
+end
+
+# Property assignment for `Posterior`. Mirrors the getter: `μ` and `Σ` are rewritten to
+# `posterior_mean` and `posterior_covariance_matrix` before the value is stored with
+# `setfield!` (so `val` is not converted to the field type first).
+function Base.setproperty!(ce::Posterior, sym::Symbol, val)
+    if sym === :μ
+        return Base.setfield!(ce, :posterior_mean, val)
+    elseif sym === :Σ
+        return Base.setfield!(ce, :posterior_covariance_matrix, val)
+    end
+    return Base.setfield!(ce, sym, val)
+end
+
 """
     Posterior(model::Any, est_params::EstimationParams)
 

diff --git a/src/baselaplace/predicting.jl b/src/baselaplace/predicting.jl
@@ -135,7 +135,7 @@ function predict(
     if la.likelihood == :regression
 
         # Add observational noise:
-        pred_var = fvar .+ la.prior.σ^2
+        pred_var = fvar .+ la.prior.observational_noise^2
         fstd = sqrt.(pred_var)
         pred_dist = [Normal(fμ[i], fstd[i]) for i in axes(fμ, 2)]
 

diff --git a/src/baselaplace/prior.jl b/src/baselaplace/prior.jl
@@ -5,16 +5,32 @@
 
 # Fields
 
-- `σ::Real`: the observation noise
-- `μ₀::Real`: the prior mean
-- `λ::Real`: the prior precision
-- `P₀::Union{Nothing,AbstractMatrix,UniformScaling}`: the prior precision matrix
+- `observational_noise::Real`: the observation noise
+- `prior_mean::Real`: the prior mean
+- `prior_precision::Real`: the prior precision
+- `prior_precision_matrix::Union{Nothing,AbstractMatrix,UniformScaling}`: the prior precision matrix
 """
 mutable struct Prior
-    σ::Real
-    μ₀::Real
-    λ::Real
-    P₀::Union{Nothing,AbstractMatrix,UniformScaling}
+    observational_noise::Real
+    prior_mean::Real
+    prior_precision::Real
+    prior_precision_matrix::Union{Nothing,AbstractMatrix,UniformScaling}
+end
+
+# Property access for `Prior`. The unicode names `σ`, `μ₀`, `λ` and `P₀` are accepted as
+# aliases for the descriptive field names; any other symbol is looked up as given.
+function Base.getproperty(ce::Prior, sym::Symbol)
+    aliases = (
+        σ=:observational_noise,
+        μ₀=:prior_mean,
+        λ=:prior_precision,
+        P₀=:prior_precision_matrix,
+    )
+    return Base.getfield(ce, get(aliases, sym, sym))
+end
+
+# Property assignment for `Prior`. A unicode alias (`σ`, `μ₀`, `λ`, `P₀`) is translated to
+# its descriptive field name, then the value is stored with `setfield!` (so `val` is not
+# converted to the field type first). Other symbols are used as given.
+function Base.setproperty!(ce::Prior, sym::Symbol, val)
+    aliases = (
+        σ=:observational_noise,
+        μ₀=:prior_mean,
+        λ=:prior_precision,
+        P₀=:prior_precision_matrix,
+    )
+    return Base.setfield!(ce, get(aliases, sym, sym), val)
 end
 
 """
@@ -23,16 +39,16 @@
 Extracts the prior parameters from a `LaplaceParams` object.
 """
 function Prior(params::LaplaceParams, model::Any, likelihood::Symbol)
-    P₀ = params.P₀
+    prior_precision_matrix = params.prior_precision_matrix
     n = LaplaceRedux.n_params(model, EstimationParams(params, model, likelihood))
-    if typeof(P₀) <: UniformScaling
-        P₀ = P₀(n)
-    elseif isnothing(P₀)
-        P₀ = UniformScaling(params.λ)(n)
+    if typeof(prior_precision_matrix) <: UniformScaling
+        prior_precision_matrix = prior_precision_matrix(n)
+    elseif isnothing(prior_precision_matrix)
+        prior_precision_matrix = UniformScaling(params.prior_precision)(n)
     end
     # Sanity:
-    if isa(P₀, AbstractMatrix)
-        @assert all(size(P₀) .== n) "Dimensions of prior Hessian $(size(P₀)) do not align with number of parameters ($n)"
+    if isa(prior_precision_matrix, AbstractMatrix)
+        @assert all(size(prior_precision_matrix) .== n) "Dimensions of prior Hessian $(size(prior_precision_matrix)) do not align with number of parameters ($n)"
     end
-    return Prior(params.σ, params.μ₀, params.λ, P₀)
+    return Prior(params.observational_noise, params.prior_mean, params.prior_precision, prior_precision_matrix)
 end
diff --git a/src/baselaplace/utils.jl b/src/baselaplace/utils.jl
@@ -18,7 +18,7 @@ LaplaceRedux.n_params(la::Laplace) = LaplaceRedux.n_params(la.model, la.est_para
 Helper function to extract the prior mean of the parameters from a Laplace approximation.
 """
 function get_prior_mean(la::Laplace)
-    return la.prior.μ₀
+    return la.prior.prior_mean
 end
 
 """
@@ -27,7 +27,7 @@ end
 Helper function to extract the prior precision matrix from a Laplace approximation.
 """
 function prior_precision(la::Laplace)
-    return la.prior.P₀
+    return la.prior.prior_precision_matrix
 end
 
 """
@@ -39,15 +39,15 @@ on the last layer of the NN, of a `Flux.Chain` with Laplace approximation.
 outdim(la::AbstractLaplace) = outdim(la.model)
 
 @doc raw"""
-    posterior_precision(la::AbstractLaplace, H=la.posterior.H, P₀=la.prior.P₀)
+    posterior_precision(la::AbstractLaplace, H=la.posterior.H, P₀=la.prior.prior_precision_matrix)
 
 Computes the posterior precision ``P`` for a fitted Laplace Approximation as follows,
 
 ``P = \sum_{n=1}^N\nabla_{\theta}^2 \log p(\mathcal{D}_n|\theta)|_{\hat\theta} + \nabla_{\theta}^2 \log p(\theta)|_{\hat\theta}``
 
 where ``\sum_{n=1}^N\nabla_{\theta}^2\log p(\mathcal{D}_n|\theta)|_{\hat\theta}=H`` is the Hessian and ``\nabla_{\theta}^2 \log p(\theta)|_{\hat\theta}=P_0`` is the prior precision and ``\hat\theta`` is the MAP estimate.
 """
-function posterior_precision(la::AbstractLaplace, H=la.posterior.H, P₀=la.prior.P₀)
+function posterior_precision(la::AbstractLaplace, H=la.posterior.H, P₀=la.prior.prior_precision_matrix)
     @assert !isnothing(H) "Hessian not available. Either no value supplied or Laplace Approximation has not yet been estimated."
     return H + P₀
 end
@@ -70,7 +70,7 @@ end
 function log_likelihood(la::AbstractLaplace)
     factor = -_H_factor(la)
     if la.likelihood == :regression
-        c = la.posterior.n_data * la.posterior.n_out * log(la.prior.σ * sqrt(2 * pi))
+        c = la.posterior.n_data * la.posterior.n_out * log(la.prior.observational_noise * sqrt(2 * pi))
     else
         c = 0
     end
@@ -82,7 +82,7 @@ end
 
 Returns the factor σ⁻², where σ is used in the zero-centered Gaussian prior p(θ) = N(θ;0,σ²I)
 """
-_H_factor(la::AbstractLaplace) = 1 / (la.prior.σ^2)
+_H_factor(la::AbstractLaplace) = 1 / (la.prior.observational_noise^2)
 
 """
     _init_H(la::AbstractLaplace)
@@ -100,7 +100,7 @@ Smaller weights in a neural network can result in a model that is more stable an
 making a prediction on new data.
 """
 function _weight_penalty(la::AbstractLaplace)
-    μ = la.posterior.μ
+    μ = la.posterior.posterior_mean
     μ₀ = get_prior_mean(la)
     Δ = μ .- μ₀
     P₀ = prior_precision(la)
@@ -120,14 +120,14 @@ function log_marginal_likelihood(
 
     # update prior precision:
     if !isnothing(P₀)
-        la.prior.P₀ =
+        la.prior.prior_precision_matrix =
             typeof(P₀) <: AbstractFloat ? UniformScaling(P₀)(la.posterior.n_params) : P₀
     end
 
     # update observation noise:
     if !isnothing(σ)
-        @assert (la.likelihood == :regression || la.prior.σ == σ) "Can only change observational noise σ for regression."
-        la.prior.σ = σ
+        @assert (la.likelihood == :regression || la.prior.observational_noise == σ) "Can only change observational noise σ for regression."
+        la.prior.observational_noise = σ
     end
 
     return log_likelihood(la) - 0.5 * (log_det_ratio(la) + _weight_penalty(la))
@@ -147,7 +147,7 @@ end
 
 
 """
-log_det_prior_precision(la::AbstractLaplace) = sum(log.(diag(la.prior.P₀)))
+log_det_prior_precision(la::AbstractLaplace) = sum(log.(diag(la.prior.prior_precision_matrix)))
 
 """
     log_det_posterior_precision(la::AbstractLaplace)
-Original file line number
+Diff line change
@@ Expand Up / @@ -20,7 +20,6 @@ jobs: @@
           fail-fast: false
           matrix:
             version:
-              - '1.9'
               - '1.10'
               - '1'
             os:
@@ Expand Down @@