average calibration functions in utils.jl #97
Changes from 36 commits
@@ -1,4 +1,5 @@
using Flux
using Statistics

"""
    get_loss_fun(likelihood::Symbol)

@@ -39,3 +40,128 @@
function outdim(model::Chain)::Number
    return [size(p) for p in Flux.params(model)][end][1]
end

Review comment: I fixed the docstrings, but for a new line (without the empty line in the middle) I had to use the `\` character.

Review comment: I checked the results in the Julia REPL, but I was not able to make the function appear in the documentation. I tried going into docs/ and running `julia make.jl`, and I got "[ Info: SetupBuildDirectory: setting up build directory."

"""
    empirical_frequency_regression(Y_cal, sampled_distributions, n_bins=20)

FOR REGRESSION MODELS.
Given a calibration dataset (x_t, y_t) for t ∈ {1,...,T} and an array of predicted distributions, the function calculates the empirical frequency

    phat_j = |{y_t | F_t(y_t) <= p_j, t = 1,...,T}| / T,

where T is the number of calibration points, p_j is the confidence level and F_t is the
cumulative distribution function of the predicted distribution targeting y_t.
Source: https://arxiv.org/abs/1807.00263

Inputs:
    - `Y_cal`: a vector of values y_t.
    - `sampled_distributions`: an array of sampled distributions F(x_t) stacked column-wise.
    - `n_bins`: number of equally spaced bins to use.
Outputs:
    - `counts`: an array containing the empirical frequencies for each quantile interval.
"""
function empirical_frequency_regression(Y_cal, sampled_distributions, n_bins=20)
    quantiles = collect(range(0; stop=1, length=n_bins + 1))
    quantiles_matrix = hcat(
        [quantile(samples, quantiles) for samples in sampled_distributions]...
    )
    n_rows = size(quantiles_matrix, 1)
    counts = []

    for i in 1:n_rows
        push!(counts, sum(Y_cal .<= quantiles_matrix[i, :]) / length(Y_cal))
    end
    return counts
end
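As an illustration of the empirical-frequency computation above, here is a hypothetical sketch on synthetic data (not part of this PR): for a perfectly calibrated forecaster, the fraction of targets falling below the predicted p_j-quantile should be close to p_j.

```julia
using Statistics, Random

Random.seed!(1)
T = 1000
# synthetic calibration targets and, for each one, a perfectly calibrated
# predictive distribution represented by samples from the true distribution
Y_cal = randn(T)
sampled_distributions = [randn(500) for _ in 1:T]

# empirical frequency at a single confidence level p_j = 0.5:
# the fraction of y_t lying below the predicted median
p_j = 0.5
phat_j = mean(Y_cal[t] <= quantile(sampled_distributions[t], p_j) for t in 1:T)
println(phat_j)  # should be close to 0.5 for a calibrated model
```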
"""
    sharpness_regression(sampled_distributions)

FOR REGRESSION MODELS.
Given a calibration dataset (x_t, y_t) for t ∈ {1,...,T} and an array of predicted distributions, the function calculates the
sharpness of the predicted distributions, i.e., the average of the variances var(F_t) predicted by the forecaster for each x_t.
Source: https://arxiv.org/abs/1807.00263

Inputs:
    - `sampled_distributions`: an array of sampled distributions F(x_t) stacked column-wise.
Outputs:
    - `sharpness`: a scalar that measures the level of sharpness of the regressor.
"""
function sharpness_regression(sampled_distributions)
    sharpness = mean(var.(sampled_distributions))
    return sharpness
end
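To see what the sharpness measure captures, consider a hypothetical sketch (assuming sample-based predicted distributions, as above): a confident forecaster with low predictive variance scores lower than a vague one.

```julia
using Statistics, Random

Random.seed!(1)
# two hypothetical forecasters, each predicting one sampled distribution per input
confident = [0.1 .* randn(200) for _ in 1:50]  # per-input variance ≈ 0.01
vague     = [2.0 .* randn(200) for _ in 1:50]  # per-input variance ≈ 4.0

# same computation as sharpness_regression above
sharp_confident = mean(var.(confident))
sharp_vague     = mean(var.(vague))
```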
Review comment: Same as the above comment on maths notation.

Review comment: The dash should be an underscore.

"""
    empirical_frequency_binary_classification(y_binary, sampled_distributions, n_bins=20)

FOR BINARY CLASSIFICATION MODELS.
Given a calibration dataset (x_t, y_t) for t ∈ {1,...,T}, let p_t = H(x_t) ∈ [0,1] be the forecasted probability.
We group the p_t into intervals I_j for j = 1,2,...,m that form a partition of [0,1]. The function computes
the observed average p_j = T_j^{-1} ∑_{t : p_t ∈ I_j} y_t in each interval I_j.
Source: https://arxiv.org/abs/1807.00263

Inputs:
    - `y_binary`: the array of outputs y_t, numerically coded: 1 for the target class, 0 for the null class.
    - `sampled_distributions`: an array of sampled distributions stacked column-wise, so that the first row
      holds the probability for the target class y_1 and the second row the probability for the null class y_0.
    - `n_bins`: number of equally spaced bins to use.
Outputs:
    - `num_p_per_interval`: array with the number of probabilities falling within each interval.
    - `emp_avg`: array with the observed empirical average per interval.
    - `bin_centers`: array with the centers of the bins.
"""
function empirical_frequency_binary_classification(
    y_binary, sampled_distributions, n_bins=20
)
    # interval boundaries
    int_bds = collect(range(0; stop=1, length=n_bins + 1))
    # bin centers
    bin_centers = [(int_bds[i] + int_bds[i + 1]) / 2 for i in 1:(length(int_bds) - 1)]
    # initialize list for empirical averages per interval
    emp_avg = []
    # initialize list for predicted averages per interval
    pred_avg = []
    # initialize list of the number of probabilities falling within each interval
    num_p_per_interval = []
    # list of the predicted probabilities for the target class
    class_probs = sampled_distributions[1, :]
    # iterate over the bins
    for j in 1:n_bins
        push!(num_p_per_interval, sum(int_bds[j] .< class_probs .< int_bds[j + 1]))
        if num_p_per_interval[j] == 0
            push!(emp_avg, 0)
            push!(pred_avg, bin_centers[j])
        else
            # find the indices of all instances for which class_probs falls within the j-th interval
            indices = findall(x -> int_bds[j] < x < int_bds[j + 1], class_probs)
            # compute the empirical average and save it in emp_avg at the j-th position
            push!(emp_avg, 1 / num_p_per_interval[j] * sum(y_binary[indices]))
            # TODO: maybe substitute with bin_centers?
            push!(pred_avg, 1 / num_p_per_interval[j] * sum(class_probs[indices]))
        end
    end
    # return the tuple
    return (num_p_per_interval, emp_avg, bin_centers)
end
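The per-bin bookkeeping above can be checked by hand on a tiny example (hypothetical values; this computes the observed and predicted averages for a single interval):

```julia
# forecasted target-class probabilities and the 0/1 outcomes
class_probs = [0.12, 0.18, 0.55, 0.61, 0.58, 0.90]
y_binary    = [0, 0, 1, 0, 0, 1]

# the open interval (0.5, 0.6) contains 0.55 and 0.58 (instances 3 and 5)
lo, hi = 0.5, 0.6
indices = findall(x -> lo < x < hi, class_probs)
num_p = length(indices)                       # 2
emp_avg  = sum(y_binary[indices]) / num_p     # observed frequency: (1 + 0) / 2 = 0.5
pred_avg = sum(class_probs[indices]) / num_p  # average forecast: (0.55 + 0.58) / 2
```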
"""
    sharpness_classification(y_binary, sampled_distributions)

FOR BINARY CLASSIFICATION MODELS.
Assess the sharpness of the model by looking at the distribution of model predictions. When forecasts are sharp,
most predictions are close to 0 or 1; forecasters that are not sharp make predictions closer to 0.5.
Source: https://arxiv.org/abs/1807.00263

Inputs:
    - `y_binary`: the array of outputs y_t, numerically coded: 1 for the target class, 0 for the null class.
    - `sampled_distributions`: an array of sampled distributions stacked column-wise, so that the first row
      holds the probability for the target class and the second row the probability for the null class.
Outputs:
    - `mean_class_one`: a scalar that measures the average prediction for the target class.
    - `mean_class_zero`: a scalar that measures the average prediction for the null class.
"""
function sharpness_classification(y_binary, sampled_distributions)
    mean_class_one = mean(sampled_distributions[1, findall(y_binary .== 1)])
    mean_class_zero = mean(sampled_distributions[2, findall(y_binary .== 0)])
    return mean_class_one, mean_class_zero
end
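A small worked example of this sharpness measure (hypothetical numbers; the rows of `sampled_distributions` follow the layout described in the docstring above):

```julia
using Statistics

# row 1: target-class probability, row 2: null-class probability; one column per instance
sampled_distributions = [0.9 0.8 0.2 0.1;
                         0.1 0.2 0.8 0.9]
y_binary = [1, 1, 0, 0]

mean_class_one  = mean(sampled_distributions[1, findall(y_binary .== 1)])  # (0.9 + 0.8) / 2
mean_class_zero = mean(sampled_distributions[2, findall(y_binary .== 0)])  # (0.8 + 0.9) / 2
# both averages are close to 1, i.e. the forecaster is sharp
```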
Review comment: I guess we could just keep this consistent and return everything in both cases?

Review comment: Edit: my bad, let's indeed, as discussed, just add an option for classification to return the distribution. By default, we should still return probabilities for now, but at least we give the option and add that to the docstring.