From 9de452445c4e918b76199c7f7d8d37d8b296d2a3 Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 16:58:01 +0200
Subject: [PATCH 01/11] field added

---
 Project.toml                                  |  1 +
 src/ConformalPrediction.jl                    |  2 +
 .../inductive_classification.jl               |  8 ++-
 src/conformal_models/inductive_regression.jl  |  8 ++-
 .../transductive_classification.jl            |  8 ++-
 .../transductive_regression.jl                | 52 ++++++++++++++-----
 6 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/Project.toml b/Project.toml
index c728ff5..277ebe0 100755
--- a/Project.toml
+++ b/Project.toml
@@ -20,6 +20,7 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+TaijaBase = "10284c91-9f28-4c9a-abbf-ee43576dfff6"

 [compat]
 Aqua = "0.8"
diff --git a/src/ConformalPrediction.jl b/src/ConformalPrediction.jl
index eafc829..9164852 100755
--- a/src/ConformalPrediction.jl
+++ b/src/ConformalPrediction.jl
@@ -1,5 +1,7 @@
 module ConformalPrediction

+using TaijaBase
+
 # Conformal Models:
 include("conformal_models/conformal_models.jl")
 export ConformalModel
diff --git a/src/conformal_models/inductive_classification.jl b/src/conformal_models/inductive_classification.jl
index 8cdb9e4..119d84d 100755
--- a/src/conformal_models/inductive_classification.jl
+++ b/src/conformal_models/inductive_classification.jl
@@ -29,6 +29,7 @@ mutable struct SimpleInductiveClassifier{Model<:Supervised} <: ConformalProbabil
     coverage::AbstractFloat
     scores::Union{Nothing,Dict{Any,Any}}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end

@@ -36,9 +37,10 @@ function SimpleInductiveClassifier(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=minus_softmax,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return SimpleInductiveClassifier(model, coverage, nothing, heuristic, train_ratio)
+    return SimpleInductiveClassifier(model, coverage, nothing, heuristic, parallelizer, train_ratio)
 end

 """
@@ -127,6 +129,7 @@ mutable struct AdaptiveInductiveClassifier{Model<:Supervised} <: ConformalProbab
     coverage::AbstractFloat
     scores::Union{Nothing,Dict{Any,Any}}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end

@@ -134,9 +137,10 @@ function AdaptiveInductiveClassifier(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=minus_softmax,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return AdaptiveInductiveClassifier(model, coverage, nothing, heuristic, train_ratio)
+    return AdaptiveInductiveClassifier(model, coverage, nothing, heuristic, parallelizer, train_ratio)
 end

 @doc raw"""
diff --git a/src/conformal_models/inductive_regression.jl b/src/conformal_models/inductive_regression.jl
index cfeeb69..7f19fa9 100755
--- a/src/conformal_models/inductive_regression.jl
+++ b/src/conformal_models/inductive_regression.jl
@@ -6,6 +6,7 @@ mutable struct SimpleInductiveRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
    train_ratio::AbstractFloat
 end

@@ -13,9 +14,10 @@ function SimpleInductiveRegressor(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return SimpleInductiveRegressor(model, coverage, nothing, heuristic, train_ratio)
+    return SimpleInductiveRegressor(model, coverage, nothing, heuristic, parallelizer, train_ratio)
 end

 @doc raw"""
@@ -84,6 +86,7 @@ mutable struct ConformalQuantileRegressor{Model<:QuantileModel} <: ConformalInte
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end

@@ -93,9 +96,10 @@ function ConformalQuantileRegressor(
     coverage::AbstractFloat=0.95,
     heuristic::Function=function f(y, ŷ_lb, ŷ_ub)
         return reduce((x, y) -> max.(x, y), [ŷ_lb - y, y - ŷ_ub])
     end,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return ConformalQuantileRegressor(model, coverage, nothing, heuristic, train_ratio)
+    return ConformalQuantileRegressor(model, coverage, nothing, heuristic, parallelizer, train_ratio)
 end

 @doc raw"""
diff --git a/src/conformal_models/transductive_classification.jl b/src/conformal_models/transductive_classification.jl
index d13da08..e489670 100755
--- a/src/conformal_models/transductive_classification.jl
+++ b/src/conformal_models/transductive_classification.jl
@@ -5,12 +5,16 @@ mutable struct NaiveClassifier{Model<:Supervised} <: ConformalProbabilisticSet
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
 end

 function NaiveClassifier(
-    model::Supervised; coverage::AbstractFloat=0.95, heuristic::Function=minus_softmax
+    model::Supervised;
+    coverage::AbstractFloat=0.95,
+    heuristic::Function=minus_softmax,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
 )
-    return NaiveClassifier(model, coverage, nothing, heuristic)
+    return NaiveClassifier(model, coverage, nothing, heuristic, parallelizer)
 end

 @doc raw"""
diff --git a/src/conformal_models/transductive_regression.jl b/src/conformal_models/transductive_regression.jl
index 6598531..2e3b150 100755
--- a/src/conformal_models/transductive_regression.jl
+++ b/src/conformal_models/transductive_regression.jl
@@ -10,12 +10,16 @@ mutable struct NaiveRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
 end

 function NaiveRegressor(
-    model::Supervised; coverage::AbstractFloat=0.95, heuristic::Function=absolute_error
+    model::Supervised;
+    coverage::AbstractFloat=0.95,
+    heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
 )
-    return NaiveRegressor(model, coverage, nothing, heuristic)
+    return NaiveRegressor(model, coverage, nothing, heuristic, parallelizer)
 end

 @doc raw"""
@@ -79,12 +83,16 @@ mutable struct JackknifeRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
 end

 function JackknifeRegressor(
-    model::Supervised; coverage::AbstractFloat=0.95, heuristic::Function=absolute_error
+    model::Supervised;
+    coverage::AbstractFloat=0.95,
+    heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
 )
-    return JackknifeRegressor(model, coverage, nothing, heuristic)
+    return JackknifeRegressor(model, coverage, nothing, heuristic, parallelizer)
 end

 @doc raw"""
@@ -160,12 +168,16 @@ mutable struct JackknifePlusRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
 end

 function JackknifePlusRegressor(
-    model::Supervised; coverage::AbstractFloat=0.95, heuristic::Function=absolute_error
+    model::Supervised;
+    coverage::AbstractFloat=0.95,
+    heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
 )
-    return JackknifePlusRegressor(model, coverage, nothing, heuristic)
+    return JackknifePlusRegressor(model, coverage, nothing, heuristic, parallelizer)
 end

 @doc raw"""
@@ -249,12 +261,16 @@ mutable struct JackknifeMinMaxRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
 end

 function JackknifeMinMaxRegressor(
-    model::Supervised; coverage::AbstractFloat=0.95, heuristic::Function=absolute_error
+    model::Supervised;
+    coverage::AbstractFloat=0.95,
+    heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
 )
-    return JackknifeMinMaxRegressor(model, coverage, nothing, heuristic)
+    return JackknifeMinMaxRegressor(model, coverage, nothing, heuristic, parallelizer)
 end

 @doc raw"""
@@ -337,6 +353,7 @@ mutable struct CVPlusRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     cv::MLJBase.CV
 end

@@ -344,9 +361,10 @@ function CVPlusRegressor(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     cv::MLJBase.CV=MLJBase.CV(),
 )
-    return CVPlusRegressor(model, coverage, nothing, heuristic, cv)
+    return CVPlusRegressor(model, coverage, nothing, heuristic, parallelizer, cv)
 end

 @doc raw"""
@@ -442,6 +460,7 @@ mutable struct CVMinMaxRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     cv::MLJBase.CV
 end

@@ -449,9 +468,10 @@ function CVMinMaxRegressor(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     cv::MLJBase.CV=MLJBase.CV(),
 )
-    return CVMinMaxRegressor(model, coverage, nothing, heuristic, cv)
+    return CVMinMaxRegressor(model, coverage, nothing, heuristic, parallelizer, cv)
 end

 @doc raw"""
@@ -567,6 +587,7 @@ mutable struct JackknifePlusAbRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     nsampling::Int
     sample_size::AbstractFloat
     replacement::Bool
@@ -577,13 +598,14 @@ function JackknifePlusAbRegressor(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     nsampling::Int=30,
     sample_size::AbstractFloat=0.5,
     replacement::Bool=true,
     aggregate::Union{Symbol,String}="mean",
 )
     return JackknifePlusAbRegressor(
-        model, coverage, nothing, heuristic, nsampling, sample_size, replacement, aggregate
+        model, coverage, nothing, heuristic, parallelizer, nsampling, sample_size, replacement, aggregate
     )
 end

@@ -673,6 +695,7 @@ mutable struct JackknifePlusAbMinMaxRegressor{Model<:Supervised} <: ConformalInt
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     nsampling::Int
     sample_size::AbstractFloat
     replacement::Bool
@@ -683,13 +706,14 @@ function JackknifePlusAbMinMaxRegressor(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     nsampling::Int=30,
     sample_size::AbstractFloat=0.5,
     replacement::Bool=true,
     aggregate::Union{Symbol,String}="mean",
 )
     return JackknifePlusAbMinMaxRegressor(
-        model, coverage, nothing, heuristic, nsampling, sample_size, replacement, aggregate
+        model, coverage, nothing, heuristic, parallelizer, nsampling, sample_size, replacement, aggregate
    )
 end

@@ -777,6 +801,7 @@ mutable struct TimeSeriesRegressorEnsembleBatch{Model<:Supervised} <: ConformalI
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     nsampling::Int
     sample_size::AbstractFloat
     aggregate::Union{Symbol,String}
@@ -786,12 +811,13 @@ function TimeSeriesRegressorEnsembleBatch(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     nsampling::Int=50,
     sample_size::AbstractFloat=0.3,
     aggregate::Union{Symbol,String}="mean",
 )
     return TimeSeriesRegressorEnsembleBatch(
-        model, coverage, nothing, heuristic, nsampling, sample_size, aggregate
+        model, coverage, nothing, heuristic, parallelizer, nsampling, sample_size, aggregate
     )
 end

From 8559209495302dda90b4c0d57be4e5874ccb9406 Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 17:00:24 +0200
Subject: [PATCH 02/11] format

---
 .../inductive_classification.jl              |  8 ++++++--
 src/conformal_models/inductive_regression.jl |  8 ++++++--
 .../transductive_regression.jl               | 20 +++++++++++++++++--
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/conformal_models/inductive_classification.jl b/src/conformal_models/inductive_classification.jl
index 119d84d..39c9442 100755
--- a/src/conformal_models/inductive_classification.jl
+++ b/src/conformal_models/inductive_classification.jl
@@ -40,7 +40,9 @@ function SimpleInductiveClassifier(
     parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return SimpleInductiveClassifier(model, coverage, nothing, heuristic, parallelizer, train_ratio)
+    return SimpleInductiveClassifier(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end

 """
@@ -140,7 +142,9 @@ function AdaptiveInductiveClassifier(
     parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return AdaptiveInductiveClassifier(model, coverage, nothing, heuristic, parallelizer, train_ratio)
+    return AdaptiveInductiveClassifier(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end

 @doc raw"""
diff --git a/src/conformal_models/inductive_regression.jl b/src/conformal_models/inductive_regression.jl
index 7f19fa9..f52d6f0 100755
--- a/src/conformal_models/inductive_regression.jl
+++ b/src/conformal_models/inductive_regression.jl
@@ -17,7 +17,9 @@ function SimpleInductiveRegressor(
     parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return SimpleInductiveRegressor(model, coverage, nothing, heuristic, parallelizer, train_ratio)
+    return SimpleInductiveRegressor(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end

 @doc raw"""
@@ -99,7 +101,9 @@ function ConformalQuantileRegressor(
     parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return ConformalQuantileRegressor(model, coverage, nothing, heuristic, parallelizer, train_ratio)
+    return ConformalQuantileRegressor(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end

 @doc raw"""
diff --git a/src/conformal_models/transductive_regression.jl b/src/conformal_models/transductive_regression.jl
index 2e3b150..a07e762 100755
--- a/src/conformal_models/transductive_regression.jl
+++ b/src/conformal_models/transductive_regression.jl
@@ -605,7 +605,15 @@ function JackknifePlusAbRegressor(
     aggregate::Union{Symbol,String}="mean",
 )
     return JackknifePlusAbRegressor(
-        model, coverage, nothing, heuristic, parallelizer, nsampling, sample_size, replacement, aggregate
+        model,
+        coverage,
+        nothing,
+        heuristic,
+        parallelizer,
+        nsampling,
+        sample_size,
+        replacement,
+        aggregate,
     )
 end

@@ -713,7 +721,15 @@ function JackknifePlusAbMinMaxRegressor(
     aggregate::Union{Symbol,String}="mean",
 )
     return JackknifePlusAbMinMaxRegressor(
-        model, coverage, nothing, heuristic, parallelizer, nsampling, sample_size, replacement, aggregate
+        model,
+        coverage,
+        nothing,
+        heuristic,
+        parallelizer,
+        nsampling,
+        sample_size,
+        replacement,
+        aggregate,
     )
 end

From aa55c6ebc794358ce43fcf55415b4fda4c47f45a Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 17:30:10 +0200
Subject: [PATCH 03/11] added compat entry

---
 Project.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Project.toml b/Project.toml
index 277ebe0..0446e4e 100755
--- a/Project.toml
+++ b/Project.toml
@@ -40,6 +40,7 @@ ProgressMeter = "1"
 Random = "1.7, 1.8, 1.9, 1.10"
 StatsBase = "0.33, 0.34.0"
 Tables = "1"
+TaijaBase = "1"
 Test = "1.7, 1.8, 1.9, 1.10"
 julia = "1.7, 1.8, 1.9, 1.10"

From bf6289abc5df632fdf14b9285cfde05b8559d87a Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 17:56:23 +0200
Subject: [PATCH 04/11] will also use this same branch to streamline the codebase a bit

---
 src/conformal_models/conformal_models.jl      | 13 +---
 .../classification.jl}                        | 67 +++++++----------
 .../inductive/inductive_models.jl             | 27 +++++++
 .../regression.jl}                            | 14 +---
 src/conformal_models/inductive_bayes.jl       | 74 -------------------
 5 files changed, 58 insertions(+), 137 deletions(-)
 rename src/conformal_models/{inductive_classification.jl => inductive/classification.jl} (92%)
 create mode 100644 src/conformal_models/inductive/inductive_models.jl
 rename src/conformal_models/{inductive_regression.jl => inductive/regression.jl} (92%)
 delete mode 100755 src/conformal_models/inductive_bayes.jl

diff --git a/src/conformal_models/conformal_models.jl b/src/conformal_models/conformal_models.jl
index 4023173..d8e63f8 100755
--- a/src/conformal_models/conformal_models.jl
+++ b/src/conformal_models/conformal_models.jl
@@ -50,26 +50,19 @@ function conformal_model(
     return conf_model
 end

+# Inductive Models:
+include("inductive/inductive_models.jl")
+
 # Regression Models:
-include("inductive_regression.jl")
 include("transductive_regression.jl")

 # Classification Models
-include("inductive_classification.jl")
 include("transductive_classification.jl")

 # Training:
 include("ConformalTraining/ConformalTraining.jl")
 using .ConformalTraining

-# Type unions:
-const InductiveModel = Union{
-    SimpleInductiveRegressor,
-    SimpleInductiveClassifier,
-    AdaptiveInductiveClassifier,
-    ConformalQuantileRegressor,
-}
-
 const TransductiveModel = Union{
     NaiveRegressor,
     JackknifeRegressor,
diff --git a/src/conformal_models/inductive_classification.jl b/src/conformal_models/inductive/classification.jl
similarity index 92%
rename from src/conformal_models/inductive_classification.jl
rename to src/conformal_models/inductive/classification.jl
index 39c9442..baad252 100755
--- a/src/conformal_models/inductive_classification.jl
+++ b/src/conformal_models/inductive/classification.jl
@@ -7,19 +7,26 @@ function score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
     return score(conf_model, conf_model.model, fitresult, X, y)
 end

-"""
-    split_data(conf_model::ConformalProbabilisticSet, indices::Base.OneTo{Int})
+@doc raw"""
+    MMI.fit(conf_model::ConformalProbabilisticSet, verbosity, X, y)

-Splits the data into a proper training and calibration set.
+Fits the [`ConformalProbabilisticSet`](@ref) model.
 """
-function split_data(conf_model::ConformalProbabilisticSet, X, y)
-    train, calibration = partition(eachindex(y), conf_model.train_ratio)
-    Xtrain = selectrows(X, train)
-    ytrain = y[train]
-    Xcal = selectrows(X, calibration)
-    ycal = y[calibration]
-
-    return Xtrain, ytrain, Xcal, ycal
+function MMI.fit(conf_model::ConformalProbabilisticSet, verbosity, X, y)
+
+    # Data Splitting:
+    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
+
+    # Training:
+    fitresult, cache, report = MMI.fit(
+        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
+    )
+
+    # Nonconformity Scores:
+    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
+    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
+
+    return (fitresult, cache, report)
 end

 # Simple
@@ -45,10 +52,16 @@ function SimpleInductiveClassifier(
 )
 end

-"""
+@doc raw"""
     score(conf_model::SimpleInductiveClassifier, ::Type{<:Supervised}, fitresult, X, y::Union{Nothing,AbstractArray}=nothing)

-Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model.
+Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model. For the [`SimpleInductiveClassifier`](@ref) nonconformity scores are computed as follows:
+
+``
+S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
+``
+
+A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
 """
 function score(
     conf_model::SimpleInductiveClassifier, atomic::Supervised, fitresult, X, y=nothing
@@ -65,34 +78,6 @@ function score(
     end
 end

-@doc raw"""
-    MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)
-
-For the [`SimpleInductiveClassifier`](@ref) nonconformity scores are computed as follows:
-
-``
-S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
-``
-
-A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
-"""
-function MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)
-
-    # Data Splitting:
-    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
-    )
-
-    # Nonconformity Scores:
-    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
-    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
-
-    return (fitresult, cache, report)
-end
-
 @doc raw"""
     MMI.predict(conf_model::SimpleInductiveClassifier, fitresult, Xnew)
diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl
new file mode 100644
index 0000000..0e69bf9
--- /dev/null
+++ b/src/conformal_models/inductive/inductive_models.jl
@@ -0,0 +1,27 @@
+# Type unions:
+include("classification.jl")
+include("regression.jl")
+
+const InductiveModel = Union{
+    SimpleInductiveRegressor,
+    SimpleInductiveClassifier,
+    AdaptiveInductiveClassifier,
+    ConformalQuantileRegressor,
+}
+
+"""
+    split_data(conf_model::InductiveModel, X, y)
+
+Splits the data into a proper training and calibration set.
+"""
+function split_data(conf_model::InductiveModel, X, y)
+    train, calibration = partition(eachindex(y), conf_model.train_ratio)
+    Xtrain = selectrows(X, train)
+    ytrain = y[train]
+    Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
+    Xcal = selectrows(X, calibration)
+    ycal = y[calibration]
+    Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)
+
+    return Xtrain, ytrain, Xcal, ycal
+end
diff --git a/src/conformal_models/inductive_regression.jl b/src/conformal_models/inductive/regression.jl
similarity index 92%
rename from src/conformal_models/inductive_regression.jl
rename to src/conformal_models/inductive/regression.jl
index f52d6f0..a11dcad 100755
--- a/src/conformal_models/inductive_regression.jl
+++ b/src/conformal_models/inductive/regression.jl
@@ -36,11 +36,7 @@ A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\m
 function MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)

     # Data Splitting:
-    train, calibration = partition(eachindex(y), conf_model.train_ratio)
-    Xtrain = selectrows(X, train)
-    ytrain = y[train]
-    Xcal = selectrows(X, calibration)
-    ycal = y[calibration]
+    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

     # Training:
     fitresult, cache, report = MMI.fit(
@@ -122,13 +118,7 @@ A typical choice for the heuristic function is ``h(\hat\mu_{\alpha_{lo}}(X_i), \
 function MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)

     # Data Splitting:
-    train, calibration = partition(eachindex(y), conf_model.train_ratio)
-    Xtrain = selectrows(X, train)
-    ytrain = y[train]
-    Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
-    Xcal = selectrows(X, calibration)
-    ycal = y[calibration]
-    Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)
+    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

     # Training:
     fitresult, cache, report, y_pred = ([], [], [], [])
diff --git a/src/conformal_models/inductive_bayes.jl b/src/conformal_models/inductive_bayes.jl
deleted file mode 100755
index 7c330d5..0000000
--- a/src/conformal_models/inductive_bayes.jl
+++ /dev/null
@@ -1,74 +0,0 @@
-# # Simple
-# "The `SimpleInductiveBayes` is the simplest approach to Inductive Conformalized Bayes."
-# mutable struct SimpleInductiveBayes{Model <: Supervised} <: ConformalModel
-#     model::Model
-#     coverage::AbstractFloat
-#     scores::Union{Nothing,AbstractArray}
-#     heuristic::Function
-#     train_ratio::AbstractFloat
-# end
-
-# function SimpleInductiveBayes(model::Supervised; coverage::AbstractFloat=0.95, heuristic::Function=f(y, ŷ)=-ŷ, train_ratio::AbstractFloat=0.5)
-#     return SimpleInductiveBayes(model, coverage, nothing, heuristic, train_ratio)
-# end
-
-# @doc raw"""
-#     MMI.fit(conf_model::SimpleInductiveBayes, verbosity, X, y)
-
-# For the [`SimpleInductiveBayes`](@ref) nonconformity scores are computed as follows:
-
-# ``
-# S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
-# ``
-
-# A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
-# """
-# function MMI.fit(conf_model::SimpleInductiveBayes, verbosity, X, y)
-
-#     # Data Splitting:
-#     train, calibration = partition(eachindex(y), conf_model.train_ratio)
-#     Xtrain = selectrows(X, train)
-#     ytrain = y[train]
-#     Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
-#     Xcal = selectrows(X, calibration)
-#     ycal = y[calibration]
-#     Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)
-
-#     # Training:
-#     fitresult, cache, report = MMI.fit(conf_model.model, verbosity, Xtrain, ytrain)
-
-#     # Nonconformity Scores:
-#     ŷ = pdf.(MMI.predict(conf_model.model, fitresult, Xcal), ycal)   # predict returns a vector of distributions
-#     conf_model.scores = @.(conf_model.heuristic(ycal, ŷ))
-
-#     return (fitresult, cache, report)
-# end
-
-# @doc raw"""
-#     MMI.predict(conf_model::SimpleInductiveBayes, fitresult, Xnew)
-
-# For the [`SimpleInductiveBayes`](@ref) prediction sets are computed as follows,
-
-# ``
-# \hat{C}_{n,\alpha}(X_{n+1}) = \left\{y: s(X_{n+1},y) \le \hat{q}_{n, \alpha}^{+} \{S_i^{\text{CAL}}\} \right\}, \ i \in \mathcal{D}_{\text{calibration}}
-# ``
-
-# where ``\mathcal{D}_{\text{calibration}}`` denotes the designated calibration data.
-# """
-# function MMI.predict(conf_model::SimpleInductiveBayes, fitresult, Xnew)
-#     p̂ = MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xnew)...)
-#     v = conf_model.scores
-#     q̂ = qplus(v, conf_model.coverage)
-#     p̂ = map(p̂) do pp
-#         L = p̂.decoder.classes
-#         probas = pdf.(pp, L)
-#         is_in_set = 1.0 .- probas .<= q̂
-#         if !all(is_in_set .== false)
-#             pp = UnivariateFinite(L[is_in_set], probas[is_in_set])
-#         else
-#             pp = missing
-#         end
-#         return pp
-#     end
-#     return p̂
-# end

From c6ae2b5ef6cbc995259856299b28a94ed57cc76f Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 18:09:07 +0200
Subject: [PATCH 05/11] let's try this again

---
 src/conformal_models/inductive/inductive_models.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl
index 0e69bf9..5975f3d 100644
--- a/src/conformal_models/inductive/inductive_models.jl
+++ b/src/conformal_models/inductive/inductive_models.jl
@@ -18,10 +18,8 @@ function split_data(conf_model::InductiveModel, X, y)
     train, calibration = partition(eachindex(y), conf_model.train_ratio)
     Xtrain = selectrows(X, train)
     ytrain = y[train]
-    Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
     Xcal = selectrows(X, calibration)
     ycal = y[calibration]
-    Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)

     return Xtrain, ytrain, Xcal, ycal
 end

From eeda2a10d55dccaa9b8acf132d8e333af87f6623 Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 21:32:37 +0200
Subject: [PATCH 06/11] trying to fix the error

---
 .../inductive/classification.jl              | 57 -------------------
 .../inductive/inductive_models.jl            | 31 ++++++++++
 src/conformal_models/inductive/regression.jl | 29 +++++-----
 3 files changed, 44 insertions(+), 73 deletions(-)

diff --git a/src/conformal_models/inductive/classification.jl b/src/conformal_models/inductive/classification.jl
index baad252..5524bde 100755
--- a/src/conformal_models/inductive/classification.jl
+++ b/src/conformal_models/inductive/classification.jl
@@ -1,34 +1,3 @@
-"""
-    score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
-
-Generic score method for the [`ConformalProbabilisticSet`](@ref). It computes nonconformity scores using the heuristic function `h` and the softmax probabilities of the true class. Method is dispatched for different Conformal Probabilistic Sets and atomic models.
-"""
-function score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
-    return score(conf_model, conf_model.model, fitresult, X, y)
-end
-
-@doc raw"""
-    MMI.fit(conf_model::ConformalProbabilisticSet, verbosity, X, y)
-
-Fits the [`ConformalProbabilisticSet`](@ref) model.
-"""
-function MMI.fit(conf_model::ConformalProbabilisticSet, verbosity, X, y)
-
-    # Data Splitting:
-    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
-    )
-
-    # Nonconformity Scores:
-    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
-    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
-
-    return (fitresult, cache, report)
-end
-
 # Simple
 "The `SimpleInductiveClassifier` is the simplest approach to Inductive Conformal Classification. Contrary to the [`NaiveClassifier`](@ref) it computes nonconformity scores using a designated calibration dataset."
 mutable struct SimpleInductiveClassifier{Model<:Supervised} <: ConformalProbabilisticSet
@@ -132,32 +101,6 @@ function AdaptiveInductiveClassifier(
 )
 end

-@doc raw"""
-    MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)
-
-For the [`AdaptiveInductiveClassifier`](@ref) nonconformity scores are computed by cumulatively summing the ranked scores of each label in descending order until reaching the true label ``Y_i``:
-
-``
-S_i^{\text{CAL}} = s(X_i,Y_i) = \sum_{j=1}^k  \hat\mu(X_i)_{\pi_j} \ \text{where } \ Y_i=\pi_k,  i \in \mathcal{D}_{\text{calibration}}
-``
-"""
-function MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)
-
-    # Data Splitting:
-    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
-    )
-
-    # Nonconformity Scores:
-    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
-    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
-
-    return (fitresult, cache, report)
-end
-
 """
     score(conf_model::AdaptiveInductiveClassifier, ::Type{<:Supervised}, fitresult, X, y::Union{Nothing,AbstractArray}=nothing)
diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl
index 5975f3d..c3298be 100644
--- a/src/conformal_models/inductive/inductive_models.jl
+++ b/src/conformal_models/inductive/inductive_models.jl
@@ -23,3 +23,34 @@ function split_data(conf_model::InductiveModel, X, y)

     return Xtrain, ytrain, Xcal, ycal
 end
+
+"""
+    score(conf_model::InductiveModel, fitresult, X, y=nothing)
+
+Generic score method for the [`InductiveModel`](@ref). It computes nonconformity scores using the heuristic function `h`. The method is dispatched for different inductive conformal models and atomic models.
+"""
+function score(conf_model::InductiveModel, fitresult, X, y=nothing)
+    return score(conf_model, conf_model.model, fitresult, X, y)
+end
+
+@doc raw"""
+    MMI.fit(conf_model::InductiveModel, verbosity, X, y)
+
+Fits the [`InductiveModel`](@ref) model.
+"""
+function MMI.fit(conf_model::InductiveModel, verbosity, X, y)
+
+    # Data Splitting:
+    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
+
+    # Training:
+    fitresult, cache, report = MMI.fit(
+        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
+    )
+
+    # Nonconformity Scores:
+    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
+    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
+
+    return (fitresult, cache, report)
+end
diff --git a/src/conformal_models/inductive/regression.jl b/src/conformal_models/inductive/regression.jl
index a11dcad..3d72226 100755
--- a/src/conformal_models/inductive/regression.jl
+++ b/src/conformal_models/inductive/regression.jl
@@ -22,8 +22,9 @@ function SimpleInductiveRegressor(
     )
 end

+
 @doc raw"""
-    MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
+    score(conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing)

 For the [`SimpleInductiveRegressor`](@ref) nonconformity scores are computed as follows:

@@ -33,23 +34,19 @@ S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\tex

 A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\mu(X_i)|`` where ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``.
 """
-function MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
-
-    # Data Splitting:
-    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
-    )
-
-    # Nonconformity Scores:
+function score(
+    conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing
+)
     ŷ = reformat_mlj_prediction(
-        MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xcal)...)
+        MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...)
     )
-    conf_model.scores = @.(conf_model.heuristic(ycal, ŷ))
-
-    return (fitresult, cache, report)
+    scores = @.(conf_model.heuristic(y, ŷ))
+    if isnothing(y)
+        return scores
+    else
+        cal_scores = getindex.(Ref(scores), 1:size(scores, 1), y)
+        return cal_scores, scores
+    end
 end

 # Prediction

From 624d4a736a51945d797b8e501e4d09887da0d046 Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Fri, 5 Apr 2024 21:34:43 +0200
Subject: [PATCH 07/11] formatter

---
 src/conformal_models/inductive/regression.jl | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/conformal_models/inductive/regression.jl b/src/conformal_models/inductive/regression.jl
index 3d72226..d75215a 100755
--- a/src/conformal_models/inductive/regression.jl
+++ b/src/conformal_models/inductive/regression.jl
@@ -22,7 +22,6 @@ function SimpleInductiveRegressor(
     )
 end

-
 @doc raw"""
     score(conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing)

@@ -37,9 +36,7 @@ A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\m
 function score(
     conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing
 )
-    ŷ = reformat_mlj_prediction(
-        MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...)
-    )
+    ŷ = reformat_mlj_prediction(MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...))
     scores = @.(conf_model.heuristic(y, ŷ))
     if isnothing(y)
         return scores

From dfe58d7a82891ed4fbafaff274b3e3675ade5451 Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Sat, 6 Apr 2024 16:10:21 +0200
Subject: [PATCH 08/11] uh

---
 src/conformal_models/inductive/inductive_models.jl | 3 ++-
 src/conformal_models/inductive/regression.jl       | 7 +------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl
index c3298be..f68f654 100644
--- a/src/conformal_models/inductive/inductive_models.jl
+++ b/src/conformal_models/inductive/inductive_models.jl
@@ -12,9 +12,10 @@ const InductiveModel = Union{
 """
     split_data(conf_model::InductiveModel, X, y)

-Splits the data into a proper training and calibration set.
+Splits the data into a proper training and calibration set for inductive models.
""" function split_data(conf_model::InductiveModel, X, y) + train, calibration = partition(eachindex(y), conf_model.train_ratio) Xtrain = selectrows(X, train) ytrain = y[train] diff --git a/src/conformal_models/inductive/regression.jl b/src/conformal_models/inductive/regression.jl index d75215a..44bc3b7 100755 --- a/src/conformal_models/inductive/regression.jl +++ b/src/conformal_models/inductive/regression.jl @@ -38,12 +38,7 @@ function score( ) ŷ = reformat_mlj_prediction(MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...)) scores = @.(conf_model.heuristic(y, ŷ)) - if isnothing(y) - return scores - else - cal_scores = getindex.(Ref(scores), 1:size(scores, 1), y) - return cal_scores, scores - end + return scores, scores end # Prediction From 255b7c552cf13739def3b88148001fc6b0d8ac35 Mon Sep 17 00:00:00 2001 From: pat-alt Date: Sat, 6 Apr 2024 16:12:33 +0200 Subject: [PATCH 09/11] formatter --- src/conformal_models/inductive/inductive_models.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl index f68f654..9d6bffc 100644 --- a/src/conformal_models/inductive/inductive_models.jl +++ b/src/conformal_models/inductive/inductive_models.jl @@ -15,7 +15,6 @@ const InductiveModel = Union{ Splits the data into a proper training and calibration set for inductive models. """ function split_data(conf_model::InductiveModel, X, y) - train, calibration = partition(eachindex(y), conf_model.train_ratio) Xtrain = selectrows(X, train) ytrain = y[train] From 81bd64f873ecb970d87e5a33efa007a34bc2d339 Mon Sep 17 00:00:00 2001 From: pat-alt Date: Sat, 6 Apr 2024 16:34:45 +0200 Subject: [PATCH 10/11] slowly slowly --- src/conformal_models/inductive/inductive_models.jl | 14 +++++++++++--- src/conformal_models/inductive/regression.jl | 13 +++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl index 9d6bffc..c05834f 100644 --- a/src/conformal_models/inductive/inductive_models.jl +++ b/src/conformal_models/inductive/inductive_models.jl @@ -33,6 +33,16 @@ function score(conf_model::InductiveModel, fitresult, X, y=nothing) return score(conf_model, conf_model.model, fitresult, X, y) end +""" + fit_atomic(conf_model::InductiveModel, verbosity, X, y) + +Fits the atomic model for the [`InductiveModel`](@ref). In the case of inductive models, the atomic model is fit once on the proper training data. +""" +function fit_atomic(conf_model::InductiveModel, verbosity, X, y) + fitresult, cache, report = MMI.fit(conf_model.model, verbosity, MMI.reformat(conf_model.model, X, y)...) + return fitresult, cache, report +end + @doc raw""" MMI.fit(conf_model::InductiveModel, verbosity, X, y) @@ -44,9 +54,7 @@ function MMI.fit(conf_model::InductiveModel, verbosity, X, y) Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y) # Training: - fitresult, cache, report = MMI.fit( - conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)... 
- ) + fitresult, cache, report = fit_atomic(conf_model, verbosity, Xtrain, ytrain) # Nonconformity Scores: cal_scores, scores = score(conf_model, fitresult, Xcal, ycal) diff --git a/src/conformal_models/inductive/regression.jl b/src/conformal_models/inductive/regression.jl index 44bc3b7..008f3c7 100755 --- a/src/conformal_models/inductive/regression.jl +++ b/src/conformal_models/inductive/regression.jl @@ -4,7 +4,7 @@ using MLJLinearModels: MLJLinearModels mutable struct SimpleInductiveRegressor{Model<:Supervised} <: ConformalInterval model::Model coverage::AbstractFloat - scores::Union{Nothing,AbstractArray} + scores::Union{Nothing,Dict{Any,Any}} heuristic::Function parallelizer::Union{Nothing,AbstractParallelizer} train_ratio::AbstractFloat @@ -38,7 +38,11 @@ function score( ) ŷ = reformat_mlj_prediction(MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...)) scores = @.(conf_model.heuristic(y, ŷ)) - return scores, scores + if isnothing(y) + return scores + else + return scores, scores + end end # Prediction @@ -91,6 +95,11 @@ function ConformalQuantileRegressor( ) end +# function fit_atomic(conf_model::ConformalQuantileRegressor, verbosity, X, y) +# fitresult, cache, report = MMI.fit(conf_model.model, verbosity, MMI.reformat(conf_model.model, X, y)...) +# return fitresult, cache, report +# end + @doc raw""" MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y) From 8bc0d800d41b17ba4e337f6759013c4e9bf89f89 Mon Sep 17 00:00:00 2001 From: pat-alt Date: Sat, 6 Apr 2024 16:37:05 +0200 Subject: [PATCH 11/11] formatter --- src/conformal_models/inductive/inductive_models.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl index c05834f..a89f8ba 100644 --- a/src/conformal_models/inductive/inductive_models.jl +++ b/src/conformal_models/inductive/inductive_models.jl @@ -39,7 +39,9 @@ end Fits the atomic model for the [`InductiveModel`](@ref). In the case of inductive models, the atomic model is fit once on the proper training data. """ function fit_atomic(conf_model::InductiveModel, verbosity, X, y) - fitresult, cache, report = MMI.fit(conf_model.model, verbosity, MMI.reformat(conf_model.model, X, y)...) + fitresult, cache, report = MMI.fit( + conf_model.model, verbosity, MMI.reformat(conf_model.model, X, y)... + ) return fitresult, cache, report end
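
Note for reviewers: the series threads the new `parallelizer` field through every model
constructor but does not yet consume it in `fit` or `predict`; keeping the default at
`nothing` makes the change non-breaking. The snippet below is a minimal usage sketch of
the resulting API, not taken from the patches themselves: it assumes MLJ.jl and
DecisionTree.jl are installed, and the synthetic data and atomic model are illustrative
choices only.

    using ConformalPrediction
    using MLJ

    # Synthetic regression data (illustrative only):
    X, y = make_regression(100, 2)

    # Any MLJ-compatible atomic model works; a decision tree is used here as an example:
    DecisionTreeRegressor = @load DecisionTreeRegressor pkg = DecisionTree verbosity = 0
    model = DecisionTreeRegressor()

    # The new keyword defaults to `nothing`, so all existing constructor calls keep working;
    # a `TaijaBase.AbstractParallelizer` can be passed once the field is wired into training:
    conf_model = ConformalPrediction.SimpleInductiveRegressor(
        model; coverage=0.95, parallelizer=nothing, train_ratio=0.5
    )

    # Standard MLJ workflow, routed through the generic `MMI.fit` defined in
    # inductive_models.jl (split_data -> fit_atomic -> score):
    mach = machine(conf_model, X, y)
    fit!(mach)
    predict(mach, X)

Because `MMI.fit` for all four inductive models now funnels through `fit_atomic`, a later
patch can dispatch `fit_atomic` on the parallelizer field without touching the per-model code.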