-
Notifications
You must be signed in to change notification settings - Fork 12
/
inductive_regression.jl
executable file
·168 lines (139 loc) · 6.12 KB
/
inductive_regression.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
using MLJLinearModels: MLJLinearModels
"The `SimpleInductiveRegressor` is the simplest approach to Inductive Conformal Regression. Contrary to the [`NaiveRegressor`](@ref) it computes nonconformity scores using a designated calibration dataset."
mutable struct SimpleInductiveRegressor{Model<:Supervised} <: ConformalInterval
    model::Model                          # atomic MLJ model to be conformalized
    coverage::AbstractFloat               # target coverage rate (1 - α)
    scores::Union{Nothing,AbstractArray}  # calibration nonconformity scores; `nothing` until `MMI.fit` has run
    heuristic::Function                   # heuristic h(y, ŷ) used to compute nonconformity scores
    train_ratio::AbstractFloat            # fraction of data used for training; remainder goes to calibration
end
"""
    SimpleInductiveRegressor(model::Supervised; coverage=0.95, heuristic=absolute_error, train_ratio=0.5)

Outer keyword constructor for [`SimpleInductiveRegressor`](@ref). Wraps the atomic
`model`; the `scores` field is initialized to `nothing` and only populated once the
conformal model has been fitted via `MMI.fit`.
"""
function SimpleInductiveRegressor(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=absolute_error,
    train_ratio::AbstractFloat=0.5,
)
    # Scores are unknown before calibration, hence `nothing`:
    return SimpleInductiveRegressor(model, coverage, nothing, heuristic, train_ratio)
end
@doc raw"""
    MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)

For the [`SimpleInductiveRegressor`](@ref) nonconformity scores are computed as follows:

``
S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
``

A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\mu(X_i)|``
where ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``.
"""
function MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
    # Split the data into a proper training set and a designated calibration set:
    train_idx, cal_idx = partition(eachindex(y), conf_model.train_ratio)
    Xtrain = selectrows(X, train_idx)
    ytrain = y[train_idx]
    Xcal = selectrows(X, cal_idx)
    ycal = y[cal_idx]

    # Fit the atomic model on the proper training set only:
    fitresult, cache, report = MMI.fit(
        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
    )

    # Nonconformity scores: heuristic applied element-wise to calibration labels
    # and the fitted model's calibration-set predictions.
    ŷcal = reformat_mlj_prediction(
        MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xcal)...)
    )
    conf_model.scores = @.(conf_model.heuristic(ycal, ŷcal))

    return (fitresult, cache, report)
end
# Prediction
@doc raw"""
    MMI.predict(conf_model::SimpleInductiveRegressor, fitresult, Xnew)

For the [`SimpleInductiveRegressor`](@ref) prediction intervals are computed as follows,

``
\hat{C}_{n,\alpha}(X_{n+1}) = \hat\mu(X_{n+1}) \pm \hat{q}_{n, \alpha}^{+} \{S_i^{\text{CAL}} \}, \ i \in \mathcal{D}_{\text{calibration}}
``

where ``\mathcal{D}_{\text{calibration}}`` denotes the designated calibration data.
"""
function MMI.predict(conf_model::SimpleInductiveRegressor, fitresult, Xnew)
    # Point predictions of the fitted atomic model on the new data:
    predictions = reformat_mlj_prediction(
        MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xnew)...)
    )
    # Plus-quantile of the calibration scores at the target coverage level:
    q̂ = qplus(conf_model.scores, conf_model.coverage)
    # Symmetric interval of half-width q̂ around each point prediction:
    intervals = map(x -> (x .- q̂, x .+ q̂), eachrow(predictions))
    return reformat_interval(intervals)
end
"Union of atomic model types usable for quantile regression; currently only `MLJLinearModels.QuantileRegressor`."
const QuantileModel = Union{MLJLinearModels.QuantileRegressor}
"The `ConformalQuantileRegressor` conformalizes an atomic quantile-regression model (see [`QuantileModel`](@ref)) using a designated calibration dataset."
mutable struct ConformalQuantileRegressor{Model<:QuantileModel} <: ConformalInterval
    model::Model                          # atomic quantile-regression model to be conformalized
    coverage::AbstractFloat               # target coverage rate (1 - α)
    scores::Union{Nothing,AbstractArray}  # calibration nonconformity scores; `nothing` until `MMI.fit` has run
    heuristic::Function                   # heuristic h(y, ŷ_lb, ŷ_ub) used to compute nonconformity scores
    train_ratio::AbstractFloat            # fraction of data used for training; remainder goes to calibration
end
"""
    ConformalQuantileRegressor(model::Supervised; coverage=0.95, heuristic=..., train_ratio=0.5)

Outer keyword constructor for [`ConformalQuantileRegressor`](@ref). The default
heuristic is the standard CQR score: how far the label falls outside the
predicted quantile band, `max(ŷ_lb - y, y - ŷ_ub)` (negative when `y` lies
inside the band). The `scores` field starts as `nothing` until `MMI.fit` runs.
"""
function ConformalQuantileRegressor(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=function f(y, ŷ_lb, ŷ_ub)
        # Element-wise maximum of the two one-sided exceedances:
        return max.(ŷ_lb - y, y - ŷ_ub)
    end,
    train_ratio::AbstractFloat=0.5,
)
    return ConformalQuantileRegressor(model, coverage, nothing, heuristic, train_ratio)
end
@doc raw"""
    MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)

For the [`ConformalQuantileRegressor`](@ref) nonconformity scores are computed as follows:

``
S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu_{\alpha_{lo}}(X_i), \hat\mu_{\alpha_{hi}}(X_i) ,Y_i), \ i \in \mathcal{D}_{\text{calibration}}
``

A typical choice for the heuristic function is
``h(\hat\mu_{\alpha_{lo}}(X_i), \hat\mu_{\alpha_{hi}}(X_i) ,Y_i)= max\{\hat\mu_{\alpha_{lo}}(X_i)-Y_i, Y_i-\hat\mu_{\alpha_{hi}}(X_i)\}``
where ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}`` and
``\alpha_{lo}, \alpha_{hi}`` the lower and upper quantile levels.
"""
function MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)
    # Data splitting into proper training and calibration sets:
    train, calibration = partition(eachindex(y), conf_model.train_ratio)
    Xtrain = selectrows(X, train)
    ytrain = y[train]
    Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
    Xcal = selectrows(X, calibration)
    ycal = y[calibration]
    Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)

    # Training: fit one quantile-regression model per level (lower and upper).
    fitresult, cache, report, y_pred = ([], [], [], [])
    # BUG FIX: the loop below mutates the atomic model's `delta` field. Previously
    # it was never restored, so fitting permanently altered the user's model (and
    # an error mid-loop left it at an arbitrary quantile). Save it up front and
    # restore it in `finally` so fitting has no lasting side effect on the model.
    orig_delta = conf_model.model.delta
    try
        for qvalue in sort([orig_delta, 1 - orig_delta])
            conf_model.model.delta = qvalue
            μ̂ᵢ, cacheᵢ, reportᵢ = MMI.fit(conf_model.model, verbosity, Xtrain, ytrain)
            push!(fitresult, μ̂ᵢ)
            push!(cache, cacheᵢ)
            push!(report, reportᵢ)
            # Calibration-set predictions for this quantile level:
            ŷᵢ = reformat_mlj_prediction(MMI.predict(conf_model.model, μ̂ᵢ, Xcal))
            push!(y_pred, ŷᵢ)
        end
    finally
        conf_model.model.delta = orig_delta
    end

    # Nonconformity scores: heuristic(y, lower-quantile prediction, upper-quantile prediction).
    # `y_pred[1]`/`y_pred[2]` are lower/upper because the levels were sorted above.
    conf_model.scores = @.(conf_model.heuristic(ycal, y_pred[1], y_pred[2]))
    return (fitresult, cache, report)
end
# Prediction
@doc raw"""
    MMI.predict(conf_model::ConformalQuantileRegressor, fitresult, Xnew)

For the [`ConformalQuantileRegressor`](@ref) prediction intervals are computed as follows,

``
\hat{C}_{n,\alpha}(X_{n+1}) = [\hat\mu_{\alpha_{lo}}(X_{n+1}) - \hat{q}_{n, \alpha}^{+} \{S_i^{\text{CAL}} \}, \hat\mu_{\alpha_{hi}}(X_{n+1}) + \hat{q}_{n, \alpha}^{+} \{S_i^{\text{CAL}} \}], \ i \in \mathcal{D}_{\text{calibration}}
``

where ``\mathcal{D}_{\text{calibration}}`` denotes the designated calibration data.
"""
function MMI.predict(conf_model::ConformalQuantileRegressor, fitresult, Xnew)
    # One prediction column per fitted quantile model (lower, then upper):
    ŷ = [
        reformat_mlj_prediction(
            MMI.predict(conf_model.model, μ̂ᵢ, MMI.reformat(conf_model.model, Xnew)...)
        ) for μ̂ᵢ in fitresult
    ]
    ŷ = reduce(hcat, ŷ)
    v = conf_model.scores
    # BUG FIX: use the finite-sample-corrected plus-quantile `qplus`, as the
    # `SimpleInductiveRegressor` predict method does, instead of the plain
    # empirical `StatsBase.quantile`. The plain quantile lacks the (n+1)/n
    # correction required for the conformal marginal-coverage guarantee
    # (Romano et al., "Conformalized Quantile Regression", 2019).
    q̂ = qplus(v, conf_model.coverage)
    # Widen the predicted quantile band symmetrically by q̂ on each side:
    ŷ = map(yᵢ -> (minimum(yᵢ .- q̂), maximum(yᵢ .+ q̂)), eachrow(ŷ))
    ŷ = reformat_interval(ŷ)
    return ŷ
end