-
Notifications
You must be signed in to change notification settings - Fork 12
/
utils.jl
executable file
·144 lines (121 loc) · 4.3 KB
/
utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
using CategoricalArrays
using StatsBase: quantile
@doc raw"""
qplus(v::AbstractArray, coverage::AbstractFloat=0.9)
Implements the ``\hat{q}_{n,\alpha}^{+}`` finite-sample corrected quantile function as defined in Barber et al. (2020): https://arxiv.org/pdf/1905.02928.pdf.
"""
function qplus(v::AbstractArray, coverage::AbstractFloat=0.9; kwrgs...)
n = length(v)
p̂ = ceil(((n + 1) * coverage)) / n
p̂ = clamp(p̂, 0.0, 1.0)
q̂ = quantile(v, p̂; kwrgs...)
return q̂
end
@doc raw"""
qminus(v::AbstractArray, coverage::AbstractFloat=0.9)
Implements the ``\hat{q}_{n,\alpha}^{-}`` finite-sample corrected quantile function as defined in Barber et al. (2020): https://arxiv.org/pdf/1905.02928.pdf.
"""
function qminus(v::AbstractArray, coverage::AbstractFloat=0.9; kwrgs...)
n = length(v)
p̂ = floor(((n + 1) * coverage)) / n
p̂ = clamp(p̂, 0.0, 1.0)
q̂ = quantile(v, p̂; kwrgs...)
return q̂
end
"""
reformat_interval(ŷ)
Reformats conformal interval predictions.
"""
function reformat_interval(ŷ)
return map(y -> map(yᵢ -> ndims(yᵢ) == 1 ? yᵢ[1] : yᵢ, y), ŷ)
end
"""
reformat_mlj_prediction(ŷ)
A helper function that extracts only the output (predicted values) for whatever is returned from `MMI.predict(model, fitresult, Xnew)`. This is currently used to avoid issues when calling `MMI.predict(model, fitresult, Xnew)` in pipelines.
"""
function reformat_mlj_prediction(ŷ)
return isa(ŷ, Tuple) ? first(ŷ) : ŷ
end
"""
is_regression(ŷ)
Helper function that checks if conformal prediction `ŷ` comes from a conformal regression model.
"""
is_regression(ŷ) = isa(ŷ, Tuple)
"""
is_classification(ŷ)
Helper function that checks if conformal prediction `ŷ` comes from a conformal classification model.
"""
is_classification(ŷ) = typeof(ŷ) <: Union{UnivariateFinite,Missing}
"""
set_size(ŷ)
Helper function that computes the set size for conformal predictions.
"""
function set_size(ŷ)
# Regression:
if is_regression(ŷ)
_size = abs(ŷ[1] - ŷ[2])
end
# Classification:
if is_classification(ŷ)
_size = ismissing(ŷ) ? 0 : Int.(sum(pdf.(ŷ, ŷ.decoder.classes) .> 0))
end
return _size
end
function size_indicator(ŷ::AbstractVector; bins=5, tol=1e-10)
_sizes = set_size.(ŷ)
unique_sizes = unique(_sizes)
# Regression:
if typeof(set_size(ŷ[1])) != Int
if abs.(diff(collect(extrema(set_size.(ŷ)))))[1] < tol
idx = categorical(ones(length(_sizes)))
else
bin_caps = collect(
range(minimum(unique_sizes), maximum(unique_sizes); length=bins + 1)
)[2:end]
idx = map(_sizes) do s
# Check which is the largest bin cap that _size exceeds:
ub = argmax(x -> s - x <= 0 ? s - x : -Inf, bin_caps)
if ub == minimum(bin_caps)
ub = round(ub; digits=2)
lb = round(minimum(_sizes); digits=2)
_idx = "|C| ∈ ($lb,$ub]"
else
ub = round(ub; digits=2)
lb = round(argmin(x -> s - x > 0 ? s - x : Inf, bin_caps); digits=2)
_idx = "|C| ∈ ($lb,$ub]"
end
return _idx
end
idx = categorical(idx)
end
end
# Classification:
if typeof(set_size(ŷ[1])) == Int
bin_caps = collect(1:2:(maximum(unique_sizes) + 1))
idx = map(_sizes) do s
# Check which is the largest bin cap that _size exceeds:
ub = bin_caps[sum(s .> bin_caps) + 1]
if ub > maximum(_sizes)
ub = ub - 1
_idx = "|C| ∈ [$ub]"
else
lb = ub - 1
_idx = "|C| ∈ [$lb,$ub]"
end
return _idx
end
idx = categorical(idx)
end
return idx
end
"""
blockbootstrap(time_series_data, block_szie)
Generate a sampling method, that block bootstraps the given data
"""
function blockbootstrap(time_series, block_size)
n = length(time_series)
bootstrap_sample = similar(time_series)
rand_block = rand(1:(n - block_size))
bootstrap_sample = time_series[rand_block:(rand_block + block_size - 1), :]
return vec(bootstrap_sample)
end