-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathDIVAnd_cv.jl
387 lines (271 loc) · 11.4 KB
/
DIVAnd_cv.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# nofmt
# disable formatting: https://github.com/domluna/JuliaFormatter.jl/issues/72
"""
bestfactorl,bestfactore, cvval,cvvalues, x2Ddata,y2Ddata,cvinter,xi2D,yi2D = DIVAnd_cv(mask,pmn,xi,x,f,len,epsilon2,nl,ne,method;...);
Performs a cross validation to estimate the analysis parameters
(correlation length and signal-to-noise ratio).
# Input
Same as for `DIVAndrun` with three more parameters `nl`,`ne` and `method`
* `mask`: binary mask delimiting the domain. true is inside and false outside.
For oceanographic application, this is the land-sea mask.
* `pmn`: scale factor of the grid. pmn is a tuple with n elements. Every
element represents the scale factor of the corresponding dimension. Its
inverse is the local resolution of the grid in a particular dimension.
* `xi`: tuple with n elements. Every element represents a coordinate
of the final grid on which the observations are interpolated
* `x`: tuple with n elements. Every element represents a coordinate of
the observations
* `f`: value of the observations *minus* the background estimate (m-by-1 array).
(see note)
* `len`: correlation length
* `epsilon2`: error variance of the observations (normalized by the error variance of the background field). `epsilon2` can be a scalar (all observations have the same error variance and their errors are decorrelated), a vector (all observations can have a difference error variance and their errors are decorrelated) or a matrix (all observations can have a difference error variance and their errors can be correlated). If `epsilon2` is a scalar, it is thus the *inverse of the signal-to-noise ratio*.
* `nl`: number of testing points around the current value of L. `1` means one additional point on both sides of the current L. `0` is allowed and means the parameter is not optimised.
* `ne`: number of testing points around the current value of epsilon2. `0` is allowed as for `nl`
* `method`: cross validation estimator method
1: full CV
2: sampled CV
3: GCV
0: automatic choice between the three possible ones, default value
* Optional input arguments specified via keyword arguments are the same as for `DIVAnd`
# Output:
* `bestfactorl`: best estimate of the multiplication factor to apply to len
* `bestfactore`: best estimate of the multiplication factor to apply to epsilon2
* `cvvales` : the cross validation values calculated
* `factors` : the tested multiplication factors
* `cvinter` : interpolated cv values for final optimisation
* `X2Data, Y2Data` : coordinates of sampled cross validation in `L,epsilon2` space . Normally only used for debugging or plotting
* `Xi2D, Yi2D` : coordinates of interpolated estimator . Normally only used for debugging or plotting
The output `bestfactorl` and `bestfactore` represent multiplication factors which should be applied to `L` and `epsilon2`.
The `len` and `epsilon2` provided should be close the real one as the tests will be performed around.
"""
function DIVAnd_cv(mask, pmn, xi, x, f, len, epsilon2, nl, ne, method = 0;
rng = Random.GLOBAL_RNG, otherargs...)
# check inputs
# For the moment, hardwired values
switchvalue1 = 130
switchvalue2 = 1000
samplesforHK = 75
# with of window so sample in log scale, so order of magnitudes worder
# For length scales, one order of magnitude; make sure the grid is fine enough
worderl = 1.0
# For noise, almost two order of magnitutes
wordere = 1.5
# sample multiplication factor to optimise in log space
if nl > 0
logfactorsl = collect(range(-worderl, stop = worderl, length = 2 * nl + 1))
else
logfactorsl = [0]
end
factorsl = 10 .^ logfactorsl
if ne > 0
logfactorse = collect(range(-wordere, stop = wordere, length = 2 * ne + 1))
else
logfactorse = [0]
end
factorse = 10 .^ logfactorse
# cvvalues at the locations
cvvalues = zeros((2 * nl + 1) * (2 * ne + 1))
# For later interpolation quality might vary
epsilon2in = zeros((2 * nl + 1) * (2 * ne + 1))
x2Ddata = zeros((2 * nl + 1) * (2 * ne + 1))
y2Ddata = zeros((2 * nl + 1) * (2 * ne + 1))
# Define method used
# 1: full CV
# 2: sampled CV
# 3: GCV
# 0: automatic choice, default value
# For automatic choice this will be done later once the exact number of useful data is known
d0d = 0
d0dmd1d = 0
ip = 0
for i = 1:size(factorsl)[1]
for j = 1:size(factorse)[1]
ip = ip + 1
x2Ddata[ip] = logfactorsl[i]
y2Ddata[ip] = logfactorse[j]
fi, s = DIVAndrun(
mask,
pmn,
xi,
x,
f,
len .* factorsl[i],
epsilon2 .* factorse[j];
otherargs...,
)
residual = DIVAnd_residualobs(s, fi)
obsin = .!s.obsout
nrealdata = sum(obsin)
d0d = s.obsconstrain.yo[obsin] ⋅ s.obsconstrain.yo[obsin]
d0dmd1d = s.obsconstrain.yo[obsin] ⋅ residual[obsin]
# Determine which method to use
if method == 0
mymethod = 2
if nrealdata < switchvalue1
mymethod = 1
end
if nrealdata > switchvalue2
mymethod = 3
end
else
mymethod = method
end
# Check if more samples than data are asked switch to direct method
if mymethod == 2
if nrealdata < samplesforHK
mymethod = 1
end
end
# TO DO : THINK ABOUT A VERSION WITH 30 REAL ESTIMATES OF KII AND THE RESIDUAL ONLY THERE
# c
# unique(collect(rand(1:1000,200)))[1:30]
if mymethod == 1
cvval = DIVAnd_cvestimator(s, residual ./ (1 .- DIVAnd_diagHKobs(s)))
epsilon2in[ip] = 1 / 5000
end
if mymethod == 2
# alternate version to test: sampling
# find(x -> x == 3,z)
# onsea=find(x->x == 0,s.obsout);
onsea = findall(s.obsout .== 0)
lonsea = length(onsea)
# @warn "So",lonsea
# if optimisation is to be used, make sure to use the same reference random points
Random.seed!(rng,nrealdata)
# otherwise you add noise to the cv field
indexlist1 =
unique(collect(rand(rng,1:lonsea, 50 * samplesforHK)))[1:samplesforHK]
# reseed
Random.seed!(rng,rand(rng,UInt32))
indexlist = onsea[indexlist1]
# indexlist=collect(1:lonsea);
residualc = zeros(length(residual))
residualc[indexlist] =
residual[indexlist] ./ (1 .- DIVAnd_diagHKobs(s, indexlist))
scalefac = float(nrealdata) / float(samplesforHK)
cvval = scalefac * DIVAnd_cvestimator(s, residualc)
epsilon2in[ip] = 1 / 5000
end
if mymethod == 3
work = (1 - DIVAnd_GCVKiiobs(s))
cvval = DIVAnd_cvestimator(s, residual ./ work)
epsilon2in[ip] = 1 / 2000 / work^2
end
cvvalues[ip] = cvval
end
end
#####################
# When no sampling is requested, just return CV value (for DIVAnd_qc) and a
# multiplication factor for epsilon2 based on the Derozier adaptation idea
# ll1= d0d/(d0d-d1d)-1
#
if (ne == 0 && nl == 0)
@warn "There is no parameter optimisation done (nl=$nl, ne=$ne)"
ll1 = d0d / (d0dmd1d) - 1
eps1 = 1 / ll1
if ndims(epsilon2) == 0
eps2 = epsilon2
elseif ndims(epsilon2) == 1
eps2 = mean(epsilon2)
else
eps2 = mean(diag(epsilon2))
end
return cvvalues[1], eps1 / eps2
end
# Now interpolate and find minimum using 1D DIVAnd or 2D DIVAnd depending on the situation
if nl == 0
# interpolate only on epsilon
epsilon2inter = collect(range(-wordere * 1.1, stop = 1.1 * wordere, length = 101))
maskcv = trues(size(epsilon2inter))
pmcv = ones(size(epsilon2inter)) / (epsilon2inter[2] - epsilon2inter[1])
lenin = wordere
m = Int(ceil(1 + 1 / 2))
alpha = [binomial(m, k) for k = 0:m]
alpha[1] = 0
cvinter, scv = DIVAndrun(
maskcv,
(pmcv,),
(epsilon2inter,),
(logfactorse,),
cvvalues,
lenin,
epsilon2in;
alpha = alpha,
alphabc = 0,
)
bestvalue = findmin(cvinter)
posbestfactor = bestvalue[2]
cvval = bestvalue[1]
bestfactor = 10^epsilon2inter[posbestfactor]
return bestfactor, cvval, cvvalues, logfactorse, cvinter, epsilon2inter
end
if ne == 0
# interpolate only on L
linter = collect(range(-worderl * 1.1, stop = 1.1 * worderl, length = 101))
maskcv = trues(size(linter))
pmcv = ones(size(linter)) / (linter[2] - linter[1])
lenin = worderl
m = Int(ceil(1 + 1 / 2))
alpha = [binomial(m, k) for k = 0:m]
alpha[1] = 0
cvinter, scv = DIVAndrun(
maskcv,
(pmcv,),
(linter,),
(logfactorsl,),
cvvalues,
lenin,
epsilon2in;
alpha = alpha,
alphabc = 0,
)
bestvalue = findmin(cvinter)
posbestfactor = bestvalue[2]
cvval = bestvalue[1]
bestfactor = 10^linter[posbestfactor]
return bestfactor, cvval, cvvalues, logfactorsl, cvinter, linter
end
# Otherwise 2D
maskcv, (pm2D, pn2D), (xi2D, yi2D) = DIVAnd_rectdom(
range(-worderl * 1.1, stop = worderl * 1.1, length = 71),
range(-wordere * 1.1, stop = wordere * 1.1, length = 71),
)
# correlation length
lenin = (worderl, wordere)
m = Int(ceil(1 + 2 / 2))
alpha = [binomial(m, k) for k = 0:m]
alpha[1] = 0
cvinter, scv = DIVAndrun(
maskcv,
(pm2D, pn2D),
(xi2D, yi2D),
(x2Ddata, y2Ddata),
cvvalues,
lenin,
epsilon2in;
alpha = alpha,
alphabc = 0,
)
bestvalue = findmin(cvinter)
posbestfactor = bestvalue[2]
cvval = bestvalue[1]
bestfactorl = 10^xi2D[posbestfactor]
bestfactore = 10^yi2D[posbestfactor]
return bestfactorl, bestfactore, cvval, cvvalues, x2Ddata, y2Ddata, cvinter, xi2D, yi2D
end
# Copyright (C) 2008-2019 Alexander Barth <[email protected]>
# Jean-Marie Beckers <[email protected]>
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
# LocalWords: fi DIVAnd pmn len diag CovarParam vel ceil moddim fracdim