-
Notifications
You must be signed in to change notification settings - Fork 4
/
dynamic_shift_detector.R
421 lines (346 loc) · 17.6 KB
/
dynamic_shift_detector.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
#script to analyse time series data to determine break points
#assume data is in form of data frame where first column is year, second is abundance (Nt)
#create function that makes an additional response variable Nt1
#where Nt1 is the abundance in the next year
addNt1<-function(data){
#create empty vector to feed 'next year' abundance into
Nt1 = c()
#pull out values from the abundance column-
#for each year, go to the NEXT year's abundance, put that in the vector
for (i in 1:(length(data[,2])-1)) {
Nt1 = c(Nt1,(data[,2])[i+1])
}
#cut out last sampling year, because there is no Nt+1 for that year
data<-data[which(data[,1]<max(data[,1])),]
#add the vector of new values to the data fram
data$Nt1<-Nt1
#rename columns in data set so that they're consistent no matter
#what they're named before importing
names(data)[1:2]<-c("year", "Nt")
#spit out that new data frame
return(data)
}
#lets also create a function that will calculate an AICc correction factor
#which can be added to the AIC values we compute- because we have small sample
#sizes we need to more heavily penalize models with more parameters
AICcorrection<-function(data, breaks){
a<-3*(breaks+1) # a is the number of parameters estimated each fit (r, k, error)
correction<-(2*a*(a+1))/(nrow(data)-a-1)
return(correction)
}
#load minpack.lm
#this is my prefered nonlinear package- fewer convergence issues.
#we'll want to use its nlsLM function to fit the Ricker equation.
library(minpack.lm)
#create function that fits the ricker model
rickerfit<-function (data){
#create an initial estimate of k to aide model convergence
kest<-mean(data$Nt)
#supress warnings about failed convergence in oddball fits- these models won't be favoured by AIC anyway
options(warn=-1)
#fit the model
ricker.model<-tryCatch(nlsLM(Nt1~ Nt*exp(r*(1- Nt/k)), start=list(r=1.5, k=kest), data=data), error=function(e) NULL)
#What outputs do we need from each run? AIC, r and k, and their resepective errors.
#we'll want to create a vecor with this information in it so we can use this information later
if(is.list(ricker.model)){
output<-c(AIC(ricker.model), #AIC
summary(ricker.model)$coefficients[1,1], # r
summary(ricker.model)$coefficients[1,2], # se for r
summary(ricker.model)$coefficients[2,1], # k
summary(ricker.model)$coefficients[2,2]) # se for k
}else{
output<-c(100000000, 0, 0, 0, 0)#if the model fails to converge, give it an an arbitrarily high but finite AIC
}
options(warn=0)#turn warnings back on
return(output)
}
#now we need to build a tool that will cut a time series up,
#fit the model, and spit out relevant parameters
#we probably want to write a function for each break point combo
#then smash them into a function that figures out what the best model is
# Okay, so now we want a generalized model so it can handle N break point cases
# this is a bit more complex because it'll require some recursion from within the function
# and because there will be N break cases, we won't be able to plan a 2 dimensional data frame
# that can have columns for all break cases, as I'd originally hoped. I think the best way to do this is by creating a
# list of break points and an associated list of AICs for the fit, and then use the sum function
# for the AICs when including them in the output, but include the break point list as a single
#element in the output data frame
# so we want a 2 column data frame with a list of the breaks, and the
#resultant AICc as columns, respectively.
breaks<-list() #create empty LIST for storing breaks
fit<-list() #create empty LIST for storing associated AICs
out.frame<-data.frame(matrix(vector(), 0, 2,
dimnames=list(c(), c("Breaks", "AICs"))),
stringsAsFactors=F)
splitnfit<-function(data, breaks, fit, out.frame){ #need to include vectors for breaks and fit to re-feed into this function
#first fit no break model, put it in the data frame
fit.0<-rickerfit(data) #fit the model
out<-cbind(list(max(data$year)), list(fit.0[1])) #output vector with no breaks
out.frame<-rbind(out.frame, out)
Break1<-min(data$year)+3 #create first breakpoint four years into the time series to avoid overfitting
while(Break1<(max(data$year))){
part1<-data[which(data$year<Break1),] #create subsets at the breakpoint
part2<-data[which(data$year>(Break1-1)),]
if(nrow(part1)>3 & nrow(part2)>3){ #constrain model to run only when 4 or more points are present
fit1<-rickerfit(part1) #fit the model to part 1
fit2<-rickerfit(part2) #fit the model to part 2
breaks.1<-c(breaks, max(part1$year), max(part2$year)) #breaks for one break
fit.1<-c(fit, fit1[1], fit2[1]) #fit for one break
out[1]<-list(breaks.1)#create output vector of two lists
out[2]<-list(fit.1)
out.frame<-rbind(out.frame, out) #bind it to previous results
}
Break1<-Break1+1 #move the break to next year
}
#rename columns in output for some reason
colnames(out.frame)<- c("Breaks", "AICs")
return(out.frame)
}
#lets take our results frame from splitnfit and test each break combinaton for the
# the ability to be broken up more. This situation will occur when the last two years
# in the breaks list are more than 5 years apart
findbreakable<-function(data){ #create a function that finds if the last subset of the data is still breakable
breakable<-c() #create vector to put results in
for (i in 1:nrow(data)){ #for each row in the data frame
if (length(unlist(data$Breaks[i]))>1){ #if the data has been subset
breakvector<-unlist(data$Breaks[i]) #create a vector of the breaks
L<-length(breakvector) # find out how long breakvector is
difference<-breakvector[L]-breakvector[L-1] #find out how big the last subset is
if(difference>5){ #we can break it down more if the last subset has more than 5 points in it
breakable.i<-TRUE
}else{
breakable.i<-FALSE #don't break more if the subset is 5 or smaller
}
}else{
breakable.i<-FALSE # don't break it down more if the data is frm the zero breaks model
}
breakable<-c(breakable, breakable.i)
}
return(breakable)
}
#create a function that uses findbreakable to apply splitntfit to the datasets that are still breakble
out.frame<-data.frame(matrix(vector(), 0, 2, #create an empty data frame we can add to later
dimnames=list(c(), c("Breaks", "AICs"))),
stringsAsFactors=F)
subsequentsplit<-function(fitdata, rawdata){
keepers<-findbreakable(fitdata) #find subsets that are still breakable
newfitdata<-fitdata[which(keepers==TRUE),] #create new data frame with only these data in it
breaklist<-newfitdata$Breaks #pull out our two operational objects out of data frame
fitlist<-newfitdata$AICs
result<-data.frame(matrix(vector(), 0, 2,
dimnames=list(c(), c("Breaks", "AICs"))),
stringsAsFactors=F)
if(nrow(newfitdata)>0){ #if there are subsets with still breakable data
for(i in 1:nrow(newfitdata)){ #for each row in the new frame we need to break down
breakvector<-unlist(breaklist[i]) #turn the list element back into a vector
fitvector<-unlist(fitlist[i])
breakvector<-breakvector[-length(breakvector)]#remove the last element from each
fitvector<-fitvector[-length(fitvector)]
cullpoint<-max(breakvector) #find point of last break
testdata<-rawdata[which(rawdata$year>cullpoint),]
out<-splitnfit(testdata, breakvector, fitvector, out.frame)
out<-out[-1,] #remove the no-break fit, we don't need that
result<-rbind(result, out)
}
}else{ #if there is no more room for breaks in any of the fits
result<-result #leave result empty
}
return(result)
}
#okay, now let's put this all together into a function that will fit all the breaks
nbreaker<-function(data){
breaks<-list() #create empty LIST for storing breaks
fit<-list() #create empty LIST for storing associated AICs
out.frame<-data.frame(matrix(vector(), 0, 2,
dimnames=list(c(), c("Breaks", "AICs"))),
stringsAsFactors=F)
onebreak<-splitnfit(data, breaks, fit, out.frame)#get fits for zero and one break
feed<-onebreak #create derrived data to feed into the fit
out<-onebreak #prepare these data to be output
keepers<-findbreakable(onebreak) #find subsets that are still breakable
newfitdata<-onebreak[which(keepers==TRUE),] #create new data frame with only these data in it
stillbreakable<-nrow(newfitdata)
while(stillbreakable>0){ #if there is data that can still be broken up
feed<-subsequentsplit(feed, data) #fit the subsequent split using data fed from last breaks
out<-rbind(out, feed) #attach this break iteration to the data frame
#see if there's more breaks to be had
keepers<-findbreakable(feed) #find subsets that are still breakable
newfitdata<-feed[which(keepers==TRUE),] #create new data frame with only these data in it
stillbreakable<-nrow(newfitdata)
}
return(out)
}
#victory! now we need to extract the data we've produced from this data frame, sum up AICs and add
# the corrections, and identify the best models
#function to tally up AICs
AICtally<-function(data){ #create a function that adds up the AICs in the list
fitdata<-nbreaker(data)
AICtots<-c() #create vector to put results in
N_AIC<-c() #create a vector to put counts of AICs in
for (i in 1:nrow(fitdata)){ #for each row in the data frame
AICsvector<-unlist(fitdata$AICs[i]) #create a vector of the AICs for the fit
total<-sum(AICsvector)#add up the AICs
N<-length(AICsvector)#count the AICs
AICtots<-c(AICtots, total)# add the total for each cell to the vector
N_AIC<-c(N_AIC, N) #add the count of AICs
}
out<-as.data.frame(cbind(AICtots, N_AIC)) #bind the outputs into a data frame
colnames(out)<- c("AICtot", "Nfits") #name the columns
#now we want to calculate the AICc correction for each
out$Nbreaks<-out$Nfits-1
out$AICc<-out$AICtot+AICcorrection(data, out$Nbreaks) #n breaks = n fits-1, add correction to AIC
return(out)
}
#see if AICtally outputs will stick to nbreaker outputs- and create a function that does this
allfits<-function(data){
out<-as.data.frame(cbind(nbreaker(data), AICtally(data))) #stick output from two functions into a df
return(out)
}
#create a function that finds equivalent fits in n breakpoint data
#add functionality for switching between AIC and AICc, with AIC as default
equivalentfit<-function(data, criterion){
breakset<-allfits(data) #generate matrix of fits by breakpoints
if(missing(criterion)){ #set AIC as the default criterion
criterion<-"AIC"
}
if(criterion=="AIC"){
AICbest<-min(breakset$AICtot) #find best AIC in the set
deltaAIC<-AICbest+2 # create rule for equivalent models
out.frame<-breakset[which(breakset$AICtot<deltaAIC),] #cut out all data but equivalent models
}
if (criterion=="AICc"){
AICbest<-min(breakset$AICc) #find best AIC in the set
deltaAIC<-AICbest+2 # create rule for equivalent models
out.frame<-breakset[which(breakset$AICc<deltaAIC),] #cut out all data but equivalent models
}
return(out.frame)
}
#but equivalent fit is one thing- if there's equivalent fit and one of the model set has
# fewer parameters, we obviously want to go with that. create a function that does that but this time for n breaks
bestfit<-function(data, criterion){
if(missing(criterion)){ #set AIC as the default criterion
criterion<-"AIC"
}
breakset<-equivalentfit(data, criterion) #get set of eqivalent models
if(criterion=="AIC"){
AICbest<-min(breakset$AICtot) #find best AIC in the set
out.frame<-breakset[which(breakset$AICtot==AICbest),] #pull models with numerically lowest AIC
}
if (criterion=="AICc"){
AICbest<-min(breakset$AICc) #find best AIC in the set
out.frame<-breakset[which(breakset$AICc==AICbest),] #pull models with numerically lowest AIC
}
return(out.frame)
}
#cool. looks like we can now adapt the model fitting function for these n breakpoint data
bestmodel<-function(data, criterion){
modelspecs<-bestfit(data, criterion) #get the particulars of the best model
out.frame<-data.frame(matrix(vector(), 0, 7,
dimnames=list(c(),
c("Year1", "Year2", "AIC", "r", "rse", "k", "kse"))),
stringsAsFactors=F)#Create a place to put our data
breakvector<-unlist(modelspecs$Breaks[1])#pull out a vector of the max year in each fit
if (length(breakvector)<=1){ #if there's no breaks
fit<-rickerfit(data)
output<-c(min(data$year), max(data$year), fit) #fit whole data series + output results
out.frame<-rbind(out.frame, output)
} else {
for (i in 1:(length(breakvector))){ #for all breakpoints, including the end of the time series, in order
part1<-data[which(data$year<breakvector[i]+1),] #create subsets at the breakpoint
part2<-data[which(data$year>breakvector[i]),]
fit1<-rickerfit(part1) #fit first segment
output<-c(min(part1$year), max(part1$year), fit1)#save results of fitting segment in vector
out.frame<-rbind(out.frame, output)#put output for segment in a data frame
data<-part2 #update data to cull out already fitted segments
}
}
colnames(out.frame)<- c("Year1", "Year2", "AIC", "r", "rse", "k", "kse")
return(out.frame)
}
#adapt this function to provide specs of any fit information, as output in the format of the bestfit function
modelspecification<-function(specs, data){
modelspecs<-specs #get the particulars of the best model
out.frame<-data.frame(matrix(vector(), 0, 7,
dimnames=list(c(),
c("Year1", "Year2", "AIC", "r", "rse", "k", "kse"))),
stringsAsFactors=F)#Create a place to put our data
breakvector<-unlist(modelspecs$Breaks[1])#pull out a vector of the max year in each fit
if (modelspecs$Nbreaks[1]==0){ #if there's no breaks
fit<-rickerfit(data)
output<-c(min(data$year), max(data$year), fit) #fit whole data series + output results
out.frame<-rbind(out.frame, output)
} else {
for (i in 1:(length(breakvector))){ #for all breakpoints, including the end of the time series, in order
part1<-data[which(data$year<breakvector[i]+1),] #create subsets at the breakpoint
part2<-data[which(data$year>breakvector[i]),]
fit1<-rickerfit(part1) #fit first segment
output<-c(min(part1$year), max(part1$year), fit1)#save results of fitting segment in vector
out.frame<-rbind(out.frame, output)#put output for segment in a data frame
data<-part2 #update data to cull out already fitted segments
}
}
colnames(out.frame)<- c("Year1", "Year2", "AIC", "r", "rse", "k", "kse")
return(out.frame)
}
#looks like that works! Okay! put it all together like we did for the 2 break model
DSdetector<-function(data, criterion){ #use raw time series data
#plot the data
plot(data)
data1<-addNt1(data)
plot(data1$Nt, data1$Nt1)
#give an output of all possible break point combinations tested
writeLines(paste("Here are the break points for all models tested"))
print(allfits(data1))
#output models with equivalent performance
writeLines(paste("Here is the set of best performing models"))
print(equivalentfit(data1, criterion))
#output model with best performance
writeLines(paste("Here is the best model- the one with the lowest AIC/AICc, or fewest parameters, in case of a tie"))
print(bestfit(data1, criterion))
# output regression parameters of best model
writeLines(paste("Here is the set of regression parameters"))
print(bestmodel(data1, criterion))
}
#looks like we have a working model! Boom!
#Okay, but what about 'break weights'- how 'strong' are the given breaks? Let's take some inspiration
#from multimodel interference and calculate the weight of a given break that the model finds
#first, we need to calculate the model weights for all of the fits
allweights<-function(data, criterion){
breakset<-allfits(data) #generate matrix of fits by breakpoints
if(missing(criterion)){ #set AIC as the default criterion
criterion<-"AIC"
}
if(criterion=="AIC"){
AICbest<-min(breakset$AICtot) #find best AIC in the set
deltaAIC<-breakset$AICtot-AICbest
sumdeltaAIC<-sum(exp(-deltaAIC/2))
}
if (criterion=="AICc"){
AICbest<-min(breakset$AICc) #find best AIC in the set
deltaAIC<-breakset$AICc-AICbest
sumdeltaAIC<-sum(exp(-deltaAIC/2))
}
breakset$modelweights<-(exp(-deltaAIC/2))/sumdeltaAIC
out.frame<-breakset[which(breakset$modelweights>0.001),] #cut out all models with weight less than 0.01, just to speed things up
return(out.frame)
}
#now we need a function that calculates parameter weights for the breaks
breakweights<-function(data, criterion){
modelset<-allweights(data, criterion)#find all models with weights over 0.001
breaksfound<-sort(unique(unlist(modelset$Breaks))) #get a list of unique breaks, sort it in numerical order
breakweights<-c() #blank vector to put the breaks in
for(i in 1:length(breaksfound)){
weightcounter<-0
for (j in 1:length(modelset$modelweights)){
if(breaksfound[i] %in% unlist(modelset$Breaks[j])){
weightcounter<-weightcounter+modelset$modelweights[j] #add model weight of parameter where that break appears
}else{
weightcounter<-weightcounter+0
}
}
breakweights<-c(breakweights, weightcounter)
}
correctedweights<-breakweights/max(breakweights) #breakweights is based on best model set- correct to normalize by break at the end of time series
out.frame<-as.data.frame(cbind(breaksfound, breakweights, correctedweights))
return(out.frame)
}