This repository has been archived by the owner on Oct 8, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.clj
214 lines (184 loc) · 7.25 KB
/
analysis.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
(ns forma.trends.analysis
(:use [forma.matrix.utils]
[clojure.math.numeric-tower :only (sqrt floor abs expt)]
[forma.date-time :as date]
[clojure.tools.logging :only (error)])
(:require [forma.utils :as utils]
[incanter.core :as i]
[incanter.stats :as s]))
;; Hansen break statistic
(defn linear-residuals
"returns an incanter vector of residuals from a linear model; cribbed from
incanter.stats linear model.
Options:
:intercept (default true)
Example:
(use 'forma.trends.data)
(linear-residuals ndvi (utils/idx ndvi))"
[y x & {:keys [intercept] :or {intercept true}}]
(let [_x (if intercept (i/bind-columns (replicate (i/nrow x) 1) x) x)
xt (i/trans _x)
xtx (i/mmult xt _x)
coefs (i/mmult (i/solve xtx) xt y)]
(i/minus y (i/mmult _x coefs))))
(defn first-order-conditions
"returns an incanter matrix of dimension 3xT, where T is the number
of periods in the supplied time series `ts`. The values are based
on a regression of `ts` on a constant and an index. The first row is
the residual weighted time step variable; the values of the second
row are the residuals themselves; and the third row is demeaned
squared residuals. The matrix collects the first order conditions
$f_t$ in the Hansen (1992) reference listed below.
Example:
(use 'forma.trends.data)
(first-order-conditions (i/matrix ndvi))
References:
Hansen, B. (1992) Testing for Parameter Instability in Linear Models,
Journal for Policy Modeling, 14(4), pp. 517-533"
[ts]
{:pre [(i/matrix? ts)]}
(let [X (i/matrix (utils/idx ts))
resid (linear-residuals ts X)
sq-resid (i/mult resid resid)
mu (utils/average sq-resid)]
(i/trans (i/bind-columns
(i/mult resid X)
resid
(i/minus sq-resid mu)))))
(defn hansen-stat
"Returns the Hansen (1992) test statistic, based on (1) the
first-order conditions, and (2) the cumulative first-order
conditions. The try statement will most likely throw an exception if
`foc-mat` is singular, which will occur when the hansen-stat is
applied to a constant timeseries. If the matrix is singular,
`hansen-stat` will return nil, which will be filtered in the
subsequent cascalog join.
Interpretation: A large break will tend to generate a large break
statistic. From Hansen (1992):
Under the alternative hypothesis of parameter instability,
however, the cumulative sums will develop a nonzero mean in
parts of the sample, so Sn will not behave like a random walk,
and the test statistic will tend to be large
Example:
(hansen-stat ndvi) => 0.9113
Benchmark:
(time (dotimes [_ 100] (hansen-stat ndvi)))
=> Elapsed time: 157.507924 msecs"
[ts]
(let [ts-mat (i/matrix ts)
foc (first-order-conditions ts-mat)
foc-mat (i/mmult foc (i/trans foc))
focsum (map i/cumulative-sum foc)
focsum-mat (i/mmult focsum (i/trans focsum))
scaled-foc (i/mult foc-mat (i/nrow ts-mat))]
(if-not (singular? scaled-foc)
(-> (i/solve scaled-foc)
(i/mmult focsum-mat)
(i/trace))
nil)))
;; Long-term trend characteristic; supporting functions
(defn trend-characteristics
"returns a nested vector of (1) the OLS coefficients from regressing
a vector `y` on cofactor matrix `x`, and (2) the associated
t-statistics, ordered appropriately.
Options:
:intercept (default true)
Example:
(use 'forma.trends.data)
(trend-characteristics ndvi (utils/idx ndvi))
=> [[7312.6512 -1.1430] [37.4443 -0.9183]]
"
[y x & {:keys [intercept] :or {intercept true}}]
(let [_x (if intercept (i/bind-columns (replicate (i/nrow x) 1) x) x)
xt (i/trans _x)
xtx (i/mmult xt _x)]
(if-not (singular? xtx)
(let [coefs (i/mmult (i/solve xtx) xt y)
fitted (i/mmult _x coefs)
resid (i/to-list (i/minus y fitted))
sse (i/sum-of-squares resid)
mse (/ sse (- (i/nrow y) (i/ncol _x)))
coef-var (i/mult mse (i/solve xtx))
std-errors (i/sqrt (i/diag coef-var))
t-tests (i/div coefs std-errors)]
[coefs t-tests])
[nil nil])))
(defn long-stats
"returns a tuple with the trend coefficient and associated
t-statistic for the supplied time series `ts`, based on a linear
regression on an intercept, time step, and a variable number of
cofactors (e.g., rain).
Example:
(use 'forma.trends.data)
(long-stats ndvi rain) => (-1.2382 -0.9976)
(long-stats ndvi) => (-1.1430 -0.9183)"
[ts & cofactors]
(let [cofactors (remove utils/constant? cofactors)
time-step (utils/idx ts)
X (if (empty? cofactors)
(i/matrix time-step)
(apply i/bind-columns time-step cofactors))]
(map second (trend-characteristics ts X))))
;; Short-term trend characteristic; supporting functions
(defn trend-mat
"returns a Tx2 incanter matrix, with first column of ones and the
second column ranging from 1 through T. Used as a cofactor matrix
to calculate short-term OLS coefficients.
Example:
(trend-mat 10)
(type (trend-mat 10)) => incanter.Matrix"
[T]
(let [ones (repeat T 1)]
(i/bind-columns ones (utils/idx ones))))
(defn pseudoinverse
"returns the pseudoinverse of a matrix `x`; the coefficient vector
of OLS is the dependent variable vector premultiplied by the
pseudoinverse of the cofactor matrix `x`. Note that the dimensions
will be (k x N), where k is the number of regressors and N is the
number of observations.
Example:
(pseudoinverse (trend-mat 10))
References:
Moore-Penrose Pseudoinverse (wikipedia): goo.gl/TTJwv"
[x]
{:pre [(i/matrix? x)]}
(let [xt (i/trans x)]
(i/mmult (i/solve (i/mmult xt x)) xt)))
(defn grab-trend
"returns the trend coefficient from an OLS regression of a from an
ordinary least squares regression of a `coll` of values on an
intercept and time step.
Example:
(use 'forma.trends.data)
(def pseudo-mat (pseudoinverse (trend-mat 30)))
(grab-trend pseudo-mat (take 30 ndvi))"
[pseudo-trend-mat coll]
(let [v (i/matrix coll)]
(second (i/mmult pseudo-trend-mat v))))
(defn windowed-trend
"returns a vector of short-term trend coefficients of block length
`block-len`"
[block-len ts]
(let [pseudo-mat (pseudoinverse (trend-mat block-len))]
(map (partial grab-trend pseudo-mat)
(partition block-len 1 ts))))
(defn short-stat
"returns a single value indicating the largest, short-term drop in
`ts`. The long-block length indicates the block length over which
the trend coefficient is calculated; for NDVI analysis, should be of
length over 1 year. The short-block length is used to smooth the
time-series.
Example:
(short-stat 30 10 ndvi) => -63.334638487208096"
[long-block short-block ts]
(->> (windowed-trend long-block ts)
(utils/moving-average short-block)
(reduce min)))
(defn short-stat-all
"Similar to short stat, but returns a collection of values, where
each value represents the largest short-term drop seen in the moving
averages of the trends data up to the current moving average block."
[long-block short-block ts]
(->> (windowed-trend long-block ts)
(utils/moving-average short-block)
(reductions min)))