utils.py

import os
from copy import deepcopy
from math import erf, exp, log, pi, sqrt

import cftime
import numpy as np
import xarray as xr


def load_glob_data(datadir, datafile_base, varlist, forcelist, enslist):
    """Load global time series and ensemble means per variable and forcing level."""
data_dict = {}
# compute for each variable
for var in varlist:
print(f"Variable {var}")
data_dict[var] = {
"da_list": [],
"da_mean_list": [],
}
# collect ensemble means for each forcing level
for force in forcelist:
print(f"Forcing {force}")
# load global time series for all ensemble members
da_list = []
for ens in enslist:
print(f"Ensemble {ens}")
datafile = datafile_base.format(
varname=var,
force=force,
ens=ens,
)
datafile = os.path.join(datadir, datafile)
da = xr.open_dataarray(datafile)
da_list.append(da.copy(deep=True))
# store raw data lists
data_dict[var]["da_list"].append(deepcopy(da_list))
# compute ensemble mean, store
da_mean = (xr.concat(da_list, dim="member")).mean(dim="member")
data_dict[var]["da_mean_list"].append(da_mean.copy(deep=True))
return data_dict
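

# Usage sketch for load_glob_data (hypothetical paths and names, not part of
# the original module); datafile_base must carry {varname}/{force}/{ens} fields:
#
#   data = load_glob_data(
#       datadir="/path/to/data",
#       datafile_base="{varname}_force{force}_ens{ens}.nc",
#       varlist=["T", "PRECT"],
#       forcelist=[5, 10],
#       enslist=[1, 2, 3],
#   )
#   data["T"]["da_mean_list"][0]  # ensemble-mean series at the first forcing level

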
def dates_to_years(
    dates,
    calendar="noleap",
    has_year_zero=True,
    from_monthly=False,
):
    """Convert cftime dates to fractional years on a 365-day (noleap) calendar."""
    DAYS_OF_MONTHS = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
first_year = dates[0].year
days = cftime.date2num(
dates,
f"days since {first_year}-01-01",
calendar=calendar,
has_year_zero=has_year_zero,
)
    # fractional years; a noleap calendar has exactly 365 days per year
    years = first_year + days / 365
    # offset to the center of the previous month:
    # monthly data is stamped with the first day of the following month
    if from_monthly:
        for date_idx, date in enumerate(dates):
            # previous month, zero-indexed; January wraps to December via index -1
            month_idx = date.month - 2
            days_in_month = DAYS_OF_MONTHS[month_idx]
            years[date_idx] -= (days_in_month / 2) / 365
return years
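

# Worked example for dates_to_years (a sketch; outputs are approximate):
# a monthly mean stamped 2000-02-01 represents January 2000, so with
# from_monthly=True it maps to mid-January:
#
#   >>> dates = [cftime.DatetimeNoLeap(2000, 2, 1), cftime.DatetimeNoLeap(2000, 3, 1)]
#   >>> dates_to_years(dates, from_monthly=True)
#   # -> approximately array([2000.0425, 2000.1233])

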
def threshold_array(arr_in, direction):
    """Zero out entries whose sign disagrees with the expected direction (+1 or -1)."""
    if direction == 1:
arr_out = (arr_in >= 0.0) * arr_in
elif direction == -1:
arr_out = (arr_in <= 0.0) * arr_in
else:
raise ValueError(f"Unexpected threshold direction: {direction}")
return arr_out
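

# Example for threshold_array (a minimal sketch):
# direction=+1 keeps non-negative entries, direction=-1 keeps non-positive ones:
#
#   >>> threshold_array(np.array([-1.0, 0.5, 2.0]), 1)
#   # -> [0., 0.5, 2.] (the zeroed entry may print as -0.)

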
def calc_peak_values(da_list_in, expected_direction):
    """Find the peak deviation, relative to the first data point, in each DataArray."""
    if not isinstance(da_list_in, list):
        da_list = [da_list_in]
    else:
        da_list = da_list_in
delta_list = []
peak_idx_list = []
for da in da_list:
vals_glob = da.values
# compute deviation with respect to first data point
dev_vals = vals_glob - vals_glob[0]
# expert-informed peak-finding
dev_vals_exp = threshold_array(dev_vals, expected_direction)
# find index of peak, store
peak_idx = np.nanargmax(np.abs(dev_vals_exp))
peak_idx_list.append(peak_idx)
# extract peak deviation, store
delta = dev_vals[peak_idx]
delta_list.append(delta)
return delta_list, peak_idx_list
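

# Example for calc_peak_values (hypothetical data; a minimal sketch): for a
# series that cools and recovers, the peak deviation is the largest cooling:
#
#   >>> da = xr.DataArray([288.0, 287.5, 286.0, 287.0])
#   >>> calc_peak_values(da, expected_direction=-1)
#   # -> delta_list=[-2.0], peak_idx_list=[2]

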
def calc_ovl_binormal(mu_1, mu_2, s_1, s_2):
    """Overlapping coefficient (OVL) of two normal densities with unequal spreads."""
    # the crossing-point formula below assumes s_1 < s_2; swap if not the case
    if s_2 < s_1:
        mu_1, mu_2 = mu_2, mu_1
        s_1, s_2 = s_2, s_1
s_1_sq = s_1**2
s_2_sq = s_2**2
    # compute the two points x_1 < x_2 where the densities cross
    a = mu_1 * s_2_sq - mu_2 * s_1_sq
    b = s_1 * s_2 * sqrt((mu_1 - mu_2) ** 2 + (s_1_sq - s_2_sq) * log(s_1_sq / s_2_sq))
    c = s_2_sq - s_1_sq
    x_1 = (a - b) / c
    x_2 = (a + b) / c
    # OVL integrates min(pdf_1, pdf_2); the wider pdf_2 is the minimum between the crossings
    ovl = (
        1.0
        + normal_cdf(x_1, mu_1, s_1)
        - normal_cdf(x_1, mu_2, s_2)
        - normal_cdf(x_2, mu_1, s_1)
        + normal_cdf(x_2, mu_2, s_2)
    )
return ovl


def normal_cdf(x, mu, s):
    """CDF of a normal distribution with mean mu and standard deviation s."""
    return 0.5 * (1.0 + erf((x - mu) / (s * sqrt(2))))
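

# Sanity checks (a sketch; the OVL value is approximate, computed by hand):
#
#   >>> normal_cdf(0.0, 0.0, 1.0)              # 0.5 by symmetry
#   >>> calc_ovl_binormal(0.0, 0.0, 1.0, 2.0)  # ~0.677 for equal means, spreads 1 and 2
#
# note: calc_ovl_binormal requires s_1 != s_2, since c = 0 would divide by zero

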
def collect_linregress_data(
    data_dict,
    xvars,
    yvar,
    forcelist,
    enslist,
    expected_directions,
    force_var,
    force_obs,
):
    """Assemble peak-impact predictors and targets for regression, holding out force_obs.

    force_obs must appear in forcelist; otherwise the observed arrays are never set.
    """
nens = len(enslist)
x_deltas_concat = None
y_deltas_concat = None
for force_idx, force in enumerate(forcelist):
# collect peak impacts for predictor variables
for x_idx, xvar in enumerate(xvars):
if xvar == force_var:
# forcing variable is not in the peak impact data
x_deltas = force * np.ones(nens, dtype=np.float64)
else:
x_da_list = data_dict[xvar]["da_list"][force_idx]
x_deltas, _ = calc_peak_values(x_da_list, expected_directions[xvar])
x_deltas = np.expand_dims(x_deltas, axis=-1)
if x_idx == 0:
x_deltas_tot = x_deltas.copy()
else:
x_deltas_tot = np.concatenate((x_deltas_tot, x_deltas), axis=1)
# peak impacts for target variables
y_da_list = data_dict[yvar]["da_list"][force_idx]
y_deltas, _ = calc_peak_values(y_da_list, expected_directions[yvar])
y_deltas = np.expand_dims(y_deltas, axis=-1)
# separate "observed" forcing
if force == force_obs:
x_deltas_obs = x_deltas_tot.copy()
y_deltas_obs = y_deltas.copy()
else:
# concatenate for later regression
if x_deltas_concat is None:
x_deltas_concat = x_deltas_tot.copy()
y_deltas_concat = y_deltas.copy()
else:
x_deltas_concat = np.concatenate((x_deltas_concat, x_deltas_tot), axis=0)
y_deltas_concat = np.concatenate((y_deltas_concat, y_deltas), axis=0)
return x_deltas_concat, y_deltas_concat, x_deltas_obs, y_deltas_obs
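

# Usage sketch for collect_linregress_data (hypothetical names and values, not
# part of the original module); the held-out force_obs must appear in forcelist:
#
#   x_fit, y_fit, x_obs, y_obs = collect_linregress_data(
#       data_dict=data,
#       xvars=["SO2", "T"],
#       yvar="PRECT",
#       forcelist=[5, 10, 15],
#       enslist=[1, 2, 3],
#       expected_directions={"T": -1, "PRECT": -1},
#       force_var="SO2",
#       force_obs=10,
#   )

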
def calc_ols(Xin, Yin, intercept=False):
    """Compute ordinary least squares regression coefficients via the normal equations."""
    yobs = Yin.shape[0]
    xobs = Xin.shape[0]
    assert yobs == xobs, f"Numbers of observations do not match: {yobs} vs {xobs}"
if Yin.ndim == 1:
Yin = Yin[:, None]
if Xin.ndim == 1:
Xin = Xin[:, None]
# add intercept
if intercept:
Xin = np.concatenate((np.ones((xobs, 1), dtype=Xin.dtype), Xin), axis=1)
    # solve the normal equations (X^T X) beta = X^T Y
    A = Xin.T @ Xin
    B = Xin.T @ Yin
    beta = np.linalg.solve(A, B)
return beta
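

# Worked example for calc_ols (a minimal sketch): exact data y = 1 + 2x
# recovers intercept 1 and slope 2:
#
#   >>> X = np.array([0.0, 1.0, 2.0, 3.0])
#   >>> Y = 1.0 + 2.0 * X
#   >>> calc_ols(X, Y, intercept=True)
#   # -> [[1.], [2.]] (intercept first, then slope)

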
def calc_likelihood_normal(betas, var, xvals, yval):
    """Likelihood of yval under a normal with mean betas[0] + xvals @ betas[1:] and variance var."""
    # betas must include the intercept as its first entry
    assert betas.shape[0] == (xvals.shape[0] + 1)
    y_pred = (betas[0] + xvals @ betas[1:]).item()
    exponential = exp(-0.5 * (yval - y_pred) ** 2 / var)
    likelihood = (1.0 / sqrt(var * 2 * pi)) * exponential
return likelihood
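

# Example for calc_likelihood_normal (a sketch): the likelihood is maximal when
# yval matches the prediction, where it equals 1/sqrt(2*pi*var):
#
#   >>> betas = np.array([1.0, 2.0])
#   >>> calc_likelihood_normal(betas, var=0.5, xvals=np.array([3.0]), yval=7.0)
#   # -> 1/sqrt(pi) ~= 0.564, since y_pred = 1 + 2*3 = 7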