# ecoliver.py
import numpy as np
import scipy as sp
from scipy import stats
from scipy import linalg
import scipy.ndimage as ndimage
from datetime import date
def find_nearest(array, value):
    '''
    Return the element of array closest to value, along with its index.
    '''
    idx = (np.abs(array - value)).argmin()
    return array[idx], idx

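# Example usage (illustrative values, not part of the original module):
#   >>> find_nearest(np.array([1.0, 2.5, 4.0]), 3.0)   # -> (2.5, 1)
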
def findxy(x, y, loc):
    '''
    Find (i,j) coordinates of loc = (x0,y0) in 2D irregularly spaced
    coordinate matrices (2D numpy arrays) x and y.
    '''
    # Dimensions
    Y, X = x.shape
    # Make matrix of indices
    iijj = np.meshgrid(range(X), range(Y))
    ii = iijj[0].flatten()
    jj = iijj[1].flatten()
    # Calculate distance-squared
    dist2 = (x - loc[0])**2 + (y - loc[1])**2
    k = np.argmin(dist2)
    return ii[k], jj[k]

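# Example usage (illustrative; on a regular grid the nearest cell is obvious):
#   >>> x, y = np.meshgrid(np.arange(5.), np.arange(4.))
#   >>> findxy(x, y, (3.2, 1.9))   # -> (3, 2), i.e. column i=3, row j=2
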
def nanmean(array, axis=None):
    return np.mean(np.ma.masked_array(array, np.isnan(array)), axis)

def nanvar(array, axis=None):
    return np.var(np.ma.masked_array(array, np.isnan(array)), axis)

def nanskew(array, axis=None):
    # only works for 1D data
    return stats.skew(array[np.logical_not(np.isnan(array))])

def nanmax(array, axis=None):
    # intended for use with an explicit axis argument, so that the masked
    # result retains a mask to strip
    maxes = np.max(np.ma.masked_array(array, np.isnan(array)), axis)
    data = maxes.data
    mask = maxes.mask
    return data[np.logical_not(mask)]

def nonans(array):
    '''
    Return input array [1D numpy array] with
    all nan values removed
    '''
    return array[~np.isnan(array)]

def nozeros(array):
    '''
    Return input array [1D numpy array] with
    all zeros removed
    '''
    return array[~(array == 0)]

def latlon2km(lon1, lat1, lon2, lat2):
    '''
    Great-circle distance (in km) between two (lon, lat) points,
    using the spherical law of cosines.
    '''
    EARTH_RADIUS = 6378.1
    c = np.sin(np.radians(lat1))*np.sin(np.radians(lat2)) \
        + np.cos(np.radians(lon1-lon2))*np.cos(np.radians(lat1))*np.cos(np.radians(lat2))
    d = EARTH_RADIUS * np.arccos(c)
    return d

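# Sanity check (approximate): one degree of longitude at the equator spans
# roughly 111 km:
#   >>> latlon2km(0., 0., 1., 0.)   # ~111.3
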
def latlonArea(lon1, lat1, lon2, lat2):
    '''
    Surface area (in km^2) of a lat/lon "rectangle" bounded by the
    specified longitudes and latitudes
    '''
    EARTH_RADIUS = 6378.1
    return (EARTH_RADIUS**2) * np.abs(lon1*np.pi/180. - lon2*np.pi/180.) \
        * np.abs(np.sin(lat1*np.pi/180.) - np.sin(lat2*np.pi/180.))

def dxdy(lon, lat):
    '''
    Takes M+1 length lat and N+1 length lon vectors
    and returns MxN 2D arrays of distances across cells
    in x and y directions
    '''
    X = len(lon) - 1
    Y = len(lat) - 1
    dx = np.zeros((Y, X))
    dy = np.zeros((Y, X))
    for j in range(dx.shape[0]):
        for i in range(dx.shape[1]):
            dx[j, i] = 1e3 * latlon2km(lon[i+1], lat[j], lon[i], lat[j])
            dy[j, i] = 1e3 * latlon2km(lon[i], lat[j+1], lon[i], lat[j])
    return dx, dy

def gradient(field, dx, dy):
    '''
    Computes the gradient of the input field given
    dx and dy fields (in metres)
    '''
    field_y, field_x = np.gradient(field)
    field_x = field_x / dx
    field_y = field_y / dy
    return field_x, field_y

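# Example usage (a sketch tying dxdy and gradient together; lon and lat are
# cell-edge vectors, so the random field sits on the 10x10 cell centres):
#   >>> lon, lat = np.arange(0., 11.), np.arange(-5., 6.)
#   >>> dx, dy = dxdy(lon, lat)
#   >>> field = np.random.randn(10, 10)
#   >>> field_x, field_y = gradient(field, dx, dy)
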
def acf(x):
    '''
    Autocorrelation function of x, normalized to 1 at zero lag.
    '''
    result = np.correlate(x, x, mode='full')
    maxcorr = np.argmax(result)
    result = result / result[maxcorr]  # <=== normalization
    return result[result.size//2:]  # integer division: result.size is odd

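# Example usage (illustrative): for a zero-mean series the ACF is exactly 1
# at lag 0 and decays with increasing lag:
#   >>> r = acf(np.sin(np.linspace(0, 10*np.pi, 500)))
#   >>> r[0]   # 1.0
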
def ccf(x, y):
    x = (x - np.mean(x))/(np.std(x)*len(x))
    y = (y - np.mean(y))/np.std(y)
    ccf = np.correlate(x, y, mode='full')
    lags = np.arange(len(ccf)) - (len(x) - 1)
    return lags, ccf

def ttest_serialcorr(x, y):
    '''
    Calculates the t-test for the means of two samples under an assumption of serial
    correlation, following the technique of Zwiers and von Storch (Journal of Climate, 1995)
    '''
    # Valid (non-NaN) data, and return NaN if insufficient valid data
    validx = ~np.isnan(x)
    validy = ~np.isnan(y)
    if (validx.sum() <= 1) or (validy.sum() <= 1):
        return np.nan, np.nan
    else:
        # Sample lengths
        nx = len(x[validx])
        ny = len(y[validy])
        # Autocorrelation function (pad NaN values for an approximation)
        rhox = acf(pad(x - np.nanmean(x)))
        rhoy = acf(pad(y - np.nanmean(y)))
        # Equivalent sample lengths
        nx = nx / (1 + ((1 - np.arange(1, int(nx))/nx)*rhox[validx][:-1]).sum())
        ny = ny / (1 + ((1 - np.arange(1, int(ny))/ny)*rhoy[validy][:-1]).sum())
        # Note: if the effective sample sizes are less than 30, the distribution of
        # the t statistic will deviate significantly from the t-distribution
        # Sample standard deviations
        sx = np.sqrt(x[validx].var())
        sy = np.sqrt(y[validy].var())
        s = np.sqrt(sx**2/nx + sy**2/ny)
        # t-statistic
        t = (np.nanmean(x) - np.nanmean(y))/s
        # Degrees of freedom
        df = (sx**2/nx + sy**2/ny)**2 / ((sx**2/nx)**2/(nx-1) + (sy**2/ny)**2/(ny-1))
        # p-value
        p = 1 - stats.t.cdf(t, df)
    return t, p

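# Example usage (a sketch with synthetic, serially correlated series; the
# random-walk construction is illustrative only):
#   >>> x = 1.0 + np.cumsum(np.random.randn(200)) * 0.05
#   >>> y = np.cumsum(np.random.randn(200)) * 0.05
#   >>> t, p = ttest_serialcorr(x, y)   # effective sample sizes well below 200
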
def pad(data, maxPadLength=False):
    '''
    Linearly interpolate over missing data (NaNs) in a time series.
    Inputs:
      data          Time series [1D numpy array]
      maxPadLength  Specifies the maximum length over which to interpolate,
                    i.e., any consecutive blocks of NaNs with length greater
                    than maxPadLength will be left as NaN. Set as an integer.
                    maxPadLength=False (default) interpolates over all NaNs.
    Written by Eric Oliver, Institute for Marine and Antarctic Studies, University of Tasmania, Jun 2015
    '''
    data_padded = data.copy()
    bad_indexes = np.isnan(data)
    good_indexes = np.logical_not(bad_indexes)
    good_data = data[good_indexes]
    interpolated = np.interp(bad_indexes.nonzero()[0], good_indexes.nonzero()[0], good_data)
    data_padded[bad_indexes] = interpolated
    if maxPadLength:
        blocks, n_blocks = ndimage.label(np.isnan(data))
        for bl in range(1, n_blocks+1):
            if (blocks == bl).sum() > maxPadLength:
                data_padded[blocks == bl] = np.nan
    return data_padded

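# Example usage (illustrative):
#   >>> pad(np.array([1., np.nan, 3.]))   # array([1., 2., 3.])
#   >>> pad(np.array([1., np.nan, np.nan, 4.]), maxPadLength=1)   # 2-NaN block left as NaN
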
def runavg_periodic(ts, w):
    '''
    Perform running average of ts (1D numpy array) using uniform window
    of width w (w must be odd). Assumes periodicity of ts.
    '''
    N = len(ts)
    ts = np.append(ts, np.append(ts, ts))
    ts_smooth = np.convolve(ts, np.ones(w)/w, mode='same')
    ts = ts_smooth[N:2*N]
    return ts

def runavg(ts, w, mode='same'):
    '''
    Perform running average of ts (1D numpy array) using uniform window
    of width w (w must be odd). Option 'mode' specifies if the output should
    be defined over the same support as the input ('same', with edges
    zero-padded by np.convolve) or padded with NaNs outside of the valid
    range ('valid').
    '''
    if mode == 'same':
        ts_smooth = np.convolve(ts, np.ones(w)/w, mode=mode)
    elif mode == 'valid':
        pad_nan = np.nan * np.ones((w-1)//2)
        ts_smooth = np.append(np.append(pad_nan, np.convolve(ts, np.ones(w)/w, mode=mode)), pad_nan)
    return ts_smooth

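# Example usage (illustrative): a 3-point running mean leaves the interior of
# a linear ramp unchanged, and 'valid' mode NaN-pads the (w-1)/2 edge points:
#   >>> runavg(np.arange(5.), 3, mode='valid')   # array([nan, 1., 2., 3., nan])
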
def timevector(date_start, date_end):
    '''
    Generates a daily time vector, along with year, month, day, day-of-year,
    and full date information, given start and end dates. Format is a 3-element
    list, so that a start date of 3 May 2005 is specified as date_start = [2005,5,3].
    Note that day-of-year (doy) is [1 to 59, 61 to 366] for non-leap years and
    [1 to 366] for leap years.
    returns: t, dates, T, year, month, day, doy
    '''
    # Time vector
    t = np.arange(date(date_start[0], date_start[1], date_start[2]).toordinal(),
                  date(date_end[0], date_end[1], date_end[2]).toordinal() + 1)
    T = len(t)
    # Date list
    dates = [date.fromordinal(tt.astype(int)) for tt in t]
    # Vectors for year, month, day-of-month
    year = np.zeros((T))
    month = np.zeros((T))
    day = np.zeros((T))
    for tt in range(T):
        year[tt] = date.fromordinal(t[tt]).year
        month[tt] = date.fromordinal(t[tt]).month
        day[tt] = date.fromordinal(t[tt]).day
    year = year.astype(int)
    month = month.astype(int)
    day = day.astype(int)
    # Leap-year baseline for defining day-of-year values
    year_leapYear = 2012  # This year was a leap year, so doy spans 1 to 366
    t_leapYear = np.arange(date(year_leapYear, 1, 1).toordinal(),
                           date(year_leapYear, 12, 31).toordinal() + 1)
    dates_leapYear = [date.fromordinal(tt.astype(int)) for tt in t_leapYear]
    month_leapYear = np.zeros((len(t_leapYear)))
    day_leapYear = np.zeros((len(t_leapYear)))
    doy_leapYear = np.zeros((len(t_leapYear)))
    for tt in range(len(t_leapYear)):
        month_leapYear[tt] = date.fromordinal(t_leapYear[tt]).month
        day_leapYear[tt] = date.fromordinal(t_leapYear[tt]).day
        doy_leapYear[tt] = t_leapYear[tt] - date(date.fromordinal(t_leapYear[tt]).year, 1, 1).toordinal() + 1
    # Calculate day-of-year values
    doy = np.zeros((T))
    for tt in range(T):
        doy[tt] = doy_leapYear[(month_leapYear == month[tt]) * (day_leapYear == day[tt])]
    doy = doy.astype(int)
    return t, dates, T, year, month, day, doy

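# Example usage (a sketch, not part of the original module):
#   >>> t, dates, T, year, month, day, doy = timevector([2005, 5, 1], [2005, 5, 3])
#   >>> T     # 3 days
#   >>> doy   # array([122, 123, 124]): 1 May is day 122 of the leap-year calendar
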
def spatial_filter(field, res, cut_lon, cut_lat):
    '''
    Performs a spatial filter, removing all features with
    wavelength scales larger than cut_lon in longitude and
    cut_lat in latitude from field. Field has spatial
    resolution of res and land identified by np.nan's.
    '''
    field = field.copy()  # avoid overwriting land values in the caller's array
    # see Chelton et al., Prog. Ocean., 2011 for explanation of factor of 1/5
    sig_lon = (cut_lon/5.) / res
    sig_lat = (cut_lat/5.) / res
    land = np.isnan(field)
    field[land] = nanmean(field)
    field_filt = field - ndimage.gaussian_filter(field, [sig_lat, sig_lon])
    field_filt[land] = np.nan
    return field_filt

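# Example usage (a sketch; the 1-degree random field is illustrative, and the
# cutoffs remove scales larger than ~10 degrees):
#   >>> field = np.random.randn(180, 360)
#   >>> field_hipass = spatial_filter(field, 1., 10., 10.)
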
def trend(x, y, alpha=0.05):
    '''
    Calculates the trend of y given the linear
    independent variable x. Outputs the mean,
    trend, and alpha-level (e.g., 0.05 for 95%)
    confidence limit on the trend.
    returns mean, trend, dtrend_95
    '''
    valid = ~np.isnan(y)
    X = np.array([np.ones(len(x)), x - x.mean()])
    beta = linalg.lstsq(X[:, valid].T, y[valid])[0]
    yhat = np.sum(beta*X.T, axis=1)
    t_stat = stats.t.isf(alpha/2, len(x[valid])-2)
    s = np.sqrt(np.sum((y[valid] - yhat[valid])**2) / (len(x[valid])-2))
    Sxx = np.sum(X[1, valid]**2) - (np.sum(X[1, valid])**2)/len(x[valid])  # np.var(X, axis=1)[1]
    return beta[0], beta[1], t_stat * s / np.sqrt(Sxx)

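# Example usage (a sketch with synthetic data; the recovered trend should be
# close to the true slope of 0.5):
#   >>> x = np.arange(100.)
#   >>> y = 0.5*x + np.random.randn(100)
#   >>> mean, slope, dslope_95 = trend(x, y)   # slope ~0.5
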
def trend_TheilSen(x, y, alpha=0.05):
    '''
    Calculates the trend of y given the linear
    independent variable x. Outputs the mean,
    trend, and alpha-level (e.g., 0.05 for 95%)
    confidence limits on the trend. Estimate of
    the trend uses a Theil-Sen estimator.
    returns mean, trend, [trend_lower, trend_upper]
    '''
    # Centre the predictor (time vector) at its mid-point
    X = x - x.mean()
    # Predictand: non-NaN indices
    valid = ~np.isnan(y)
    # Perform the Theil-Sen slope estimate over valid indices
    if np.sum(valid) > 0:  # If at least one non-NaN value
        slope, y0, beta_lr, beta_up = stats.mstats.theilslopes(y[valid], X[valid], alpha=1-alpha)
        beta = np.array([y0, slope])
    else:
        beta_lr, beta_up = [np.nan, np.nan]
        beta = [np.nan, np.nan]
    return beta[0], beta[1], [beta_lr, beta_up]

def ttest_unequalvar(x, y):
    '''
    Calculates the t-test for the means of two samples under an assumption of no serial
    correlation (but different variances), following the technique of Zwiers and von Storch
    (Journal of Climate, 1995)
    '''
    # Sample lengths
    nx = len(x)
    ny = len(y)
    # Sample standard deviations
    sx = np.sqrt(x.var())
    sy = np.sqrt(y.var())
    s = np.sqrt(sx**2/nx + sy**2/ny)
    # t-statistic
    t = np.abs(x.mean() - y.mean())/s
    # Degrees of freedom
    df = (sx**2/nx + sy**2/ny)**2 / ((sx**2/nx)**2/(nx-1) + (sy**2/ny)**2/(ny-1))
    # p-value
    p = 1 - stats.t.cdf(t, df)
    return t, p

def pattern_correlation(x1, x2, centred=True):
    '''
    Calculates the pattern correlation of x1 and x2.
    Assumes x1 and x2 are 2D numpy arrays. Can
    handle missing values, even if missing values are
    distributed differently in x1 and x2. By default
    calculates the centred pattern correlation
    (centred=True), in which the spatial means of x1
    and x2 are removed prior to calculation. Can
    calculate the uncentred pattern correlation
    (centred=False), in which these means are not
    removed.
    Written by Eric Oliver, IMAS/UTAS, Nov 2015
    '''
    # Flatten 2D arrays and find shared valid (non-nan) indices
    X1 = x1.flatten()
    X2 = x2.flatten()
    valid = ~(np.isnan(X1) + np.isnan(X2))
    # Create Nx2 array of valid data
    X = np.zeros((valid.sum(), 2))
    X[:, 0] = X1[valid]
    X[:, 1] = X2[valid]
    # Centre data if desired
    if centred:
        X[:, 0] = X[:, 0] - np.mean(X[:, 0])
        X[:, 1] = X[:, 1] - np.mean(X[:, 1])
    # Calculate pattern correlation
    pcorr = np.corrcoef(X.T)[0, 1]
    return pcorr

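# Example usage (illustrative): a field correlates perfectly with itself, even
# with NaNs present:
#   >>> f = np.random.randn(10, 10)
#   >>> f[0, 0] = np.nan
#   >>> pattern_correlation(f, f)   # 1.0
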
def polyArea(x, y):
    '''
    Area of a simple polygon, defined by vertices (x,y)
    in the plane. Assumes x and y are numpy arrays of the
    same length. Algorithm is based on the Shoelace Formula
    https://en.wikipedia.org/wiki/Shoelace_formula
    http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
    '''
    return 0.5*np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

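# Example usage (illustrative): the unit square has area 1:
#   >>> polyArea(np.array([0., 1., 1., 0.]), np.array([0., 0., 1., 1.]))   # 1.0
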
def point_inside_polygon(x, y, poly):
    '''
    Determine if a point is inside a given polygon or not.
    Polygon is a list of (x,y) pairs.
    http://www.ariel.com.au/a/python-point-int-poly.html
    '''
    n = len(poly)
    inside = False
    p1x, p1y = poly[0]
    for i in range(n+1):
        p2x, p2y = poly[i % n]
        if y > min(p1y, p2y):
            if y <= max(p1y, p2y):
                if x <= max(p1x, p2x):
                    if p1y != p2y:
                        xinters = (y - p1y)*(p2x - p1x)/(p2y - p1y) + p1x
                    if p1x == p2x or x <= xinters:
                        inside = not inside
        p1x, p1y = p2x, p2y
    return inside

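# Example usage (illustrative):
#   >>> square = [(0., 0.), (1., 0.), (1., 1.), (0., 1.)]
#   >>> point_inside_polygon(0.5, 0.5, square)   # True
#   >>> point_inside_polygon(1.5, 0.5, square)   # False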