-
Notifications
You must be signed in to change notification settings - Fork 0
/
durolib.py
executable file
·537 lines (456 loc) · 20.1 KB
/
durolib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
# -*- coding: utf-8 -*-
"""
Created on Mon May 27 16:49:02 2013
Paul J. Durack 27th May 2013
PJD 2 Jun 2013 - Added clearall and outer_locals functions
PJD 13 Jun 2013 - Updated global_att_write to globalAttWrite
PJD 13 Jun 2013 - Added writeToLog function
PJD 26 Jun 2013 - Added fixInterpAxis function
PJD 18 Jul 2013 - Sphinx docs/syntax http://pythonhosted.org/an_example_pypi_project/sphinx.html
PJD 23 Jul 2013 - Added fixVarUnits function
PJD 25 Jul 2013 - Updated fixVarUnits function to print and log changes
PJD 5 Aug 2013 - Added fitPolynomial function following Pete G's code example
PJD 9 Aug 2013 - Added writePacked function
PJD 9 Aug 2013 - Added keyboard function
PJD 22 Aug 2013 - Added setTimeBoundsYearly() to fixInterpAxis
PJD 1 Apr 2014 - Added trimModelList
- TODO: Consider implementing multivariate polynomial regression:
https://github.com/mrocklin/multipolyfit
CRT 5 Jan 2015 - Modified globalAttWrite to output my (terai) username and info
This library contains all functions written to replicate matlab functionality in python
@author: durack1
"""
## Import common modules ##
import cdat_info,cdtime,code,datetime,gc,inspect,os,pytz,re,string,sys
import cdms2 as cdm
import cdutil as cdu
#import genutil as genu
#import matplotlib as plt
import MV2 as mv
import numpy as np
#import scipy as sp
from numpy.core.fromnumeric import shape
from socket import gethostname
from string import replace
# Consider modules listed in /work/durack1/Shared/130103_data_SteveGriffies/130523_mplib_tips/importNPB.py
##
## Specify UVCDAT specific stuff ##
# Turn off cdat ping reporting - Does this speed up Spyder?
cdat_info.ping = False
# Set netcdf file criterion - turned on from default 0s
cdm.setCompressionWarnings(0) ; # Suppress warnings
cdm.setNetcdfShuffleFlag(0)
cdm.setNetcdfDeflateFlag(1)
cdm.setNetcdfDeflateLevelFlag(9)
# Hi compression: 1.4Gb file ; # Single salt variable
# No compression: 5.6Gb ; Standard (compression/shuffling): 1.5Gb ; Hi compression w/ shuffling: 1.5Gb
cdm.setAutoBounds(1) ; # Ensure bounds on time and depth axes are generated
##
## Define useful functions ##
def clearAll():
"""
Documentation for clearall():
-------
The clearall() function purges all variables in global namespace
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import clearall
>>> clearall()
Notes:
-----
Currently not working ...
"""
for uniquevariable in [variable for variable in globals().copy() if variable[0] != "_" and variable != 'clearall']:
del globals()[uniquevariable]
def environment():
return False
def fillHoles(var):
return var
#http://tcl-nap.cvs.sourceforge.net/viewvc/tcl-nap/tcl-nap/library/nap_function_lib.tcl?revision=1.56&view=markup
#http://tcl-nap.cvs.sourceforge.net/viewvc/tcl-nap/tcl-nap/library/stat.tcl?revision=1.29&view=markup
#http://stackoverflow.com/questions/5551286/filling-gaps-in-a-numpy-array
#http://stackoverflow.com/questions/3662361/fill-in-missing-values-with-nearest-neighbour-in-python-numpy-masked-arrays
#https://www.google.com/search?q=python+nearest+neighbor+fill
"""
# fill_holes --
329 #
330 # Replace missing values by estimates based on means of neighbours
331 #
332 # Usage:
333 # fill_holes(x, max_nloops)
334 # where:
335 # - x is array to be filled
336 # - max_nloops is max. no. iterations (Default is to keep going until
337 # there are no missing values)
338
339 proc fill_holes {
340 x
341 {max_nloops -1}
342 } {
343 set max_nloops [[nap "max_nloops"]]
344 set n [$x nels]
345 set n_present 0; # ensure at least one loop
346 for {set nloops 0} {$n_present < $n && $nloops != $max_nloops} {incr nloops} {
347 nap "ip = count(x, 0)"; # Is present? (0 = missing, 1 = present)
348 set n_present [[nap "sum_elements(ip)"]]
349 if {$n_present == 0} {
350 error "fill_holes: All elements are missing"
351 } elseif {$n_present < $n} {
352 nap "x = ip ? x : moving_average(x, 3, -1)"
353 }
354 }
355 nap "x"
356 }
"""
def fitPolynomial(var,time,polyOrder):
"""
Documentation for fitPolynomial(var):
-------
The fitPolynomial(var,time,polyOrder) function returns a new variable which is the polyOrder
estimate of the variable argument
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import fitPolynomial
>>> var_cubic = fitPolynomial(var,time,polyOrder=3)
Notes:
-----
- PJD 5 Aug 2013 - Implemented following examples from Pete G.
- TODO: only works on 2D arrays, improve to work on 3D
http://docs.scipy.org/doc/numpy/reference/generated/numpy.polyfit.html
"""
if polyOrder > 3:
print "".join(['** fitPolynomial Error: >cubic fits not supported **',])
return
varFitted = mv.multiply(var,0.) ; # Preallocate output array
coefs,residuals,rank,singularValues,rcond = np.polyfit(time,var,polyOrder,full=True)
for timeIndex in range(len(time)):
timeVal = time[timeIndex]
if polyOrder == 1:
varFitted[timeIndex] = (coefs[0]*timeVal + coefs[1])
elif polyOrder == 2:
varFitted[timeIndex] = (coefs[0]*(timeVal**2) + coefs[1]*timeVal + coefs[2])
elif polyOrder == 3:
varFitted[timeIndex] = (coefs[0]*(timeVal**3) + coefs[1]*(timeVal**2) + coefs[2]*timeVal + coefs[3])
return varFitted
def fixInterpAxis(var):
"""
Documentation for fixInterpAxis(var):
-------
The fixInterpAxis(var) function corrects temporal axis so that genutil.statistics.linearregression
returns coefficients which are unscaled by the time axis
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import fixInterpAxis
>>> (slope),(slope_err) = linearregression(fixInterpAxis(var),error=1,nointercept=1)
Notes:
-----
...
"""
tind = range(shape(var)[0]) ; # Assume time axis is dimension 0
t = cdm.createAxis(tind,id='time')
t.units = 'years since 0-01-01 0:0:0.0'
t.calendar = var.getTime().calendar
cdu.times.setTimeBoundsYearly(t) ; # Explicitly set time bounds to yearly
var.setAxis(0,t)
return var
def fixVarUnits(var,varName,report=False,logFile=None):
"""
Documentation for fixVarUnits():
-------
The fixVarUnits() function corrects units of salinity and converts thetao from K to degrees_C
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import fixVarUnits
>>> [var,var_fixed] = fixVarUnits(var,'so',True,'logfile.txt')
Notes:
-----
...
"""
var_fixed = False
if varName in ['so','sos']:
if var.max() < 1. and var.mean() < 1.:
if report:
print "".join(['*SO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))])
if logFile is not None:
writeToLog(logFile,"".join(['*SO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))]))
var_ = var*1000
var_.id = var.id
var_.name = var.id
for k in var.attributes.keys():
setattr(var_,k,var.attributes[k])
var = var_
var_fixed = True
if report:
print "".join(['*SO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))])
if logFile is not None:
writeToLog(logFile,"".join(['*SO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))]))
elif varName in 'thetao':
if var.max() > 50. and var.mean() > 265.:
if report:
print "".join(['*THETAO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))])
if logFile is not None:
writeToLog(logFile,"".join(['*THETAO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))]))
var_ = var-273.15
var_.id = var.id
var_.name = var.id
for k in var.attributes.keys():
setattr(var_,k,var.attributes[k])
var = var_
var_fixed = True
if report:
print "".join(['*THETAO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))])
if logFile is not None:
writeToLog(logFile,"".join(['*THETAO mean: {:+06.2f}'.format(var.mean()),'; min: {:+06.2f}'.format(var.min().astype('float64')),'; max: {:+06.2f}'.format(var.max().astype('float64'))]))
return var,var_fixed
def globalAttWrite(file_handle,options):
"""
Documentation for globalAttWrite():
-------
The globalAttWrite() function writes standard global_attributes to an
open netcdf specified by file_handle
Author: Paul J. Durack : [email protected]
Returns:
-------
Nothing.
Usage:
------
>>> from durolib import globalAttWrite
>>> globalAttWrite(file_handle)
Where file_handle is a handle to an open, writeable netcdf file
Optional Arguments:
-------------------
option=optionalArguments
Restrictions: option has to be a string
Default : ...
You can pass option='SOMETHING', ...
Examples:
---------
>>> from durolib import globalAttWrite
>>> f = cdms2.open('data_file_name','w')
>>> globalAttWrite(f)
# Writes standard global attributes to the netcdf file specified by file_handle
Notes:
-----
When ...
"""
# Create timestamp, corrected to UTC for history
local = pytz.timezone("America/Los_Angeles")
time_now = datetime.datetime.now();
local_time_now = time_now.replace(tzinfo = local)
utc_time_now = local_time_now.astimezone(pytz.utc)
time_format = utc_time_now.strftime("%d-%m-%Y %H:%M:%S %p")
file_handle.institution = "Department of Earth System Science, UC-Irvine"
file_handle.data_contact = "Chris Terai; [email protected]"
file_handle.history = "".join(['File processed: ',time_format,' UTC; Irvine, CA, USA'])
file_handle.analysis_host = "".join([gethostname(),'; UVCDAT version: ',".".join(["%s" % el for el in cdat_info.version()]),
'; Python version: ',replace(replace(sys.version,'\n','; '),') ;',');')])
def inpaint(array,method):
#/work/durack1/csiro/Backup/110808/Z_dur041_linux/bin/inpaint_nans/inpaint_nans.m
return False
def keyboard(banner=None):
"""
Documentation for keyboard():
-------
The keyboard() function mimics matlab's keyboard function allowing control
sent to the keyboard within a running script
Author: Paul J. Durack : [email protected]
Returns:
-------
Nothing.
Usage:
------
>>> from durolib import keyboard
>>> keyboard()
Examples:
---------
...
Notes:
-----
...
"""
# use exception trick to pick up the current frame
try:
raise None
except:
frame = sys.exc_info()[2].tb_frame.f_back
print "# Use quit() to exit :) Happy debugging!"
# evaluate commands in current namespace
namespace = frame.f_globals.copy()
namespace.update(frame.f_locals)
try:
code.interact(banner=banner,local=namespace)
except SystemExit:
return
def outerLocals(depth=0):
return inspect.getouterframes(inspect.currentframe())[depth+1][0].f_locals
def smooth(array,method):
#/apps/MATLAB/R2011b/toolbox/matlab/specgraph/smooth3.m
#/apps/MATLAB/R2011b/toolbox/curvefit/curvefit/smooth.m
return False
def spyderClean():
"""
Documentation for spyder_clean():
-------
The spyder_clean() function purges variables initialised upon startup
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import spyder_clean
>>> spyder_clean()
Notes:
-----
Currently not working ...
"""
local_vars = outerLocals()
if 'e' in local_vars:
print 'yep..'
del(e,pi,sctypeNA,typeNA)
gc.collect()
def trimModelList(modelFileList):
"""
Documentation for trimModelList(modelFileList):
-------
The trimModelList(modelFileList) function takes a python list of model files
and trims these for duplicates using file creation_date attribute along with
temporal ordering info obtained from the file version identifier
Author: Paul J. Durack : [email protected]
Usage:
------
>>> modelFileList = glob.glob(os.path.join(filePath,'*.nc')) ; # provides full directory/file path
>>> from durolib import trimModelList
>>> modelFileListTrimmed = trimModelList(modelFileList)
Notes:
-----
- PJD 1 Apr 2014 - Implement sanity checks for r1i1p1 matching for e.g.
- PJD 1 Apr 2014 - Removed hard-coded ver- position
- PJD 1 Apr 2014 - Added realisation test to ensure expected format
"""
# Check for list variable
if type(modelFileList) is not list:
print '** Function argument not type list, exiting.. **'
return ''
# Sort list and declare output
modelFileList.sort()
modelFileListTmp = []
modelFileIndex = []
# Create subset modelFileList
for file1 in modelFileList:
file1 = file1.split('/')[-1]
mod = file1.split('.')[1]
exp = file1.split('.')[2]
rea = file1.split('.')[3]
# Test rea for r1i1p111 format match
reaTest = re.compile('^r\d{1,2}i\d{1,2}p\d{1,3}')
if not reaTest.match(rea):
print '** Filename format invalid - rea: ',rea,', exiting.. **'
return ''
modelFileListTmp.append('.'.join([mod,exp,rea]))
# Create unique list and index
modelFileListTmpUnique = list(set(modelFileListTmp)) ; modelFileListTmpUnique.sort()
findMatches = lambda searchList,elem: [[i for i, x in enumerate(searchList) if x == e] for e in elem]
modelFileListTmpIndex = findMatches(modelFileListTmp,modelFileListTmpUnique)
# Loop through unique list
for count,modelNum in enumerate(modelFileListTmpIndex):
if len(modelFileListTmpIndex[count]) == 1: # Case single version
modelFileIndex.append(int(str(modelNum).strip('[]')))
else: # Case multiple versions
# Get version and creation_date info from file
modelFileListVersion = [] ; modelFileListCreationDate = [] ; modelFileListIndex = []
for index in modelFileListTmpIndex[count]:
file1 = modelFileList[index].split('/')[-1]
verInd = int(str([count for count,x in enumerate(file1.split('.')) if 'ver-' in x]).strip('[]'))
ver1 = file1.split('.')[verInd].replace('ver-','')
f_h = cdm.open(modelFileList[index])
CD = f_h.creation_date
f_h.close()
modelFileListVersion.append(ver1)
modelFileListCreationDate.append(CD)
modelFileListIndex.append(index)
#print modelFileListVersion
#print modelFileListCreationDate
# Use creation_date to determine latest file
listLen = len(modelFileListCreationDate)
modelFileListCreationDate = map(string.replace,modelFileListCreationDate,['T',]*listLen, [' ',]*listLen)
modelFileListCreationDate = map(cdtime.s2c,modelFileListCreationDate)
modelFileListCreationDate = map(cdtime.c2r,modelFileListCreationDate,['days since 1-1-1',]*listLen)
modelFileListCreationDate = [x.value for x in modelFileListCreationDate]
maxes = [i for i,x in enumerate(modelFileListCreationDate) if x == max(modelFileListCreationDate)]
ver = [modelFileListVersion[i] for i in maxes]
ind = [modelFileListIndex[i] for i in maxes]
#print modelFileListCreationDate
#print maxes,ver,ind
# If creation_dates match check version info to determine latest file
indTest = '-' ; #verTest = '-'
if len(maxes) > 1:
pubTest = 0 ; dateTest = 0;
for count,ver1 in reversed(list(enumerate(ver))):
# Take datestamp versioned data
if 'v' in ver1 and ver1 > dateTest:
#verTest = ver[count]
indTest = ind[count]
dateTest = ver1
# Use published data preferentially: 1,2,3,4, ...
if ver1.isdigit() and ver1 > pubTest:
indTest = ind[count]
pubTest = ver1
modelFileIndex.append(int(str(indTest).strip('[]')))
else:
modelFileIndex.append(int(str(ind).strip('[]')))
#print pubTest,dateTest
# Trim original list with new index
modelFileListTrimmed = [modelFileList[i] for i in modelFileIndex]
#return modelFileListTrimmed,modelFileIndex,modelFileListTmp,modelFileListTmpUnique,modelFileListTmpIndex ; # Debugging
return modelFileListTrimmed
def writeToLog(logFilePath,textToWrite):
"""
Documentation for writeToLog(logFilePath,textToWrite):
-------
The writeToLog() function writes specified text to a text log file
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import writeToLog
>>> writeToLog(~/somefile.txt,'text to write to log file')
Notes:
-----
Current version appends a new line after each call to the function.
File will be created if it doesn't already exist, otherwise new text
will be appended to an existing log file.
"""
if os.path.isfile(logFilePath):
logHandle = open(logFilePath,'a') ; # Open to append
else:
logHandle = open(logFilePath,'w') ; # Open to write
logHandle.write("".join([textToWrite,'\n']))
logHandle.close()
def writePacked(var,fileObject='tmp.nc'):
"""
Documentation for writePacked(var,fileObject):
-------
The writePacked() function generates a 16-bit (int16) cdms2 variable and
writes this to a netcdf file
Author: Paul J. Durack : [email protected]
Usage:
------
>>> from durolib import writePacked
>>> writePacked(var,'16bitPacked.nc')
Notes:
-----
TODO: clean up fileObject existence..
TODO: deal with incredibly slow write-times
TODO: deal with input data precision
"""
#varType = var.dtype
varMin = var.min()
varMax = var.max()
var.scale_factor = np.float32((varMax-varMin)/(2**16)-1)
var.add_offset = np.float32(varMin+var.scale_factor*(2**15))
if 'tmp.nc' in fileObject:
fileObject = cdm.open(fileObject,'w')
fileObject.write((var-var.add_offset)/var.scale_factor,dtype=np.int16)
return
##