# -*- coding: utf-8 -*-
"""
Created on Sat Jan 26 15:50:30 2019
@author: Kang Sun
2019/03/09: match measures l3 format
2019/03/25: use control.txt
2019/04/22: include S5PNO2
2019/05/26: sample met data
2019/07/13: implement optimized regridding from chris chan miller
2019/07/19: fix fwhm -> w bug (pixel size corrected, by 2)
2019/10/23: add CrISNH3 subsetting function
2020/03/14: standardize met sampling functions
2020/05/19: add subsetting fields option as input
2020/07/20: parallel regrid function done
2021/04/11: OMPS-NM to OMPS-NPP; MEaSUREs, IASI, CrIS subsetting
2021/04/26: l3 wrapper, MethaneSAT, l3 data object
2021/06/15: S5PSO2, l2_path_pattern, basemap
2021/07/27: breaking change for F_wrapper_l3. l2_path_pattern prevails
2021/09/27: add MethaneAIR and projection option
2022/05/22: start adding l2_list (list of l2 paths) besides l2_path_pattern
2022/06/07: updates for MethaneAIR level3 (merging, inflation)
2022/06/20: flux divergence 2.0
"""
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
import datetime
import os, sys, glob
import logging
import inspect
from calendar import monthrange
def F_wrapper_l3(instrum,product,grid_size,
start_year=None,start_month=None,end_year=None,end_month=None,
start_day=None,end_day=None,
west=None,east=None,south=None,north=None,
column_unit='umol/m2',
if_use_presaved_l2g=True,
subset_function=None,
l2_list=None,
l2_path_pattern=None,
if_plot_l3=False,existing_ax=None,
ncores=0,block_length=200,
subset_kw=None,plot_kw=None,
start_date_array=None,
end_date_array=None,
proj=None,
nudge_grid_origin=None,
k1=None,k2=None,k3=None,inflatex=None,inflatey=None,
flux_kw=None,gradient_kw=None,flux_grid_size=None):
'''
instrum:
instrument name
product:
product name, usually the name of molecule (NO2, CH4, NH3, HCHO, etc.)
grid_size:
grid size, should be <~ 0.5 l2 pixel size
start/end_year/month/day:
it is recommended to use start/end_date_array instead
west/east/south/north:
spatial boundaries
column_unit:
umol/m2, mol/m2, or molec/cm2 when column_amount is present; ppb or ppm when XCH4 or XCO2 are present
if_use_presaved_l2g:
if True, use presaved .mat files, otherwise read/subset raw level 2 files
subset_function:
function name in popy object to subset level 2 data. should be string like "F_subset_S5PNO2"
l2_list:
a list of level 2 file paths. If provided, l2_path_pattern will be ignored.
l2_path_pattern:
a format string indicating the path structure of level 2 (or level 2g if if_use_presaved_l2g is True). e.g.,
r'C:/data/*O2-CH4_%Y%m%dT*CO2proxy.nc' for level 2 or r'C:/data/CONUS_%Y_%m.mat' for level 2g
ncores:
0 means serial; None uses half of the available CPUs; values larger than the CPU count are capped at the CPU count
block_length:
granularity to divide the level 3 mesh; each block is sent to a core in the parallel regrid. values of 100-300 work fine
subset_kw:
arguments input to the subset function
plot_kw:
arguments input to Level3_Data.plot function
start/end_date_array:
each element should be the start/end datetime.date(). e.g., to average all July data in 2005-2021,
start_date_array = [datetime.date(y,7,1) for y in range(2005,2022)],
end_date_array = [datetime.date(y,7,31) for y in range(2005,2022)],
proj:
if provided, should be a pyproj.Proj object
nudge_grid_origin:
does nothing if None; if an integer, adjusts west and south to multiples of nudge_grid_origin*grid_size, which is useful when tiling l3_data objects together
k1/2/3:
shape exponents for the 2d super gaussian. see https://doi.org/10.5194/amt-11-6679-2018, fig. 5
inflatex/y:
options to inflate level2 pixels across (x) and along (y) track
flux_kw:
arguments input to F_calculate_horizontal_flux function, will trigger flux calculation
gradient_kw:
arguments input to F_prepare_gradient function, will trigger flux calculation. it is preferred over flux_kw
flux_grid_size:
grid size on which to calculate flux divergence, should be >~ 1 l2 pixel size
output:
if if_plot_l3 is False, return a Level3_Data object. otherwise return a dictionary containing the
Level3_Data object and the figout dictionary
'''
subset_kw = subset_kw or {}
plot_kw = plot_kw or {}
if flux_kw is not None:
logging.info('divergence-based flux calculation is enabled using wind fields {} and {}'.format(flux_kw['x_wind_field'],flux_kw['y_wind_field']))
do_flux = True
do_div = True
flux_grid_size = flux_grid_size or grid_size
else:
do_div = False
if gradient_kw is not None:
logging.info('gradient-based flux calculation is enabled using wind fields {} and {}'.format(gradient_kw['x_wind_field'],gradient_kw['y_wind_field']))
do_flux = True
do_grad = True
flux_grid_size = flux_grid_size or grid_size
else:
do_grad = False
if flux_kw is None and gradient_kw is None:
do_flux = False
do_div = False
do_grad = False
if nudge_grid_origin is not None:
step_grid_size = nudge_grid_origin*grid_size
west1 = np.floor(west/step_grid_size)*step_grid_size
south1 = np.floor(south/step_grid_size)*step_grid_size
logging.info('west will be adjusted from {} to {}'.format(west,west1))
west = west1
logging.info('south will be adjusted from {} to {}'.format(south,south1))
south = south1
if l2_list is not None and l2_path_pattern is not None:
logging.info('both l2_list and l2_path_pattern are provided; l2_path_pattern will be ignored')
l2_path_pattern = None
if start_year is None:
if start_date_array is None:
if subset_function in ['F_subset_combined_MethaneAIR'] and 'alongtrack_mask' in subset_kw.keys():
logging.warning('no time constraint will be applied')
# create a dummy time
start_date_array = np.array([datetime.datetime(1900,1,1)])
end_date_array = np.array([datetime.datetime(2100,1,1)])
else:
logging.error('start/end_date_array have to be provided')
return
else:
logging.warning('please use start/end_date_array instead of setting year/month/day')
if end_day is None:
end_day = monthrange(end_year,end_month)[-1]
if start_date_array is not None:
if start_year is not None:
logging.info('Array of date provided, superseding start/end_year/month/day')
if end_date_array is None:
logging.info('end dates not provided, assuming end of months')
end_date_array = np.array([datetime.date(d.year,d.month,monthrange(d.year,d.month)[-1]) for d in start_date_array])
else:
start_date_array = np.array([datetime.date(start_year,start_month,start_day)])
end_date_array = np.array([datetime.date(end_year,end_month,end_day)])
if isinstance(start_date_array[0],datetime.datetime):
start_dt_array = start_date_array
end_dt_array = end_date_array
elif isinstance(start_date_array[0],datetime.date):
start_dt_array = np.array([datetime.datetime(d.year,d.month,d.day) for d in start_date_array])
end_dt_array = np.array([datetime.datetime(d.year,d.month,d.day) for d in end_date_array])
l3_data = Level3_Data(proj=proj)
for idate in range(len(start_date_array)):
start_date = start_date_array[idate]
end_date = end_date_array[idate]
start_dt = start_dt_array[idate]
end_dt = end_dt_array[idate]
o = popy(instrum=instrum,product=product,grid_size=grid_size,
start_year=start_dt.year,start_month=start_dt.month,start_day=start_dt.day,
start_hour=start_dt.hour,start_minute=start_dt.minute,start_second=start_dt.second,
end_year=end_dt.year,end_month=end_dt.month,end_day=end_dt.day,
end_hour=end_dt.hour,end_minute=end_dt.minute,end_second=end_dt.second,
west=west,east=east,south=south,north=north,proj=proj,
k1=k1,k2=k2,k3=k3,inflatex=inflatex,inflatey=inflatey,flux_grid_size=flux_grid_size)
if not if_use_presaved_l2g:
if subset_function is None:
subset_function = o.default_subset_function
subset_arg_list = inspect.getfullargspec(getattr(o,subset_function)).args
if 'l2_path_pattern' in subset_arg_list and \
'l2_path_pattern' not in subset_kw.keys() and \
l2_path_pattern is not None:
subset_kw['l2_path_pattern'] = l2_path_pattern
if 'l2_list' in subset_arg_list and \
'l2_list' not in subset_kw.keys() and \
l2_list is not None:
subset_kw['l2_list'] = l2_list
getattr(o, subset_function)(**subset_kw)
if do_div:
o.F_calculate_horizontal_flux(**flux_kw)
if do_grad:
o.F_prepare_gradient(**gradient_kw)
if column_unit is not None:
o.F_adjust_column_unit(column_unit)
#kludge for CrIS
if instrum == 'CrIS':
if isinstance(o.l2g_data,dict):
mask = (o.l2g_data['column_amount'] > 0) & (o.l2g_data['column_uncertainty'] > 0)
o.l2g_data = {k:v[mask,] for (k,v) in o.l2g_data.items()}
elif isinstance(o.l2g_data,list):
for iorbit in range(len(o.l2g_data)):
mask = (o.l2g_data[iorbit]['column_amount'] > 0) & (o.l2g_data[iorbit]['column_uncertainty'] > 0)
o.l2g_data[iorbit] = {k:v[mask,] for (k,v) in o.l2g_data[iorbit].items()}
if proj is not None:
l3_data0 = o.F_parallel_regrid_proj(ncores=ncores,block_length=block_length)
else:
l3_data0 = o.F_parallel_regrid(ncores=ncores,block_length=block_length)
else:
l3_data0 = Level3_Data(proj=proj)
for year in range(start_date.year,end_date.year+1):
for month in range(1,13):
if year == start_date.year and month < start_date.month:
continue
elif year == end_date.year and month > end_date.month:
continue
l2g_path = datetime.date(year,month,1).strftime(l2_path_pattern)
if not os.path.exists(l2g_path):
logging.warning(l2g_path+' does not exist!')
continue
o.F_mat_reader(l2g_path)
if do_div:
o.F_calculate_horizontal_flux(**flux_kw)
if do_grad:
o.F_prepare_gradient(**gradient_kw)
#kludge for CrIS
if instrum == 'CrIS':
mask = (o.l2g_data['column_amount'] > 0) & (o.l2g_data['column_uncertainty'] > 0)
o.l2g_data = {k:v[mask,] for (k,v) in o.l2g_data.items()}
# xch4 or xco2 products
x_set = set(o.oversampling_list).intersection({'xch4','XCH4','XCO2','xco2'})
if len(x_set)>0:
if o.default_column_unit == 'mol/mol' and column_unit in ['ppb','ppbv','nmol/mol']:
for x_something in x_set:
o.l2g_data[x_something] = o.l2g_data[x_something]*1e9
if o.default_column_unit == 'mol/mol' and column_unit in ['ppm','ppmv','umol/mol']:
for x_something in x_set:
o.l2g_data[x_something] = o.l2g_data[x_something]*1e6
if 'column_amount' in o.oversampling_list:
if o.default_column_unit == 'molec/cm2' and column_unit == 'mol/m2':
o.l2g_data['column_amount'] = o.l2g_data['column_amount']/6.02214e19
elif o.default_column_unit == 'mol/m2' and column_unit == 'molec/cm2':
o.l2g_data['column_amount'] = o.l2g_data['column_amount']*6.02214e19
if column_unit == 'umol/m2':
if o.default_column_unit == 'molec/cm2':
o.l2g_data['column_amount'] = o.l2g_data['column_amount']/6.02214e19*1e6
if o.default_column_unit == 'mol/m2':
o.l2g_data['column_amount'] = o.l2g_data['column_amount']*1e6
if proj is not None:
monthly_l3_data = o.F_parallel_regrid_proj(ncores=ncores,block_length=block_length)
else:
monthly_l3_data = o.F_parallel_regrid(ncores=ncores,block_length=block_length)
l3_data0 = l3_data0.merge(monthly_l3_data)
l3_data = l3_data.merge(l3_data0)
if hasattr(l3_data,'check'):
l3_data.check()
if 'flux_div' not in l3_data.keys() and do_div:
if flux_grid_size > grid_size:
l3_data = l3_data.block_reduce(flux_grid_size)
l3_data.calculate_flux_divergence(**o.calculate_flux_divergence_kw)
if 'wind_column' not in l3_data.keys() and do_grad:
if flux_grid_size > grid_size:
l3_data = l3_data.block_reduce(flux_grid_size)
l3_data.calculate_gradient(**o.calculate_gradient_kw)
if if_plot_l3 and hasattr(l3_data,'plot'):
figout = l3_data.plot(existing_ax=existing_ax,**plot_kw)
else:
figout = None
if if_plot_l3:
return {'l3_data':l3_data,'figout':figout}
else:
return l3_data
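# A minimal usage sketch of F_wrapper_l3 (hedged): average presaved level 2g data
# for July of 2019-2021. The instrum/product strings and l2_path_pattern are
# illustrative assumptions and must match your popy setup and file layout.
#
# l3 = F_wrapper_l3(instrum='TROPOMI',product='NO2',grid_size=0.02,
#                   west=-80,east=-75,south=40,north=45,
#                   if_use_presaved_l2g=True,
#                   l2_path_pattern=r'/data/CONUS_%Y_%m.mat',
#                   start_date_array=[datetime.date(y,7,1) for y in range(2019,2022)],
#                   end_date_array=[datetime.date(y,7,31) for y in range(2019,2022)])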
def F_download_gesdisc_l2(txt_fn,
start_dt,end_dt,
re_pattern=r'\d{8}T\d{6}',
orbit_start_dt_idx=0,orbit_end_dt_idx=None,
dt_pattern='%Y%m%dT%H%M%S',
l2_dir=None,tmp_txt_dir=None,
tmp_txt_fn=None,
download_str=None,
if_delete_tmp_txt=True):
r'''
txt_fn:
path to the txt file from nasa ges disc
start/end_dt:
datetime objects bounding the start/end of the download
re_pattern:
pattern recognizable by re in python, usually r'\d{8}T\d{6}' <=> '%Y%m%dT%H%M%S'
orbit_start/end_dt_idx:
index of orbit start/end datetime from lines in the txt file
dt_pattern:
how to extract datetime from the sub string
l2_dir:
where to save level 2 files
tmp_txt_dir/fn:
directory/path to write a temporary txt file
download_str:
usually wrapping wget
2021/10/17
'''
import datetime as dt
import re
if l2_dir is None:
logging.warning('use cwd as l2_dir')
l2_dir = os.getcwd()
if not os.path.exists(l2_dir):
os.makedirs(l2_dir)
if tmp_txt_dir is None:
logging.warning('use cwd to save temporary txt file')
tmp_txt_dir = os.getcwd()
tmp_txt_fn = tmp_txt_fn or os.path.join(tmp_txt_dir,'tmp.txt')
if download_str is None:
download_str = 'cd {}; wget -N -q --load-cookies ~/.urs_cookies '\
'--save-cookies ~/.urs_cookies --auth-no-challenge=on --keep-session-cookies '\
'--content-disposition -i {}'.format(l2_dir,tmp_txt_fn)
count = 0
with open(txt_fn,'r') as f, open(tmp_txt_fn,'w') as t:
while True:
l = f.readline()
if not l:
break
try:
tmp = re.findall(re_pattern,l)
if len(tmp) == 0:
continue
orbit_start_dt = dt.datetime.strptime(tmp[orbit_start_dt_idx],dt_pattern)
if orbit_end_dt_idx is not None:
orbit_end_dt = dt.datetime.strptime(tmp[orbit_end_dt_idx],dt_pattern)
else:
orbit_end_dt = orbit_start_dt
if orbit_start_dt >= start_dt and orbit_end_dt <= end_dt:
t.write(l)
count += 1
except Exception as e:
logging.warning(e)
logging.info('{} files saved to tmp txt file and to be downloaded'.format(count))
os.system(download_str)
if if_delete_tmp_txt:
os.remove(tmp_txt_fn)
return download_str
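# A hedged example: download the subset of links in a GES DISC list file that
# fall within one day. The txt filename and l2_dir are illustrative assumptions;
# wget and ~/.urs_cookies must be set up as in the default download_str.
#
# F_download_gesdisc_l2('subset_S5PNO2.txt',
#                       start_dt=datetime.datetime(2021,7,1),
#                       end_dt=datetime.datetime(2021,7,2),
#                       l2_dir='/data/S5PNO2/')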
def datedev_py(matlab_datenum):
"""
convert matlab datenum double to python datetime object
"""
python_datetime = datetime.datetime.fromordinal(int(matlab_datenum)) + datetime.timedelta(days=matlab_datenum%1) - datetime.timedelta(days = 366)
return python_datetime
def datetime2datenum(python_datetime):
'''
convert python datetime to matlab datenum
'''
matlab_datenum = python_datetime.toordinal()\
+python_datetime.hour/24.\
+python_datetime.minute/1440.\
+python_datetime.second/86400.+366.
return matlab_datenum
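# Round-trip check of the conversion pair above (the matlab datenum of
# 2019-01-01 is 737426, so noon maps to 737426.5):
# datetime2datenum(datetime.datetime(2019,1,1,12)) -> 737426.5
# datedev_py(737426.5) -> datetime.datetime(2019,1,1,12,0)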
def F_collocate_l2g(l2g_data1,l2g_data2,hour_difference=0.5,
field_to_average='column_amount'):
'''
collocate two l2g dictionaries
l2g_data1:
the one with bigger pixels
hour_difference:
max difference between pixels in hour
field_to_average:
the l2g field in l2g_data2 to be averaged to l2g_data1 pixels
updated on 2020/08/23
updated on 2020/10/13
'''
from shapely.geometry import Polygon
l2g_2_west = np.min(l2g_data2['lonr'],axis=1)
l2g_2_east = np.max(l2g_data2['lonr'],axis=1)
l2g_2_south = np.min(l2g_data2['latr'],axis=1)
l2g_2_north = np.max(l2g_data2['latr'],axis=1)
l2g_1_west = np.min(l2g_data1['lonr'],axis=1)
l2g_1_east = np.max(l2g_data1['lonr'],axis=1)
l2g_1_south = np.min(l2g_data1['latr'],axis=1)
l2g_1_north = np.max(l2g_data1['latr'],axis=1)
l2g_2_utc = l2g_data2['UTC_matlab_datenum']
l2g_1_utc = l2g_data1['UTC_matlab_datenum']
l2g_2_lonr = l2g_data2['lonr']
l2g_1_lonr = l2g_data1['lonr']
l2g_2_latr = l2g_data2['latr']
l2g_1_latr = l2g_data1['latr']
l2g_2_C = l2g_data2[field_to_average]
mask_list = [np.where((l2g_2_utc >= l2g_1_utc[i]-hour_difference/24)\
& (l2g_2_utc <= l2g_1_utc[i]+hour_difference/24)\
& (l2g_2_south <= l2g_1_north[i])\
& (l2g_2_north >= l2g_1_south[i])\
& (l2g_2_east >= l2g_1_west[i])\
& (l2g_2_west <= l2g_1_east[i])) for i in range(len(l2g_data1['latc']))]
def F_poly_intersect(x1,y1,X2,Y2,l2g_2_C):
'''
x1, y1 defines a bigger polygon
each row of X2 Y2 defines a smaller polygon
'''
if len(X2) == 0:
return np.array([np.nan, np.nan, np.nan])
poly1 = Polygon(np.vstack((x1,y1)).T)
area1 = poly1.area
n = X2.shape[0]
poly2_list = [Polygon(np.vstack((X2[j,],Y2[j,])).T) for j in range(n)]
area_list = np.array([np.array([poly1.intersection(poly2).area,poly2.area]) for poly2 in poly2_list])
npix = np.sum(area_list[:,0]/area_list[:,1])
weighted_mean_l2g_2_C = np.sum(area_list[:,0]*l2g_2_C)/np.sum(area_list[:,0])
relative_overlap = np.sum(area_list[:,0])/area1
return np.array([weighted_mean_l2g_2_C,relative_overlap,npix])
result_array = np.array([F_poly_intersect(l2g_1_lonr[i,],
l2g_1_latr[i,],
l2g_2_lonr[mask_list[i][0],],
l2g_2_latr[mask_list[i][0],],
l2g_2_C[mask_list[i][0]]) for i in range(len(l2g_data1['latc']))])
l2g_data1[field_to_average+'2'] = result_array[:,0]
l2g_data1['relative_overlap2'] = result_array[:,1]
l2g_data1['npix2'] = result_array[:,2]
overlap_mask = (~np.isnan(result_array[:,0])) & (result_array[:,2] > 0)
l2g_data1_has2 = {k:v[overlap_mask,] for (k,v) in l2g_data1.items()}
l2g_data1_hasnot2 = {k:v[~overlap_mask,] for (k,v) in l2g_data1.items()}
return l2g_data1_has2, l2g_data1_hasnot2
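# A minimal collocation sketch (hedged): one coarse pixel in l2g_data1 split
# into two fine pixels in l2g_data2; only the keys F_collocate_l2g touches are
# populated, and the numbers are illustrative.
#
# l2g_1 = {'lonr':np.array([[0.,1,1,0]]),'latr':np.array([[0.,0,1,1]]),
#          'latc':np.array([0.5]),'UTC_matlab_datenum':np.array([737426.5]),
#          'column_amount':np.array([1.])}
# l2g_2 = {'lonr':np.array([[0.,.5,.5,0],[.5,1,1,.5]]),
#          'latr':np.array([[0.,0,1,1],[0.,0,1,1]]),
#          'latc':np.array([.5,.5]),'UTC_matlab_datenum':np.array([737426.5]*2),
#          'column_amount':np.array([2.,4.])}
# has2,hasnot2 = F_collocate_l2g(l2g_1,l2g_2)
# has2['column_amount2']  # area-weighted mean, 3.0 here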
def F_interp_gcrs(sounding_lon,sounding_lat,sounding_datenum,sounding_ps,
gcrs_dir='/mnt/Data2/GEOS-Chem_Silvern/',
product='NO2',if_monthly=False):
"""
sample a field from GEOS-Chem data by Rachel Silvern (gcrs) in .nc format.
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
sounding_ps:
surface pressure for each sounding
gcrs_dir:
directory where geos chem data are saved
if_monthly:
whether to use monthly profiles instead of daily profiles
created on 2020/03/09
"""
from netCDF4 import Dataset
from scipy.interpolate import RegularGridInterpolator
from calendar import isleap
# hybrid Ap parameter in Pa
Ap = np.array([0.000000e+00, 4.804826e-02, 6.593752e+00, 1.313480e+01, 1.961311e+01, 2.609201e+01,
3.257081e+01, 3.898201e+01, 4.533901e+01, 5.169611e+01, 5.805321e+01, 6.436264e+01,
7.062198e+01, 7.883422e+01, 8.909992e+01, 9.936521e+01, 1.091817e+02, 1.189586e+02,
1.286959e+02, 1.429100e+02, 1.562600e+02, 1.696090e+02, 1.816190e+02, 1.930970e+02,
2.032590e+02, 2.121500e+02, 2.187760e+02, 2.238980e+02, 2.243630e+02, 2.168650e+02,
2.011920e+02, 1.769300e+02, 1.503930e+02, 1.278370e+02, 1.086630e+02, 9.236572e+01,
7.851231e+01, 5.638791e+01, 4.017541e+01, 2.836781e+01, 1.979160e+01, 9.292942e+00,
4.076571e+00, 1.650790e+00, 6.167791e-01, 2.113490e-01, 6.600001e-02, 1.000000e-02],dtype=np.float32)*1e2
# hybrid Bp parameter
Bp = np.array([1.000000e+00, 9.849520e-01, 9.634060e-01, 9.418650e-01, 9.203870e-01, 8.989080e-01,
8.774290e-01, 8.560180e-01, 8.346609e-01, 8.133039e-01, 7.919469e-01, 7.706375e-01,
7.493782e-01, 7.211660e-01, 6.858999e-01, 6.506349e-01, 6.158184e-01, 5.810415e-01,
5.463042e-01, 4.945902e-01, 4.437402e-01, 3.928911e-01, 3.433811e-01, 2.944031e-01,
2.467411e-01, 2.003501e-01, 1.562241e-01, 1.136021e-01, 6.372006e-02, 2.801004e-02,
6.960025e-03, 8.175413e-09, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00],dtype=np.float32)
start_datenum = np.amin(sounding_datenum)
end_datenum = np.amax(sounding_datenum)
start_datetime = datedev_py(start_datenum)
end_datetime = datedev_py(end_datenum)
nl2 = len(sounding_datenum)
nlayer = 47 # layer number in geos chem
# allocate space for interpolated profiles at all level 2 pixels
sounding_profile = np.zeros((nl2,nlayer),dtype=np.float32)
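# pressure at layer edges from the hybrid coordinate: p_edge(l) = Ap(l) + Bp(l)*ps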
sounding_pEdge = sounding_ps[:,np.newaxis]*Bp+Ap
lat_interp = np.tile(sounding_lat,(nlayer,1)).T.astype(np.float32)
lon_interp = np.tile(sounding_lon,(nlayer,1)).T.astype(np.float32)
layer_interp = np.tile(np.arange(nlayer),(nl2,1)).astype(np.float32)
for year in range(start_datetime.year,end_datetime.year+1):
if product == 'NO2' and if_monthly == False:
year_sounding_doy = np.floor(sounding_datenum)-(datetime.datetime(year=year,month=1,day=1).toordinal()+365.)
year_sounding_doy = year_sounding_doy.astype(int)
if isleap(year):
nday = 366
else:
nday = 365
loop_sounding_doy = np.unique(year_sounding_doy)
f1 = (loop_sounding_doy>=1)
f2 = (loop_sounding_doy<=nday)
loop_sounding_doy = loop_sounding_doy[f1&f2]
gc_fn = os.path.join(gcrs_dir,'NO2_PROF.05x0625_NA.%0d.nc'%year)
print('loading '+gc_fn)
gc_id = Dataset(gc_fn)
gc_gas = gc_id['NO2_ppb'][:].astype(np.float32)
gc_lon = gc_id['longitude'][:]
gc_lat = gc_id['latitude'][:]
for doy in loop_sounding_doy:
# remember python is 0-based
gc_gas_doy = gc_gas[doy-1,...].squeeze()
rowIndex = np.nonzero(year_sounding_doy==doy)
f = RegularGridInterpolator((np.arange(nlayer),gc_lat,gc_lon),\
gc_gas_doy,bounds_error=False,fill_value=np.nan)
sounding_profile[rowIndex,:] = f((layer_interp[rowIndex,:],\
lat_interp[rowIndex,:],lon_interp[rowIndex,:]))
elif product in {'NH3','HCHO'} or if_monthly == True:
sounding_dt = [datedev_py(sounding_datenum[il2]) for il2 in range(nl2)]
sounding_year = np.array([dt.year for dt in sounding_dt])
sounding_month = np.array([dt.month for dt in sounding_dt])
loop_month = np.unique(sounding_month[sounding_year==year])
gc_fn = os.path.join(gcrs_dir,'NH3_HCHO_PROF.05x0625_NA.%0d.nc'%year)
print('loading '+gc_fn)
gc_id = Dataset(gc_fn)
gc_gas = gc_id[product+'_ppb'][:].astype(np.float32)
gc_lon = gc_id['longitude'][:]
gc_lat = gc_id['latitude'][:]
for month in loop_month:
# remember python is 0-based
gc_gas_doy = gc_gas[month-1,...].squeeze()
rowIndex = np.nonzero((sounding_year==year)&(sounding_month==month))
f = RegularGridInterpolator((np.arange(nlayer),gc_lat,gc_lon),\
gc_gas_doy,bounds_error=False,fill_value=np.nan)
sounding_profile[rowIndex,:] = f((layer_interp[rowIndex,:],\
lat_interp[rowIndex,:],lon_interp[rowIndex,:]))
return sounding_profile, sounding_pEdge
def F_interp_merra2_global(sounding_lon,sounding_lat,sounding_datenum,\
merra2_dir='/mnt/Data2/MERRA2_2x2.5/',\
interp_fields=None,\
fn_suffix='.A1.2x25'):
"""
sample a field from geos chem merra2 data
see /mnt/Data2/MERRA2_2x2.5/test_download_cris.py for downloading
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
merra2_dir:
directory where merra2 data are saved
interp_fields:
variables to interpolate from merra2, only 2d fields are supported
fn_suffix:
only A1 2d data are supported
created on 2021/04/18
"""
import glob
from scipy.interpolate import RegularGridInterpolator
interp_fields = interp_fields or ['TROPPT']
start_datenum = np.amin(sounding_datenum)
end_datenum = np.amax(sounding_datenum)
start_date = datedev_py(start_datenum).date()
end_date = datedev_py(end_datenum).date()
days = (end_date-start_date).days+1
DATES = [start_date + datetime.timedelta(days=d) for d in range(days)]
merra2_data = {}
iday = 0
for DATE in DATES:
merra_filedir = os.path.join(merra2_dir,DATE.strftime('Y%Y'),\
DATE.strftime('M%m'),DATE.strftime('D%d'))
merra_flist = glob.glob(merra_filedir+'/*'+fn_suffix+'.nc4')
if len(merra_flist) > 1:
logging.warning('more than one nc file in MERRA2 daily folder; using the first one')
elif len(merra_flist) == 0:
logging.warning('no MERRA2 file found on '+DATE.strftime('%Y%m%d'))
continue
fn = merra_flist[0]
if not merra2_data:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['lat','lon','time'])))
merra2_data['lon'] = np.append(nc_out['lon'],180.)
merra2_data['lat'] = nc_out['lat']
# number of hours in each daily file; must be the same for every day
nhour = len(nc_out['time'])
merra2_data['datenum'] = np.zeros((nhour*(days)),dtype=np.float64)
# merra2 time is defined as minutes since 00:30:00 on that day
merra2_data['datenum'][iday*nhour:((iday+1)*nhour)] = DATE.toordinal()+366.+(nc_out['time']+30)/1440
for field in interp_fields:
merra2_data[field] = np.zeros((len(merra2_data['lon']),len(merra2_data['lat']),nhour*(days)))
# was read in as 3-d array in time, lat, lon; transpose to lon, lat, time
tmp = nc_out[field].transpose((2,1,0))
# append the -180 column at +180 so longitude wraps periodically
merra2_data[field][...,iday*nhour:((iday+1)*nhour)] = np.append(tmp,tmp[[0],:,:],axis=0)
else:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['time'])))
# merra2 time is defined as minutes since 00:30:00 on that day
merra2_data['datenum'][iday*nhour:((iday+1)*nhour)] = DATE.toordinal()+366.+(nc_out['time']+30)/1440
for field in interp_fields:
# was read in as 3-d array in time, lat, lon; transpose to lon, lat, time
tmp = nc_out[field].transpose((2,1,0))
merra2_data[field][...,iday*nhour:((iday+1)*nhour)] = np.append(tmp,tmp[[0],:,:],axis=0)
# increment the day counter
iday = iday+1
sounding_interp = {}
if not merra2_data:
for fn in interp_fields:
sounding_interp[fn] = sounding_lon*np.nan
return sounding_interp
# interpolate
for fn in interp_fields:
my_interpolating_function = \
RegularGridInterpolator((merra2_data['lon'],merra2_data['lat'],merra2_data['datenum']),\
merra2_data[fn],bounds_error=False,fill_value=np.nan)
sounding_interp[fn] = my_interpolating_function((sounding_lon,sounding_lat,sounding_datenum))
return sounding_interp
def F_interp_merra2(sounding_lon,sounding_lat,sounding_datenum,\
merra2_dir='/mnt/Data2/MERRA/',\
interp_fields=None,\
fn_header='MERRA2_300.tavg1_2d_slv_Nx'):
"""
sample a field from merra2 data in .nc format.
see download_merra2.py for downloading
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
merra2_dir:
directory where merra2 data are saved
interp_fields:
variables to interpolate from merra2, only 2d fields are supported
fn_header:
following nasa ges disc naming
created on 2020/03/09
noted on 2021/03/01 that some troppt is masked
"""
import glob
from scipy.interpolate import RegularGridInterpolator
interp_fields = interp_fields or ['PBLTOP','PS','TROPPT']
start_datenum = np.amin(sounding_datenum)
end_datenum = np.amax(sounding_datenum)
start_date = datedev_py(start_datenum).date()
end_date = datedev_py(end_datenum).date()
days = (end_date-start_date).days+1
DATES = [start_date + datetime.timedelta(days=d) for d in range(days)]
merra2_data = {}
iday = 0
for DATE in DATES:
merra_filedir = os.path.join(merra2_dir,DATE.strftime('Y%Y'),\
DATE.strftime('M%m'),DATE.strftime('D%d'))
merra_flist = glob.glob(merra_filedir+'/*.nc')
if len(merra_flist) > 1:
logging.warning('more than one nc file in MERRA2 daily folder; using the first one')
elif len(merra_flist) == 0:
logging.warning('no MERRA2 file found on '+DATE.strftime('%Y%m%d'))
continue
fn = merra_flist[0]
if not merra2_data:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['lat','lon','time'])))
merra2_data['lon'] = nc_out['lon']
merra2_data['lat'] = nc_out['lat']
# number of hours in each daily file; must be the same for every day
nhour = len(nc_out['time'])
merra2_data['datenum'] = np.zeros((nhour*(days)),dtype=np.float64)
# merra2 time is defined as minutes since 00:30:00 on that day
merra2_data['datenum'][iday*nhour:((iday+1)*nhour)] = DATE.toordinal()+366.+(nc_out['time']+30)/1440
for field in interp_fields:
merra2_data[field] = np.zeros((len(merra2_data['lon']),len(merra2_data['lat']),nhour*(days)))
# was read in as 3-d array in time, lat, lon; transpose to lon, lat, time
merra2_data[field][...,iday*nhour:((iday+1)*nhour)] = nc_out[field].transpose((2,1,0))
else:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['time'])))
# merra2 time is defined as minutes since 00:30:00 on that day
merra2_data['datenum'][iday*nhour:((iday+1)*nhour)] = DATE.toordinal()+366.+(nc_out['time']+30)/1440
for field in interp_fields:
# was read in as 3-d array in time, lat, lon; transpose to lon, lat, time
merra2_data[field][...,iday*nhour:((iday+1)*nhour)] = nc_out[field].transpose((2,1,0))
# increment the day counter
iday = iday+1
sounding_interp = {}
if not merra2_data:
for fn in interp_fields:
sounding_interp[fn] = sounding_lon*np.nan
return sounding_interp
# interpolate
for fn in interp_fields:
my_interpolating_function = \
RegularGridInterpolator((merra2_data['lon'],merra2_data['lat'],merra2_data['datenum']),\
merra2_data[fn],bounds_error=False,fill_value=np.nan)
sounding_interp[fn] = my_interpolating_function((sounding_lon,sounding_lat,sounding_datenum))
return sounding_interp
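# A hedged usage sketch: sample the MERRA2 tropopause pressure at two soundings.
# The lon/lat/time values and merra2_dir are illustrative assumptions.
#
# out = F_interp_merra2(np.array([-100.,-99.]),np.array([40.,41.]),
#                       np.full(2,datetime2datenum(datetime.datetime(2019,7,1,18))),
#                       merra2_dir='/data/MERRA2/',interp_fields=['TROPPT'])
# out['TROPPT']  # tropopause pressure (Pa) at each sounding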
def F_interp_era5_3D(sounding_lon,sounding_lat,sounding_datenum,
sounding_p0,sounding_p1,nlevel=10,\
era5_dir='/mnt/Data2/ERA5/',\
interp_fields=None,\
fn_header='CONUS'):
"""
sample 3D field from era5 data in .nc format.
see era5.py for era5 downloading/subsetting
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
sounding_p0:
bottom bound of pressure
sounding_p1:
top bound of pressure
nlevel:
how many pressure-linear levels between sounding_p0 and sounding_p1
era5_dir:
directory where subset era5 data in .nc are saved
interp_fields:
variables to interpolate from era5, u and v
fn_header:
in general should denote domain location of era5 data
created on 2020/09/20
"""
from scipy.interpolate import RegularGridInterpolator
# nl2 = len(sounding_datenum)
interp_fields = interp_fields or ['v','u']
p_interp = np.linspace(sounding_p0,sounding_p1,nlevel).T
lat_interp = np.tile(sounding_lat,(nlevel,1)).T
lon_interp = np.tile(sounding_lon,(nlevel,1)).T
time_interp = np.tile(sounding_datenum,(nlevel,1)).T
start_datenum = np.amin(sounding_datenum)
end_datenum = np.amax(sounding_datenum)
start_date = datedev_py(start_datenum).date()
end_date = datedev_py(end_datenum).date()
days = (end_date-start_date).days+1
DATES = [start_date + datetime.timedelta(days=d) for d in range(days)]
era5_data = {}
iday = 0
for DATE in DATES:
fn = os.path.join(era5_dir,DATE.strftime('Y%Y'),\
DATE.strftime('M%m'),\
DATE.strftime('D%d'),\
fn_header+'_3D_'+DATE.strftime('%Y%m%d')+'.nc')
if not era5_data:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['latitude','longitude','time','level'])))
era5_data['lon'] = nc_out['longitude']
era5_data['level'] = nc_out['level']*100 # hPa to Pa
era5_data['lat'] = nc_out['latitude'][::-1]
# number of hours in each daily file; must be the same for every day
nhour = len(nc_out['time'])
era5_data['datenum'] = np.zeros((nhour*(days)),dtype=np.float64)
# era5 time is defined as 'hours since 1900-01-01 00:00:00.0'
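# 693962. is the matlab datenum of 1900-01-01 (python toordinal 693596 + 366)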
era5_data['datenum'][iday*nhour:((iday+1)*nhour)] = nc_out['time']/24.+693962.
for field in interp_fields:
era5_data[field] = np.zeros((len(era5_data['lon']),len(era5_data['lat']),len(era5_data['level']),nhour*(days)))
if len(nc_out[field].shape) != 4:
logging.warning('anomalous dimension in ERA5 field {}; taking only the first element of the second dimension'.format(field))
nc_out[field] = nc_out[field][:,0,...].squeeze()
# was read in as 4-d array in time, level, lat, lon; transpose to lon, lat, level, time
era5_data[field][...,iday*nhour:((iday+1)*nhour)] = nc_out[field].transpose((3,2,1,0))[:,::-1,:,:]
else:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['time'])))
# era5 time is defined as 'hours since 1900-01-01 00:00:00.0'
era5_data['datenum'][iday*nhour:((iday+1)*nhour)] = nc_out['time']/24.+693962.
for field in interp_fields:
# was read in as 4-d array in time, level, lat, lon; transpose to lon, lat, level, time
era5_data[field][...,iday*nhour:((iday+1)*nhour)] = nc_out[field].transpose((3,2,1,0))[:,::-1,:,:]
# increment the day counter
iday = iday+1
sounding_interp = {}
if not era5_data:
for fn in interp_fields:
sounding_interp[fn] = lon_interp*np.nan
return sounding_interp
# interpolate
for fn in interp_fields:
my_interpolating_function = \
RegularGridInterpolator((era5_data['lon'],era5_data['lat'],era5_data['level'],era5_data['datenum']),\
era5_data[fn],bounds_error=False,fill_value=np.nan)
sounding_interp[fn] = my_interpolating_function((lon_interp,lat_interp,p_interp,time_interp))
return sounding_interp
def F_interp_era5(sounding_lon,sounding_lat,sounding_datenum,\
era5_dir='/mnt/Data2/ERA5/',\
interp_fields=None,\
fn_header=None):
"""
sample a field from era5 data in .nc format.
see era5.py for era5 downloading/subsetting
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
era5_dir:
directory where subset era5 data in .nc are saved; if fn_header is None, era5_dir is used as a strftime-able file path pattern
interp_fields:
variables to interpolate from era5, only 2d fields are supported
fn_header:
in general should denote domain location of era5 data
created on 2019/09/18
"""
from scipy.interpolate import RegularGridInterpolator
interp_fields = interp_fields or ['blh','u10','v10','u100','v100','sp']
start_datenum = np.amin(sounding_datenum)
end_datenum = np.amax(sounding_datenum)
start_date = datedev_py(start_datenum).date()
end_date = datedev_py(end_datenum).date()
days = (end_date-start_date).days+1
DATES = [start_date + datetime.timedelta(days=d) for d in range(days)]
era5_data = {}
iday = 0
for DATE in DATES:
if fn_header is None:
flist = glob.glob(DATE.strftime(era5_dir))
if len(flist) == 0:
logging.warning('no file found on '+DATE.strftime('%Y%m%d'))
continue
if len(flist) > 1:
logging.warning('{} files found on {}, using the first one'.format(len(flist),DATE.strftime('%Y%m%d')))
fn = flist[0]
else:
fn = os.path.join(era5_dir,DATE.strftime('Y%Y'),\
DATE.strftime('M%m'),\
DATE.strftime('D%d'),\
fn_header+'_2D_'+DATE.strftime('%Y%m%d')+'.nc')
if not era5_data:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['latitude','longitude','time'])))
era5_data['lon'] = nc_out['longitude']
era5_data['lat'] = nc_out['latitude'][::-1]
# number of hours in each daily file; must be the same for every day
nhour = len(nc_out['time'])
era5_data['datenum'] = np.zeros((nhour*(days)),dtype=np.float64)
# era5 time is defined as 'hours since 1900-01-01 00:00:00.0'
era5_data['datenum'][iday*nhour:((iday+1)*nhour)] = nc_out['time']/24.+693962.
for field in interp_fields:
era5_data[field] = np.zeros((len(era5_data['lon']),len(era5_data['lat']),nhour*(days)))
if len(nc_out[field].shape) != 3:
logging.warning('anomalous dimension in ERA5 field {}; taking only the first element of the second dimension'.format(field))
nc_out[field] = nc_out[field][:,0,...].squeeze()
# was read in as 3-d array in time, lat, lon; transpose to lon, lat, time
era5_data[field][...,iday*nhour:((iday+1)*nhour)] = nc_out[field].transpose((2,1,0))[:,::-1,:]
else:
nc_out = F_ncread_selective(fn,np.concatenate(
(interp_fields,['time'])))
# era5 time is defined as 'hours since 1900-01-01 00:00:00.0'
era5_data['datenum'][iday*nhour:((iday+1)*nhour)] = nc_out['time']/24.+693962.
for field in interp_fields:
# was read in as 3-d array in time, lat, lon; transpose to lon, lat, time
era5_data[field][...,iday*nhour:((iday+1)*nhour)] = nc_out[field].transpose((2,1,0))[:,::-1,:]
# increment the day counter
iday = iday+1
sounding_interp = {}
if not era5_data:
for fn in interp_fields:
sounding_interp[fn] = sounding_lon*np.nan
return sounding_interp
# interpolate
for fn in interp_fields:
my_interpolating_function = \
RegularGridInterpolator((era5_data['lon'],era5_data['lat'],era5_data['datenum']),\
era5_data[fn],bounds_error=False,fill_value=np.nan)
sounding_interp[fn] = my_interpolating_function((sounding_lon,sounding_lat,sounding_datenum))
return sounding_interp
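# A hedged sketch of the fn_header=None mode, where era5_dir is treated as a
# strftime-able file pattern (the path here is an illustrative assumption):
#
# out = F_interp_era5(sounding_lon,sounding_lat,sounding_datenum,
#                     era5_dir='/data/ERA5/Y%Y/M%m/D%d/CONUS_2D_%Y%m%d.nc',
#                     interp_fields=['u10','v10'],fn_header=None)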
def F_interp_hrrr_mat(sounding_lon,sounding_lat,sounding_datenum,
file_pattern='/projects/academic/kangsun/data/hrrr/%Y%m%d/hrrr_sfc_uv.mat',
interp_fields=None):
'''interpolate fields from hrrr data, concatenated as daily mat files
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
file_pattern:
pattern of daily met files, similar to l2_path_pattern
interp_fields:
variables to interpolate from hrrr, only 2d fields are supported
created on 2022/06/13
'''
from scipy.io import loadmat
from scipy.interpolate import RegularGridInterpolator
from pyproj import Proj
import glob
interp_fields = interp_fields or ['u80','v80']
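# Lambert conformal conic projection matching HRRR's native grid; R is given in
# km, so the x/y coordinates in the mat files are assumed to be in km as well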
p2 = Proj(proj='lcc',R=6371.229, lat_1=38.5, lat_2=38.5,lon_0=262.5,lat_0=38.5)
sounding_x,sounding_y = p2(sounding_lon,sounding_lat)
start_datenum = np.amin(sounding_datenum)
end_datenum = np.amax(sounding_datenum)
start_datetime = datedev_py(start_datenum)
end_datetime = datedev_py(end_datenum)
days = (end_datetime-start_datetime).days+1
DATES = [start_datetime + datetime.timedelta(days=d) for d in range(days)]
hrrr_data = {}
for date in DATES:
flist = glob.glob(date.strftime(file_pattern))
if len(flist) == 0:
logging.warning('no file found on '+date.strftime('%Y%m%d'))
continue
if len(flist) > 1:
logging.warning('{} files found on {}, using the first one'.format(len(flist),date.strftime('%Y%m%d')))
fn = flist[0]
if not hrrr_data:
d = loadmat(fn,squeeze_me=True)
hrrr_data['x'] = d['x']
hrrr_data['y'] = d['y']
hrrr_data['datenum'] = d['datenum']
for field in interp_fields:
# was read in as 3-d array in time, y, x; transpose to x, y, time
hrrr_data[field] = d[field].transpose((2,1,0))
else:
d = loadmat(fn,squeeze_me=True)
hrrr_data['datenum'] = np.append(hrrr_data['datenum'],d['datenum'])
for field in interp_fields:
# was read in as 3-d array in time, y, x; transpose to x, y, time
hrrr_data[field] = np.append(hrrr_data[field],d[field].transpose((2,1,0)),axis=2)
sounding_interp = {}
if not hrrr_data:
for fn in interp_fields:
sounding_interp[fn] = sounding_lon*np.nan
return sounding_interp
# interpolate
for fn in interp_fields:
my_interpolating_function = \
RegularGridInterpolator((hrrr_data['x'],hrrr_data['y'],hrrr_data['datenum']),\
hrrr_data[fn],bounds_error=False,fill_value=np.nan)
sounding_interp[fn] = my_interpolating_function((sounding_x,sounding_y,sounding_datenum))
return sounding_interp
def F_interp_geos_mat(sounding_lon,sounding_lat,sounding_datenum,\
geos_dir='/mnt/Data2/GEOS/s5p_interp/',\
interp_fields=None,\
time_collection='inst3',\
fn_header='subset'):
"""
sample a field from subset geos fp data in .mat format.
see geos.py for geos downloading/subsetting
sounding_lon:
longitude for interpolation
sounding_lat:
latitude for interpolation
sounding_datenum:
time for interpolation in matlab datenum double format
geos_dir:
directory where subset geos data in .mat are saved
interp_fields:
variables to interpolate from geos fp, only 2d fields are supported
time_collection:
choose from inst3, tavg1, tavg3
created on 2019/05/26
updated on 2019/07/01 to be compatible with different file collections and non-continuous time steps
"""
from scipy.io import loadmat
from scipy.interpolate import RegularGridInterpolator
interp_fields = interp_fields or ['TROPPT']
if time_collection == 'inst3' or time_collection == '':
step_hour = 3
daily_start_time = datetime.time(hour=0,minute=0)