Skip to content

Commit

Permalink
add parallel processing script
Browse files Browse the repository at this point in the history
  • Loading branch information
dlnash committed Sep 17, 2024
1 parent 525b6ee commit 8f17d57
Show file tree
Hide file tree
Showing 4 changed files with 815 additions and 10 deletions.
2 changes: 1 addition & 1 deletion read_deterministic_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,10 @@ def __init__(self, F, fdate=None):
self.date_string = regex.findall(self.fpath)[-1]
elif fdate is not None:
self.date_string = fdate
self.fpath = '/home/dnash/comet_data/tmp'
fname = '/gfs_{0}_f{1}.grb'.format(self.date_string, str(self.F).zfill(3))

## for now: copy the files to local space
# self.fpath = '/home/dnash/comet_data/tmp'
# shutil.copy(self.fpath+fname, repo_path+fname) # copy file over to data folder
self.fname = self.fpath+fname

Expand Down
30 changes: 21 additions & 9 deletions run_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,7 @@
model_data.close() ## close data


for i, F in enumerate(F_lst):
start_time = pd.Timestamp.today()
#######################
### LOAD MODEL DATA ###
#######################
print('... Loading intermediate {0} data for {1} hour lead'.format(model_name, F))
out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
model_data = xr.open_dataset(out_fname, engine='netcdf4')

def multiP():
## SECOND LOOP - LOOP THROUGH LONGITUDE FOR CROSS SECTION ##
for k, current_line in enumerate(line_lst):
## subset vertical data and IVT data to current line
Expand All @@ -77,6 +69,26 @@
print('...... Creating figure for {0}'.format(current_line[1]))
plot_ivt_cross_sections(model_data, cross, line_lst, current_line, model_name, F)

for i in range(2):
p = multiprocessing.Process(target=plot, args=(i, i, i))
p.start()

if __name__ == "__main__":
input('Value: ')
multiP()


for i, F in enumerate(F_lst):
start_time = pd.Timestamp.today()
#######################
### LOAD MODEL DATA ###
#######################
print('... Loading intermediate {0} data for {1} hour lead'.format(model_name, F))
out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
model_data = xr.open_dataset(out_fname, engine='netcdf4')



end_time = pd.Timestamp.today()
td = end_time - start_time
td = format_timedelta_to_HHMMSS(td)
Expand Down
95 changes: 95 additions & 0 deletions run_tool_parallel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
Filename: run_tool_parallel.py
Author: Deanna Nash, [email protected]
Description: For GFS and ECMWF: output .png files of IVT cross section plots for different longitudinal cross sections and lead times.
"""
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
import multiprocessing

from read_deterministic_data import load_GFS_datasets, load_ECMWF_datasets
from plotter import plot_ivt_cross_sections
from calc_funcs import format_timedelta_to_HHMMSS

## the model name is the first (required) command-line argument
if len(sys.argv) < 2:
    ## exit with a usage message instead of a bare IndexError
    sys.exit('Usage: python {0} <model_name>  (ECMWF or GFS)'.format(sys.argv[0]))

model_name = sys.argv[1]
## only these two names are handled by the loading and cross-section code below
if model_name not in ('ECMWF', 'GFS'):
    sys.exit('Unsupported model name {0!r}: expected ECMWF or GFS'.format(model_name))
print('Creating ivt cross sections for {0}'.format(model_name))

## FIRST LOOP - LEAD TIME ##
## forecast lead times in hours: 0, 12, ..., 120
F_lst = np.arange(0, 132, 12)

## create list that has the start and end points for the cross section
## each line is [start_lat, start_lon, end_lat, end_lon]; every cross section
## runs from latitude 25 to latitude 65 along a single longitude
lon_lst = np.arange(185., 260., 5.)
line_lst = [[25., lon, 65., lon] for lon in lon_lst]


## loader for each supported model name; both are driven the same way below
model_loaders = {'ECMWF': load_ECMWF_datasets, 'GFS': load_GFS_datasets}

## preprocess every lead time and cache the result as an intermediate netCDF file
for F in F_lst:
    start_time = pd.Timestamp.today()
    #######################
    ### LOAD MODEL DATA ###
    #######################
    print('... Loading {0} data for {1} hour lead'.format(model_name, F))
    if model_name not in model_loaders:
        ## fail with a clear message instead of a NameError on model_data
        raise ValueError('Unsupported model name {0!r}: expected one of {1}'.format(
            model_name, sorted(model_loaders)))
    model_data = model_loaders[model_name](F=F, fdate=None).calc_vars()

    ## write intermediate data files
    out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
    try:
        model_data.to_netcdf(path=out_fname, mode='w', format='NETCDF4')
        td = format_timedelta_to_HHMMSS(pd.Timestamp.today() - start_time)
        print('Data for {0} lead took {1} to preprocess'.format(F, td))
    finally:
        model_data.close() ## close data, even if the write fails


def multiP(model_data, line_lst, F):
    """Plot every cross section for one lead time, one process per line.

    For each entry of ``line_lst`` the model data is subset to that line, a
    separate process running ``plot_ivt_cross_sections`` is started for it,
    and the function then waits for all started processes to finish.

    Parameters
    ----------
    model_data :
        Dataset for the current lead time; must support ``.sel`` on
        ``latitude`` and ``longitude``.
    line_lst : list
        Cross-section lines, each ``[start_lat, start_lon, end_lat, end_lon]``.
    F :
        Lead time, passed through to ``plot_ivt_cross_sections``.

    Raises
    ------
    ValueError
        If the module-level ``model_name`` is neither ``'ECMWF'`` nor ``'GFS'``.
    """
    workers = []
    try:
        ## SECOND LOOP - LOOP THROUGH LONGITUDE FOR CROSS SECTION ##
        for current_line in line_lst:
            start_lat, lon, end_lat = current_line[0], current_line[1], current_line[2]

            ## subset vertical data and IVT data to current line
            if model_name == 'ECMWF':
                ## slice runs from end_lat down to start_lat with a 0.1 pad
                ## (presumably latitude is stored north-to-south -- TODO confirm)
                cross = model_data.sel(latitude=slice(end_lat + 0.1, start_lat - 0.1))
                cross = cross.sel(longitude=lon, method='nearest')
            elif model_name == 'GFS':
                cross = model_data.sel(latitude=slice(end_lat, start_lat), longitude=lon)
            else:
                ## previously this fell through to an UnboundLocalError on `cross`
                raise ValueError('Unsupported model name {0!r}: expected ECMWF or GFS'.format(model_name))

            cross = cross.sortby('latitude')

            ### Create Plots
            p = multiprocessing.Process(
                target=plot_ivt_cross_sections,
                args=(model_data, cross, line_lst, current_line, model_name, F),
            )
            p.start()
            workers.append((current_line, p))
    finally:
        ## wait for every started worker so the caller's timing and close()
        ## happen only after the plots for this lead time are done
        for worker_line, p in workers:
            p.join()
            if p.exitcode != 0:
                ## report failures but keep going, as plotting errors were never fatal
                print('...... Plot process for line {0} exited with code {1}'.format(worker_line, p.exitcode))



## reload each intermediate file and create the cross-section plots for it
for F in F_lst:
    start_time = pd.Timestamp.today()
    #######################
    ### LOAD MODEL DATA ###
    #######################
    print('... Loading intermediate {0} data for {1} hour lead'.format(model_name, F))
    out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)

    ## the context manager closes the data even if plotting raises
    with xr.open_dataset(out_fname, engine='netcdf4') as model_data:
        ## plotting processes are only launched when this file runs as a script
        if __name__ == "__main__":
            multiP(model_data, line_lst, F)

        ## elapsed time is measured up to the point multiP returns
        td = format_timedelta_to_HHMMSS(pd.Timestamp.today() - start_time)
        print('Plots for {0} lead took {1} to run'.format(F, td))



Loading

0 comments on commit 8f17d57

Please sign in to comment.