add parallel processing script

CW3E · Sep 17, 2024 · 8f17d57 · 8f17d57
1 parent 525b6ee
commit 8f17d57
Show file tree

Hide file tree

Showing 4 changed files with 815 additions and 10 deletions.
diff --git a/read_deterministic_data.py b/read_deterministic_data.py
@@ -227,10 +227,10 @@ def __init__(self, F, fdate=None):
             self.date_string = regex.findall(self.fpath)[-1]
         elif fdate is not None:
             self.date_string = fdate
+            self.fpath = '/home/dnash/comet_data/tmp'
         fname = '/gfs_{0}_f{1}.grb'.format(self.date_string, str(self.F).zfill(3))
 
         ## for now: copy the files to local space
-        # self.fpath = '/home/dnash/comet_data/tmp'
         # shutil.copy(self.fpath+fname, repo_path+fname) # copy file over to data folder
         self.fname = self.fpath+fname
 

diff --git a/run_tool.py b/run_tool.py
@@ -53,15 +53,7 @@
     model_data.close() ## close data
 
 
-for i, F in enumerate(F_lst):
-    start_time = pd.Timestamp.today()
-    #######################
-    ### LOAD MODEL DATA ###
-    #######################
-    print('... Loading intermediate {0} data for {1} hour lead'.format(model_name, F))
-    out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
-    model_data = xr.open_dataset(out_fname, engine='netcdf4')
-
+def multiP():
     ## SECOND LOOP - LOOP THROUGH LONGITUDE FOR CROSS SECTION ##
     for k, current_line in enumerate(line_lst):
         ## subset vertical data and IVT data to current line
@@ -77,6 +69,26 @@
         print('...... Creating figure for {0}'.format(current_line[1]))
         plot_ivt_cross_sections(model_data, cross, line_lst, current_line, model_name, F)
 
+    for i in range(2):
+        p = multiprocessing.Process(target=plot, args=(i, i, i))
+        p.start()
+
+if __name__ == "__main__": 
+    input('Value: ') 
+    multiP()
+
+
+for i, F in enumerate(F_lst):
+    start_time = pd.Timestamp.today()
+    #######################
+    ### LOAD MODEL DATA ###
+    #######################
+    print('... Loading intermediate {0} data for {1} hour lead'.format(model_name, F))
+    out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
+    model_data = xr.open_dataset(out_fname, engine='netcdf4')
+
+
+
     end_time = pd.Timestamp.today()
     td = end_time - start_time
     td = format_timedelta_to_HHMMSS(td)

diff --git a/run_tool_parallel.py b/run_tool_parallel.py
@@ -0,0 +1,95 @@
+"""
+Filename:    run_tool_parallel.py
+Author:      Deanna Nash, [email protected]
+Description: For GFS and ECMWF: output .png files of IVT cross-section plots for different longitudinal cross sections and lead times.
+"""
+import os
+import sys
+import numpy as np
+import pandas as pd
+import xarray as xr
+import multiprocessing
+
+from read_deterministic_data import load_GFS_datasets, load_ECMWF_datasets
+from plotter import plot_ivt_cross_sections
+from calc_funcs import format_timedelta_to_HHMMSS
+
+## model to process is given as the first command-line argument
+model_name = sys.argv[1]
+print('Creating ivt cross sections for {0}'.format(model_name))
+
+## lead times to process: every 12 hours from 0 up to (not including) 132
+F_lst = np.arange(0, 132, 12)
+
+## one cross-section line per longitude, every 5 degrees from 185 up to
+## (not including) 260; each line is [start_lat, start_lon, end_lat, end_lon]
+lon_lst = np.arange(185., 260., 5.)
+line_lst = [[25., lon, 65., lon] for lon in lon_lst]
+
+
+## Preprocess each lead time into an intermediate netCDF file.
+## Guarded so it only runs when this file is executed as a script: with the
+## 'spawn'/'forkserver' start methods each worker process re-imports this
+## module, and unguarded top-level code would redo the preprocessing (and
+## rewrite the intermediate files) inside every worker.
+if __name__ == "__main__":
+    for F in F_lst:
+        start_time = pd.Timestamp.today()
+        #######################
+        ### LOAD MODEL DATA ###
+        #######################
+        print('... Loading {0} data for {1} hour lead'.format(model_name, F))
+        if model_name == 'ECMWF':
+            s = load_ECMWF_datasets(F=F, fdate=None)
+        elif model_name == 'GFS':
+            s = load_GFS_datasets(F=F, fdate=None)
+        else:
+            ## fail with a clear message instead of a NameError further down
+            raise ValueError("Unsupported model '{0}': expected 'ECMWF' or 'GFS'".format(model_name))
+        model_data = s.calc_vars()
+
+        try:
+            ## write intermediate data files
+            out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
+            model_data.to_netcdf(path=out_fname, mode = 'w', format='NETCDF4')
+            end_time = pd.Timestamp.today()
+            td = end_time - start_time
+            td = format_timedelta_to_HHMMSS(td)
+            print('Data for {0} lead took {1} to preprocess'.format(F, td))
+        finally:
+            model_data.close() ## close data even if the write fails
+
+
+def multiP(model_data, line_lst, F):
+    """
+    Plot every cross section for one lead time, one worker process per line.
+
+    For each entry of line_lst the data are subset to that line, sorted by
+    latitude, and handed to plot_ivt_cross_sections in a separate
+    multiprocessing.Process. The call returns only after every worker has
+    finished, so the caller can time the plots and close model_data safely.
+
+    Parameters
+    ----------
+    model_data : object supporting .sel(latitude=..., longitude=...)
+        Data for this lead time (the caller passes the result of
+        xr.open_dataset on the intermediate file).
+    line_lst : list
+        Lines given as [start_lat, start_lon, end_lat, end_lon].
+    F : lead time, forwarded unchanged to plot_ivt_cross_sections.
+
+    Raises
+    ------
+    ValueError
+        If the module-level model_name is neither 'ECMWF' nor 'GFS'.
+    """
+    workers = []
+    ## SECOND LOOP - LOOP THROUGH LONGITUDE FOR CROSS SECTION ##
+    for current_line in line_lst:
+        ## subset vertical data and IVT data to current line
+        if model_name == 'ECMWF':
+            cross = model_data.sel(latitude = slice(current_line[2]+0.1, current_line[0]-0.1))
+            cross = cross.sel(longitude=current_line[1], method='nearest')
+        elif model_name == 'GFS':
+            cross = model_data.sel(latitude = slice(current_line[2], current_line[0]), longitude=current_line[1])
+        else:
+            ## without this branch `cross` would be unbound below
+            raise ValueError("Unsupported model '{0}': expected 'ECMWF' or 'GFS'".format(model_name))
+
+        cross = cross.sortby('latitude')
+
+        ### Create Plots
+        p = multiprocessing.Process(target=plot_ivt_cross_sections, args=(model_data, cross, line_lst, current_line, model_name, F))
+        p.start()
+        workers.append(p)
+
+    ## wait for all plots of this lead time before returning; otherwise the
+    ## caller reports its timing and closes the data while workers still run
+    for p in workers:
+        p.join()
+
+
+
+## Create the plots for each lead time from the intermediate files.
+## The whole loop is guarded (not just the multiP call): worker processes
+## that re-import this module under the 'spawn'/'forkserver' start methods
+## must not reopen every dataset and print timing messages of their own.
+if __name__ == "__main__":
+    for F in F_lst:
+        start_time = pd.Timestamp.today()
+        #######################
+        ### LOAD MODEL DATA ###
+        #######################
+        print('... Loading intermediate {0} data for {1} hour lead'.format(model_name, F))
+        out_fname = '/data/projects/operations/ivt_cross_sections/data/tmp_{0}_{1}.nc'.format(model_name, F)
+        model_data = xr.open_dataset(out_fname, engine='netcdf4')
+
+        try:
+            multiP(model_data, line_lst, F)
+
+            end_time = pd.Timestamp.today()
+            td = end_time - start_time
+            td = format_timedelta_to_HHMMSS(td)
+            print('Plots for {0} lead took {1} to run'.format(F, td))
+        finally:
+            model_data.close() ## close data even if plotting raises
+
+
+