diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index 9a9a0627b..309205c39 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -179,6 +179,11 @@ "index_list": "13", "run": false }, + { + "category": "s2s", + "index_list": "14", + "run": false + }, { "category": "space_weather", "index_list": "0-1", diff --git a/docs/_static/s2s-GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.png b/docs/_static/s2s-GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.png new file mode 100644 index 000000000..6d9775a2a Binary files /dev/null and b/docs/_static/s2s-GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.png differ diff --git a/docs/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.py b/docs/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.py new file mode 100644 index 000000000..79502d2b3 --- /dev/null +++ b/docs/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.py @@ -0,0 +1,144 @@ +""" +GridStat: Determine dominant ensemble members terciles and calculate categorical outputs +======================================================================================== + +model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf + +""" +############################################################################## +# Scientific Objective +# -------------------- +# This use case ingests a CFSv2 Ensemble forecast, with all ensemble members in a single file for a given year. +# 29 years of forecast ensembles are used to create probabilities for each tercile, which is accomplished by a Python script. +# Of the terciles, each gridpoint is assigned a value corresponding to the tercile that is most likely to occur. This is compared to an observation set +# that contains the tercile data and MCTS line type is requested. +# This use case highlights the inclusion of tercile data for calculating HSS; in particular, how to utilize the hss_ec_value option to +# preset the expected values rather than relying on categorical values. + +############################################################################## +# Datasets +# --------------------- +# +# | **Forecast:** 29 CFSv2 Ensemble files, 2m temperature fields +# +# | **Observations:** GHCNCAMS, 2m temperature field +# +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# +# | **Data Source:** CPC + +############################################################################## +# METplus Components +# ------------------ +# +# This use case calls a Python script 29 times, once for each year of data of the CFSv2 ensemble. +# Each time a successful call to the script is made, a grid of 1s, 2s, and 3s is returned, representing which tercile was dominant for the gridpoint. +# GridStat processes the forecast and observation fields, and outputs the requested line types. + +############################################################################## +# METplus Workflow +# ---------------- +# +# This use case utilizes 29 years of forecast data, with 24 members in each ensemble forecast. +# The following boundary times are used for the entire script: +# +# | **Init Beg:** 1982-01-01 +# | **Init End:** 2010-01-02 +# +# Because the increment is 1 year, all January 1st from 1982 to 2010 are processed for a total of 29 years. +# + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line +# i.e. -c parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf +# + +############################################################################## +# MET Configuration +# --------------------- +# +# METplus sets environment variables based on the values in the METplus configuration file. These variables are referenced in the MET configuration file. **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** If there is a setting in the MET configuration file that is not controlled by an environment variable, you can add additional environment variables to be set only within the METplus environment using the [user_env_vars] section of the METplus configuration files. See the ‘User Defined Config’ section on the ‘System Configuration’ page of the METplus User’s Guide for more information. +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped +# + +############################################################################## +# Running METplus +# --------------- +# +# This use case can be run two ways: +# +# 1) Passing in GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf then a user-specific system configuration file:: +# +# run_metplus.py /path/to/METplus/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile /path/to/user_system.conf +# +# 2) Modifying the configurations in parm/metplus_config, then passing in GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile:: +# +# run_metplus.py /path/to/METplus/parm/use_cases/model_applications/marine_and_cryosphere/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf +# +# The former method is recommended. Whether you add them to a user-specific configuration file or modify the metplus_config files, the following variables must be set correctly: +# +# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs). This is not required to run METplus, but it is required to run the examples in parm/use_cases +# * **OUTPUT_BASE** - Path where METplus output will be written. This must be in a location where you have write permissions +# * **MET_INSTALL_DIR** - Path to location where MET is installed locally +# +# Example User Configuration File:: +# +# [config] +# INPUT_BASE = /path/to/sample/input/data +# OUTPUT_BASE = /path/to/output/dir +# MET_INSTALL_DIR = /path/to/met-X.Y +# +# + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for the use case will be found in 29 folders(relative to **OUTPUT_BASE**). +# The output will follow the time information of the run. Specifically: +# +# * YYYY01 +# +# where YYYY will be replaced by values corresponding to each of the years (1982 through 2010). +# Each of those folders will have the following files: +# +# * grid_stat_000000L_19820101_000000V_pairs.nc +# * grid_stat_000000L_19820101_000000V_mctc.txt +# * grid_stat_000000L_19820101_000000V_mcts.txt +# * grid_stat_000000L_19820101_000000V.stat +# + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * GridStatUseCase +# * ProbabilityVerificationUseCase +# * PythonEmbeddingFileUseCase +# * S2SAppUseCase +# * NETCDFFileUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# +# +# sphinx_gallery_thumbnail_path = '_static/s2s-GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.png' + diff --git a/internal_tests/use_cases/all_use_cases.txt b/internal_tests/use_cases/all_use_cases.txt index 81aa3b400..58a525501 100644 --- a/internal_tests/use_cases/all_use_cases.txt +++ b/internal_tests/use_cases/all_use_cases.txt @@ -138,6 +138,7 @@ Category: s2s 11:: UserScript_fcstGFS_obsERA_WeatherRegime:: model_applications/s2s/UserScript_fcstGFS_obsERA_WeatherRegime.conf:: weatherregime_env,cartopy,metplus 12:: UserScript_obsERA_obsOnly_Stratosphere:: model_applications/s2s/UserScript_obsERA_obsOnly_Stratosphere.conf:: metplotpy_env,metdatadb 13::SeriesAnalysis_fcstCFSv2_obsGHCNCAMS_climoStandardized_MultiStatisticTool:: model_applications/s2s/SeriesAnalysis_fcstCFSv2_obsGHCNCAMS_climoStandardized_MultiStatisticTool.conf:: netcdf4_env +14::GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile:: model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf:: netcdf4_env Category: space_weather 0::GridStat_fcstGloTEC_obsGloTEC_vx7:: model_applications/space_weather/GridStat_fcstGloTEC_obsGloTEC_vx7.conf diff --git a/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf new file mode 100644 index 000000000..228280838 --- /dev/null +++ b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile.conf @@ -0,0 +1,102 @@ +[config] + +PROCESS_LIST = GridStat + +### +# Time Info +### + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=1982010100 +INIT_END=2010020100 +INIT_INCREMENT = 1Y + +LEAD_SEQ = + +LOOP_ORDER = processes + +### +# File I/O +### + + +FCST_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/HSS_out_Mplus +GRID_STAT_OUTPUT_TEMPLATE = {init?fmt=%Y%m} + + +### +# Field Info +### + +MODEL = CFSv2 +OBTYPE = OBS + +FCST_VAR1_NAME = {CONFIG_DIR}/forecast_read-in_CFSv2_categoricalthresholds.py {INPUT_BASE}/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/CFSv2.tmp2m.{init?fmt=%Y%m}.fcst.nc:tmp2m:{init?fmt=%Y%m%d%H}:0:0 +FCST_VAR1_LEVELS = +FCST_VAR1_THRESH = lt1.5, lt2.5 + +OBS_VAR1_NAME = {CONFIG_DIR}/forecast_read-in_CFSv2_categoricalthresholds_obs.py {INPUT_BASE}/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/CFSv2.tmp2m.{init?fmt=%Y%m}.fcst.nc:tmp2m:{init?fmt=%Y%m%d%H}:0:0 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = lt1.5, lt2.5 + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile + +### +# GridStat +### + + + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + + +GRID_STAT_REGRID_TO_GRID = FCST + + +GRID_STAT_DESC = NA + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False + +FCST_IS_PROB = false + +FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = + + + +GRID_STAT_OUTPUT_FLAG_MCTC = BOTH +GRID_STAT_OUTPUT_FLAG_MCTS = BOTH + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE + + +GRID_STAT_HSS_EC_VALUE = + diff --git a/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/forecast_read-in_CFSv2_categoricalthresholds.py b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/forecast_read-in_CFSv2_categoricalthresholds.py new file mode 100644 index 000000000..cee496070 --- /dev/null +++ b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/forecast_read-in_CFSv2_categoricalthresholds.py @@ -0,0 +1,183 @@ + +import sys +import re +import numpy as np +import datetime as dt +from dateutil.relativedelta import * # New import +from netCDF4 import Dataset, chartostring +from preprocessFun_Modified import preprocess, dominant_tercile_fcst, dominant_tercile_obs, get_init_year # New import + +#grab input from user +#should be (1)input file using full path (2) variable name (3) valid time for the forecast in %Y%m%d%H%M format and (4) ensemble member number, all separated by ':' characters +#program can only accept that 1 input, while still maintaining user flexability to change multiple +#variables, including valid time, ens member, etc. +# Addition by Johnna - model, lead, init taken from file name +# Addition by Johnna - climatology and std dev caclulated for given lead, model, init +# Addition by Johnna - added command line pull for lead + +print('Type in input_file using full path:variable name:valid time in %Y%m%d%H%M:ensemble member number:lead') +# Example +# /cpc/nmme/CFSv2/hcst_new/010100/CFSv2.tmp2m.198201.fcst.nc:tmp2m:198201010101:0:0 + +input_file, var_name, init_time, ens_mem, lead = sys.argv[1].split(':') +ens_mem = int(ens_mem) +lead = int(lead) # Added by Johnna +init_time = dt.datetime.strptime(init_time,"%Y%m%d%H%M") + +# --- +# Added by Johnna +input_file_split = input_file.split('/') +#EDIT BY JOHN O +init_temp = "010100" +#init_temp = input_file_split[5] + +#fil_name = input_file_split[6] +fil_name = input_file +year_temp = fil_name.split('.') +#year = year_temp[2] +year = year_temp[-3] +print('YYYYMM: ' + str(year)) + +# Setup Climatology +#EDIT BY JOHN O +#model = input_file_split[3] +model = "CFSv2" +init = init_temp.replace('0100','') +lead = lead +clim_per = '1982_2010' +member = ens_mem +variable = var_name + +# Get path based on model (only works for this file name) +input_file_split2 = input_file.split(model + '.') +path = input_file_split2[0] + +# Calculate climatologies and standard deviations/etc. This calculates every time the loop runs (i.e. for each file read into MET) +# There might be a better positioning for this function such that it doesn't recalc each loop, but I pulled from the command line input +# above to create the function, so right now it has dependance on the file names/user input/etc. +# Fine for now since data are smallish, but if theres something higher res might slow things down. +print('Model: ' + model + ' Init: ' + str(init) + ' lead: ' + str(lead) + ' Member: ' + str(ens_mem) + ' Variable: ' + variable) + +# We only need the fcst_cat_thresh +# Commenting out the original preprocess function +#clim, stddev, anom, std_anom = preprocess(path, model, init, variable, lead, clim_per, member) + +# New preprocessing function to get the cat thresholds +# In order 0, 1, 2 where 0 is LT, 1 is MT, 2 is UT +# fcst_cat_thresh has ALL times (28), and is calculated for ALL 24 members +# So the array fcst_cat_thresh is time | 29, lat | 181, lon | 360) +# I wrote a clumsy function to split this into years based on the filename read in (get_init_year) +# But it works! Basically just a bunch of if statements like if the filename is 198201 then its index 0 of the array and so on to the last index +# I also swap the fcst lats to be -90 to 90 instead of 90 to -90 and match up the longitudes (theres a cyclic point in the fcst so its actually 361 pts) +fcst_cat_thresh = dominant_tercile_fcst(path, model, init, variable, clim_per, lead) +idx = get_init_year(year) +fcst_cat_thresh_1year = fcst_cat_thresh[idx,::-1,0:360] + +# Going to do the obs in the same wrapper. I realized this would be easier so I hope this is okay... I think its just a flag...? +# Using same clunky function to get the right year out of the big array +#EDIT BY JOHN O +obs_cat_thresh = dominant_tercile_obs(path) +obs_cat_thresh_1year = obs_cat_thresh[idx,:,0:360] + +# Redefine var_name to fcst (necessary for below) +var_name = 'fcst' +# --- + +try: + print('The file you are working on is: ' + input_file) + # all of this is actually pointless eexcept to get the dimensions and times, all of the calculations are done in the functions + #set pointers to file and group name in file + + f = Dataset(input_file) + v = f[var_name][member,lead,:,:] + + #grab data from file + lat = np.float64(f.variables['lat'][::-1]) + lon = np.float64(f.variables['lon'][:]) + + # Grab and format time, this is taken from the file name, which might not be the best way of doing it + # Can also potentially pull from the netCDF; but need an extra package in netCDF4 to do that, and its a little weird + # given units of months since. This was a bit easier. + # Do need to add relativedelta package but thats fairly common (its from dateutil) + val_time = init_time + relativedelta(months=lead) + print('Valid Time: ' + str(val_time)) + + + # Coming from the function + # uncomment out the obs one if you want to use the obs? + v = fcst_cat_thresh_1year + #v = obs_cat_thresh_1year + print('Shape of variable to read into MET: ' + str(v.shape)) + + # -------------------------- + # Commented out by Johnna, defined above in user input + # Print statement erroring so commented out + #grab intialization time from file name and hold + #also compute the lead time + #i_time_ind = input_file.split("_").index("aod.nc")-1 + #i_time = input_file.split("_")[i_time_ind] + #i_time_obj = dt.datetime.strptime(i_time,"%Y%m%d%H") + #lead, rem = divmod((val_time - i_time_obj).total_seconds(), 3600) + + #print("Ensemble Member evaluation for: "+f.members.split(',')[ens_mem]) + + #checks if the the valid time for the forecast from user is present in file. + #Exits if the time is not present with a message + #if not val_time.timestamp() in f['time'][:]: + # print("valid time of "+str(val_time)+" is not present. Check file initialization time, passed valid time.") + # f.close() + # sys.exit(1) + + #grab index in the time array for the valid time provided by user (val_time) + #val_time_ind = np.where(f['time'][:] == val_time.timestamp())[0][0] + #var = np.float64(v[val_time_ind:val_time_ind+1,ens_mem:ens_mem+1,::-1,:]) + # -------------------------- + + #squeeze out all 1d arrays, add fill value, convert to float64 + var = np.float64(v) + var[var < -800] = -9999 + + met_data = np.squeeze(var).copy() + #JOHN O ADDED TO TEST IF FLIPPING IS OCCURING + met_data = met_data[::-1,:] + met_data = np.nan_to_num(met_data, nan=-1) + print('Done, no exceptions') + +except NameError: + print("Can't find input file") + sys.exit(1) + +########## +#create a metadata dictionary + +attrs = { + + 'valid': str(val_time.strftime("%Y%m%d"))+'_'+str(val_time.strftime("%H%M%S")), + 'init': str(init_time.strftime("%Y%m%d"))+'_'+str(init_time.strftime("%H%M%S")), + 'name': var_name, + 'long_name': input_file, + 'lead': str(int(lead)), + 'accum': '00', + 'level': 'sfc', + 'units': 'Degrees K', + + 'grid': { + 'name': 'Global 1 degree', + 'type': 'LatLon', + 'lat_ll': -90.0, + 'lon_ll': 0.0, + 'delta_lat': 1.0, + 'delta_lon': 1.0, + + 'Nlon': f.dimensions['lon'].size, + 'Nlat': f.dimensions['lat'].size, + } + } + +#print some output to show script ran successfully +print("Input file: " + repr(input_file)) +print("Variable name: " + repr(var_name)) +print("valid time: " + repr(val_time.strftime("%Y%m%d%H%M"))) +print("Attributes:\t" + repr(attrs)) +f.close() + diff --git a/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/forecast_read-in_CFSv2_categoricalthresholds_obs.py b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/forecast_read-in_CFSv2_categoricalthresholds_obs.py new file mode 100644 index 000000000..931115f68 --- /dev/null +++ b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/forecast_read-in_CFSv2_categoricalthresholds_obs.py @@ -0,0 +1,186 @@ + +import sys +import re +import numpy as np +import datetime as dt +from dateutil.relativedelta import * # New import +from netCDF4 import Dataset, chartostring +from preprocessFun_Modified import preprocess, dominant_tercile_fcst, dominant_tercile_obs, get_init_year # New import + +#grab input from user +#should be (1)input file using full path (2) variable name (3) valid time for the forecast in %Y%m%d%H%M format and (4) ensemble member number, all separated by ':' characters +#program can only accept that 1 input, while still maintaining user flexability to change multiple +#variables, including valid time, ens member, etc. +# Addition by Johnna - model, lead, init taken from file name +# Addition by Johnna - climatology and std dev caclulated for given lead, model, init +# Addition by Johnna - added command line pull for lead + +print('Type in input_file using full path:variable name:valid time in %Y%m%d%H%M:ensemble member number:lead') +# Example +# /cpc/nmme/CFSv2/hcst_new/010100/CFSv2.tmp2m.198201.fcst.nc:tmp2m:198201010101:0:0 + +input_file, var_name, init_time, ens_mem, lead = sys.argv[1].split(':') +ens_mem = int(ens_mem) +lead = int(lead) # Added by Johnna +init_time = dt.datetime.strptime(init_time,"%Y%m%d%H%M") + +# --- +# Added by Johnna +input_file_split = input_file.split('/') +#EDIT BY JOHN O +init_temp = "010100" +#init_temp = input_file_split[5] + +#fil_name = input_file_split[6] +fil_name = input_file +year_temp = fil_name.split('.') +#year = year_temp[2] +year = year_temp[-3] +print('YYYYMM: ' + str(year)) + +# Setup Climatology +#EDIT BY JOHN O +#model = input_file_split[3] +model = "CFSv2" +init = init_temp.replace('0100','') +lead = lead +clim_per = '1982_2010' +member = ens_mem +variable = var_name + +# Get path based on model (only works for this file name) +input_file_split2 = input_file.split(model + '.') +path = input_file_split2[0] + +# Calculate climatologies and standard deviations/etc. This calculates every time the loop runs (i.e. for each file read into MET) +# There might be a better positioning for this function such that it doesn't recalc each loop, but I pulled from the command line input +# above to create the function, so right now it has dependance on the file names/user input/etc. +# Fine for now since data are smallish, but if theres something higher res might slow things down. +print('Model: ' + model + ' Init: ' + str(init) + ' lead: ' + str(lead) + ' Member: ' + str(ens_mem) + ' Variable: ' + variable) + +# We only need the fcst_cat_thresh +# Commenting out the original preprocess function +#clim, stddev, anom, std_anom = preprocess(path, model, init, variable, lead, clim_per, member) + +# New preprocessing function to get the cat thresholds +# In order 0, 1, 2 where 0 is LT, 1 is MT, 2 is UT +# fcst_cat_thresh has ALL times (28), and is calculated for ALL 24 members +# So the array fcst_cat_thresh is time | 29, lat | 181, lon | 360) +# I wrote a clumsy function to split this into years based on the filename read in (get_init_year) +# But it works! Basically just a bunch of if statements like if the filename is 198201 then its index 0 of the array and so on to the last index +# I also swap the fcst lats to be -90 to 90 instead of 90 to -90 and match up the longitudes (theres a cyclic point in the fcst so its actually 361 pts) +fcst_cat_thresh = dominant_tercile_fcst(path, model, init, variable, clim_per, lead) +idx = get_init_year(year) +fcst_cat_thresh_1year = fcst_cat_thresh[idx,::-1,0:360] + +# Going to do the obs in the same wrapper. I realized this would be easier so I hope this is okay... I think its just a flag...? +# Using same clunky function to get the right year out of the big array +#EDIT BY JOHN O +obs_cat_thresh = dominant_tercile_obs(path) +obs_cat_thresh_1year = obs_cat_thresh[idx,:,0:360] + +# Redefine var_name to fcst (necessary for below) +var_name = 'fcst' +# --- + +try: + print('The file you are working on is: ' + input_file) + # all of this is actually pointless eexcept to get the dimensions and times, all of the calculations are done in the functions + #set pointers to file and group name in file + + f = Dataset(input_file) + v = f[var_name][member,lead,:,:] + + #grab data from file + lat = np.float64(f.variables['lat'][::-1]) + lon = np.float64(f.variables['lon'][:]) + + # Grab and format time, this is taken from the file name, which might not be the best way of doing it + # Can also potentially pull from the netCDF; but need an extra package in netCDF4 to do that, and its a little weird + # given units of months since. This was a bit easier. + # Do need to add relativedelta package but thats fairly common (its from dateutil) + val_time = init_time + relativedelta(months=lead) + print('Valid Time: ' + str(val_time)) + + + # Coming from the function + # uncomment out the obs one if you want to use the obs? + #v = fcst_cat_thresh_1year + v = obs_cat_thresh_1year + print('Shape of variable to read into MET: ' + str(v.shape)) + + # -------------------------- + # Commented out by Johnna, defined above in user input + # Print statement erroring so commented out + #grab intialization time from file name and hold + #also compute the lead time + #i_time_ind = input_file.split("_").index("aod.nc")-1 + #i_time = input_file.split("_")[i_time_ind] + #i_time_obj = dt.datetime.strptime(i_time,"%Y%m%d%H") + #lead, rem = divmod((val_time - i_time_obj).total_seconds(), 3600) + + #print("Ensemble Member evaluation for: "+f.members.split(',')[ens_mem]) + + #checks if the the valid time for the forecast from user is present in file. + #Exits if the time is not present with a message + #if not val_time.timestamp() in f['time'][:]: + # print("valid time of "+str(val_time)+" is not present. Check file initialization time, passed valid time.") + # f.close() + # sys.exit(1) + + #grab index in the time array for the valid time provided by user (val_time) + #val_time_ind = np.where(f['time'][:] == val_time.timestamp())[0][0] + #var = np.float64(v[val_time_ind:val_time_ind+1,ens_mem:ens_mem+1,::-1,:]) + # -------------------------- + + #squeeze out all 1d arrays, add fill value, convert to float64 + var = np.float64(v) + var[var < -800] = -9999 + + met_data = np.squeeze(var).copy() + #JOHN ADDED TO REMOVE EXTRA LON + #met_data = met_data[:,:-1] + #JOHN O ADDED TO FLIP OBS GRID + met_data = met_data[::-1,:] + met_data = np.nan_to_num(met_data, nan=-1) + print('Done, no exceptions') + print('New shape of variable to read into MET: ' + str(met_data.shape)) + +except NameError: + print("Can't find input file") + sys.exit(1) + +########## +#create a metadata dictionary + +attrs = { + + 'valid': str(val_time.strftime("%Y%m%d"))+'_'+str(val_time.strftime("%H%M%S")), + 'init': str(init_time.strftime("%Y%m%d"))+'_'+str(init_time.strftime("%H%M%S")), + 'name': var_name, + 'long_name': input_file, + 'lead': str(int(lead)), + 'accum': '00', + 'level': 'sfc', + 'units': 'Degrees K', + + 'grid': { + 'name': 'Global 1 degree', + 'type': 'LatLon', + 'lat_ll': -90.0, + 'lon_ll': 0.0, + 'delta_lat': 1.0, + 'delta_lon': 1.0, + + 'Nlon': 360, + 'Nlat': f.dimensions['lat'].size, + } + } + +#print some output to show script ran successfully +print("Input file: " + repr(input_file)) +print("Variable name: " + repr(var_name)) +print("valid time: " + repr(val_time.strftime("%Y%m%d%H%M"))) +print("Attributes:\t" + repr(attrs)) +f.close() + diff --git a/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/preprocessFun_Modified.py b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/preprocessFun_Modified.py new file mode 100644 index 000000000..f4a638331 --- /dev/null +++ b/parm/use_cases/model_applications/s2s/GridStat_fcstCFSv2_obsGHCNCAMS_MultiTercile/preprocessFun_Modified.py @@ -0,0 +1,348 @@ + +# Functions to pre-process data + +def preprocess(path, model, init, variable, lead, clim_per, member): + import numpy as np + from netCDF4 import Dataset + + if clim_per == '1982_2010': + years = np.arange(1982,2011,1) + elif clim_per == '1991_2020': + years = np.arange(1991,2020,1) + else: + print('Check your climatology period') + + # Get the directory + dir = path + + # Make an empty array to store the climatology and SD (just store for 1 lead, 1 member) + full_fcst_array = np.zeros((len(years), 181, 360)) + anom = np.zeros((len(years), 181, 360)) + std_anom = np.zeros((len(years), 181, 360)) + clim = np.zeros((181, 360)) + stddev = np.zeros((181, 360)) + + for y in range(len(years)): + year = years[y] + # Can comment out if this bothers you + path = str(dir + model + '.' + variable + '.' + str(year) + str(init) + '.fcst.nc') + #print('Opening ' + path) + + dataset = Dataset(path) + # Shape of array before subset (24, 10, 181, 360) + fcst = dataset.variables['fcst'][member,lead,:,:] + #print(fcst.shape) + full_fcst_array[y,:,:] = fcst + + # Can comment out if this bothers you + #print('Shape of fcst array with all times: ' + str(full_fcst_array.shape)) + + # Define climatology for the lead and member of interest + clim = np.nanmean(full_fcst_array,axis=0) + + # Define standard deviation for the lead and member of interest + stddev = np.nanstd(full_fcst_array,axis=0) + + # Define anomalies and standardized anomalies (perhaps unnecessary) + for y in range(len(years)): + anom[y,:,:] = full_fcst_array[y,:,:] - clim + std_anom[y,:,:] = anom[y,:,:]/stddev + + return clim, stddev, anom, std_anom +# -------------------------------------------------------------------------------------------------- + +# -------------------------------------------------------------------------------------------------- +def dominant_tercile_fcst(path, model, init, variable, clim_per, lead): + + import numpy as np + member = 0 + members = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + + nEns = len(members) + + # Make climos, std anoms, etc. from function for 1 member (to get shapes for empty variables) + clim, stddev, anom, std_anom = preprocess(path, model, init, variable, lead, clim_per, member) + + # Empty variables + std_anom_all = np.zeros((std_anom.shape[0], std_anom.shape[1], std_anom.shape[2], nEns)) + + for i in range(len(members)): + member = members[i] + clim, stddev, anom, std_anom = preprocess(path, model, init, variable, lead, clim_per, member) + std_anom_all[:, :, :, i] = std_anom + + ut_ones = np.where(std_anom_all[:,:,:,:] > 0.43,1,0) + lt_ones = np.where(std_anom_all[:,:,:,:] < -0.43,1,0) + mt_ones = np.where( (std_anom_all[:,:,:,:] > -0.43) & (std_anom_all[:,:,:,:] < 0.43),1,0) + + ut_prob = np.nansum(ut_ones,3)/len(members) + lt_prob = np.nansum(lt_ones,3)/len(members) + mt_prob = np.nansum(mt_ones,3)/len(members) + + # Put all in 1 array + all_probs = np.zeros((3, ut_prob.shape[0], ut_prob.shape[1], ut_prob.shape[2])) + + all_probs[0,:,:,:] = lt_prob + all_probs[1,:,:,:] = mt_prob + all_probs[2,:,:,:] = ut_prob + + + # Johnna's test statements: + #print('Model stuff') + #print(std_anom_all[0,50,100,:]) + + #print('lt') + #print(all_probs[0,:,50,100]) + + #print('mt') + #print(all_probs[1,:,50,100]) + + #print('ut') + #print(all_probs[2,:,50,100]) + + dominant_tercile = np.argmax(all_probs, axis=0) + + #print('dominant tercile') + #print(dominant_tercile[:,50,100]) + + temp = np.where(dominant_tercile == 2, 3., dominant_tercile) + temp = np.where(dominant_tercile == 1, 2., temp) + temp = np.where(dominant_tercile == 0, 1., temp) + + #print('temp') + #print(temp[:,50,100]) + + #cat_thresh_fcst = dominant_tercile + #cat_thresh_fcst = temp + + # Swap lats to match obs (will swap back later) + temp = temp[:,::-1,0:360] + # Mask according to obs: + # Note, will need to change path name here: + data_mask = mask(path) + + temp_masked = np.zeros((temp.shape[0], temp.shape[1], temp.shape[2])) + for i in range(0,temp.shape[0]): + #MODIFIED BY JOHN O, CHANGE NANS TO -9999s + temp_masked[i,:,:] = np.where(data_mask[:,0:360] == -9.99000000e+08, -9999, temp[i,:,:]) + + #print('temp masked') + #print(temp_masked[:,50,100]) + + cat_thresh_fcst = temp_masked[:,::-1,:] # MET function swaps lats to normal, putting this back into abnormal + + return cat_thresh_fcst +# -------------------------------------------------------------------------------------------------- + + + + +# -------------------------------------------------------------------------------------------------- +def dominant_tercile_obs(path_obs): + from netCDF4 import Dataset + import numpy as np + + # Read in obs data + obs_data = Dataset(path_obs + "ghcn_cams.1x1.1982-2020.mon.nc") + obs_clim_data = Dataset(path_obs + "ghcn_cams.1x1.1982-2010.mon.clim.nc") + obs_stddev_data = Dataset(path_obs + "ghcn_cams.1x1.1982-2010.mon.stddev.nc") + + + print('Note this function for obs is ONLY meant to be used for January monthly verification with a 1982-2010 base period') + obs_full = obs_data.variables['tmp2m'][:,:,:] + obs_clim = obs_clim_data.variables['clim'][0,:,:] + obs_stddev = obs_stddev_data.variables['stddev'][0,:,:] + + # Grab only Januaries + obs_jan_full = obs_full[::12,:,:] + + # For 1982-2010 + obs_jan = obs_jan_full[0:29,:,:] + + # Make std anoms + obs_std_anom = np.zeros((obs_jan.shape[0], obs_jan.shape[1], obs_jan.shape[2])) + + for t in range(0,obs_jan.shape[0]): + obs_std_anom[t,:,:] = (obs_jan[t,:,:] - obs_clim) / obs_stddev + + ut_obs_ones = np.where(obs_std_anom[:,:,:] > 0.43,3,0) + lt_obs_ones = np.where(obs_std_anom[:,:,:] < -0.43,1,0) + mt_obs_ones = np.where( (obs_std_anom[:,:,:] > -0.43) & (obs_std_anom[:,:,:] < 0.43),2,0) + + # Put all in 1 array + all_probs = np.zeros((3, ut_obs_ones.shape[0], ut_obs_ones.shape[1], ut_obs_ones.shape[2])) + + all_probs[0,:,:,:] = lt_obs_ones + all_probs[1,:,:,:] = mt_obs_ones + all_probs[2,:,:,:] = ut_obs_ones + + + # Johnna testing: + #print('obs stuff') + #print(obs_std_anom[:,100,50]) + + #print('lt') + #print(all_probs[0,:,100,50]) + + #print('mt') + #print(all_probs[1,:,100,50]) + + #print('ut') + #print(all_probs[2,:,100,50]) + + + # Mask according to obs: + # Note, will need to change path name here: + data_mask = mask(path_obs) + + temp1 = np.nansum(all_probs,axis=0) + temp = temp1[:,:,0:360] + #print(cat_thresh_obs[:, 100, 50]) + #print(np.nanmax(cat_thresh_obs)) + #print(np.nanmin(cat_thresh_obs)) + + temp_masked = np.zeros((temp.shape[0], temp.shape[1], temp.shape[2])) + for i in range(0,temp.shape[0]): + #MODIFIED BY JOHN O, CHANGE NANS TO -9999s + temp_masked[i,:,:] = np.where(data_mask[:,0:360] == -9.99000000e+08, -9999, temp[i,:,:]) + + cat_thresh_obs = temp_masked + + return cat_thresh_obs +# -------------------------------------------------------------------------------------------------- + + + + + + +# -------------------------------------------------------------------------------------------------- +def plot_bs(varObs, plotType): + import numpy as np + import matplotlib as mpl + mpl.use('Agg') + import matplotlib.pyplot as plt + from mpl_toolkits.basemap import Basemap + from matplotlib.colors import LinearSegmentedColormap, ListedColormap, BoundaryNorm + from matplotlib import ticker + + lats = np.arange(-90, 90, 1) + lons = np.arange(0, 360, 1) + lon, lat = np.meshgrid(lons, lats) + #clevs = [240, 250, 260, 270, 280, 290, 300] + #clevs = [-4.0, -3.0, -2.0, -1.0, -0.5, -0.25, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0] + #clevs = [0.125, 0.25, 0.375, 0.5, 0.625, 0.755, 0.875] + #clevs = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9] + + if plotType == 'BS': + clevs = np.arange(0.025, 0.425, 0.025) + #print(clevs) + #clevs = [0.0, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.275, 0.3, 0.325, 0.35, 0.375, 0.4] + elif plotType == 'BS_clim': + clevs = np.arange(0.025, 0.425, 0.025) + elif plotType == 'bs_py_min_met': + clevs = np.arange(-0.1, 0.1, 0.01) + elif plotType == 'bsc_py_min_met': + clevs = np.arange(-0.1, 0.1, 0.01) + else: + clevs = np.arange(-0.4, 0.4, 0.025) + if varObs == 'tmp2m': + myBlues = ['#5a00d2', '#3e9dff', '#30c6f3', '#00ffff', '#c7eab9'] + myReds = ['#f9e3b4', '#eeff41', '#ffc31b', '#e69138', '#c43307'] + else: + myBlues = ['#995005', '#da751f', '#e0a420', '#f9cb9c', '#f9e3b4'] + myReds = ['#5affd1', '#b3eaac', '#75e478', '#4db159', '#2e8065'] + myColors = myBlues+['white']+myReds + cmap = ListedColormap(myColors, 'mycmap', N = len(clevs)-1) + norm = BoundaryNorm(clevs, cmap.N) + m = Basemap(projection='mill', resolution='l', llcrnrlon=0, llcrnrlat= -90, + urcrnrlon=360, urcrnrlat = 90) + fig = plt.figure(figsize=(10,10)) + + return lats, lons, clevs, myBlues, myReds, myColors, cmap, norm, m, fig +# -------------------------------------------------------------------------------------------------- + + + +# -------------------------------------------------------------------------------------------------- +def get_init_year(year): + + # There is absolutely a better way to do this... + + if year == '198201': + idx = 0 + if year == '198301': + idx = 1 + if year == '198401': + idx = 2 + if year == '198501': + idx = 3 + if year == '198601': + idx = 4 + if year == '198701': + idx = 5 + if year == '198801': + idx = 6 + if year == '198901': + idx = 7 + if year == '199001': + idx = 8 + if year == '199101': + idx = 9 + if year == '199201': + idx = 10 + if year == '199301': + idx = 11 + if year == '199401': + idx = 12 + if year == '199501': + idx = 13 + if year == '199601': + idx = 14 + if year == '199701': + idx = 15 + if year == '199801': + idx = 16 + if year == '199901': + idx = 17 + if year == '200001': + idx = 18 + if year == '200101': + idx = 19 + if year == '200201': + idx = 20 + if year == '200301': + idx = 21 + if year == '200401': + idx = 22 + if year == '200501': + idx = 23 + if year == '200601': + idx = 24 + if year == '200701': + idx = 25 + if year == '200801': + idx = 26 + if year == '200901': + idx = 27 + if year == '201001': + idx = 28 + + return idx +# -------------------------------------------------------------------------------------------------- + + + + +# -------------------------------------------------------------------------------------------------- +def mask(path_obs): + import numpy as np + from netCDF4 import Dataset + obs_data = Dataset(path_obs + "ghcn_cams.1x1.1982-2020.mon.nc") + obs_mask_all = obs_data.variables['tmp2m'][::12,:,:] + obs_mask = obs_mask_all[0,:,0:360] + #print(obs_mask[:,100]) + obs_mask = np.where(obs_mask.all == '--', np.nan, obs_mask) + #print(obs_mask[:,100]) + return obs_mask +# --------------------------------------------------------------------------------------------------