From de58025ffb61261594971153e0372e8f4f634398 Mon Sep 17 00:00:00 2001 From: George McCabe <23407799+georgemccabe@users.noreply.github.com> Date: Wed, 12 Jun 2024 11:53:22 -0600 Subject: [PATCH] Feature #2578 PCPCombine -input_thresh for missing inputs (#2609) * refactoring to reduce duplicate/redundant code, reduce cognitive complexity to satisfy SonarQube, etc * change wording of error log to satisfy test * set -input_thresh argument if set for add, derive, and sum methods. refactor setting of method arguments, e.g. -add, -sum, etc., cleanup * refactor how level is handled in find_data function so that if the level has already been set, it will use that value, otherwise try to get it from {data_type}_level, e.g. fcst_level, otherwise set it to 0 to prevent errors * refactor duplicate code into function to satisfy SonarQube * removed unused variable * use find_data function to find input files to be consistent with other wrappers, only allow multiple input files to be found for a given call to find_data if using the -derive method * fix typo in key * suppress warnings when files aren't found because it is expected * formatting to be consistent in doc string * per #2578, add MISSING before file path that is not found if input_thresh is set and less than 1.0, added unit test to ensure correct behavior occurs * add documentation blocks for new functions, ci-run-all-diff * use pytest fixture instead of local function * add support for setting -vld_thresh argument * per #2578, added documentation and tests for setting -input_thresh and -vld_thresh * moved verbosity argument to end of command to more easily change it when debugging * refactor to reduce cognitive complexity to satisfy SonarQube * update usage statement to include METplus version number --- docs/Users_Guide/glossary.rst | 26 + docs/Users_Guide/wrappers.rst | 4 + internal/tests/pytests/conftest.py | 8 +- .../pcp_combine/test_pcp_combine_wrapper.py | 201 ++++--- metplus/util/system_util.py | 43 +- 
metplus/util/time_util.py | 2 +- metplus/wrappers/command_builder.py | 221 ++++---- metplus/wrappers/pcp_combine_wrapper.py | 522 +++++++++--------- metplus/wrappers/reformat_gridded_wrapper.py | 34 +- metplus/wrappers/regrid_data_plane_wrapper.py | 120 ++-- .../PCPCombine/PCPCombine_add.conf | 3 + .../PCPCombine/PCPCombine_bucket.conf | 3 + .../PCPCombine/PCPCombine_derive.conf | 3 + .../PCPCombine/PCPCombine_loop_custom.conf | 3 + .../PCPCombine_python_embedding.conf | 3 + .../PCPCombine/PCPCombine_subtract.conf | 2 + .../PCPCombine/PCPCombine_sum.conf | 3 + ush/run_metplus.py | 21 +- 18 files changed, 646 insertions(+), 576 deletions(-) diff --git a/docs/Users_Guide/glossary.rst b/docs/Users_Guide/glossary.rst index 29890ccfcf..f351df3f53 100644 --- a/docs/Users_Guide/glossary.rst +++ b/docs/Users_Guide/glossary.rst @@ -11979,3 +11979,29 @@ METplus Configuration Glossary There is no default, so a value must be specified. All runtime frequencies are supported. | *Used by:* UserScript + + FCST_PCP_COMBINE_INPUT_THRESH + Specify the value for the command line argument '-input_thresh' for the + forecast run of PCPCombine, e.g. :term:`FCST_PCP_COMBINE_RUN` is True. + Not used when :term:`FCST_PCP_COMBINE_METHOD` is SUBTRACT or USER_DEFINED. + + | *Used by:* PCPCombine + + OBS_PCP_COMBINE_INPUT_THRESH + Specify the value for the command line argument '-input_thresh' for the + observation run of PCPCombine, e.g. :term:`OBS_PCP_COMBINE_RUN` is True. + Not used when :term:`OBS_PCP_COMBINE_METHOD` is SUBTRACT or USER_DEFINED. + + | *Used by:* PCPCombine + + FCST_PCP_COMBINE_VLD_THRESH + Specify the value for the command line argument '-vld_thresh' for the + forecast run of PCPCombine, e.g. :term:`FCST_PCP_COMBINE_RUN` is True. + + | *Used by:* PCPCombine + + OBS_PCP_COMBINE_VLD_THRESH + Specify the value for the command line argument '-vld_thresh' for the + observation run of PCPCombine, e.g. :term:`OBS_PCP_COMBINE_RUN` is True. 
+ + | *Used by:* PCPCombine diff --git a/docs/Users_Guide/wrappers.rst b/docs/Users_Guide/wrappers.rst index cb24c991dd..870ccf672d 100644 --- a/docs/Users_Guide/wrappers.rst +++ b/docs/Users_Guide/wrappers.rst @@ -6027,6 +6027,10 @@ METplus Configuration | :term:`PCP_COMBINE_INC_VALID_TIMES` | :term:`PCP_COMBINE_SKIP_INIT_TIMES` | :term:`PCP_COMBINE_INC_INIT_TIMES` +| :term:`FCST_PCP_COMBINE_INPUT_THRESH` +| :term:`FCST_PCP_COMBINE_VLD_THRESH` +| :term:`OBS_PCP_COMBINE_INPUT_THRESH` +| :term:`OBS_PCP_COMBINE_VLD_THRESH` | .. warning:: **DEPRECATED:** diff --git a/internal/tests/pytests/conftest.py b/internal/tests/pytests/conftest.py index de6a6f4efa..9cbcfa1701 100644 --- a/internal/tests/pytests/conftest.py +++ b/internal/tests/pytests/conftest.py @@ -199,11 +199,13 @@ def make_nc(tmp_path, lon, lat, z, data, variable='Temp', file_name='fake.nc'): def get_test_data_dir(): """!Get path to directory containing test data. """ - def get_test_data_path(subdir): + def get_test_data_path(subdir=None): internal_tests_dir = os.path.abspath( - os.path.join(os.path.dirname(__file__), os.pardir) + os.path.join(os.path.dirname(__file__), os.pardir, 'data') ) - return os.path.join(internal_tests_dir, 'data', subdir) + if subdir: + internal_tests_dir = os.path.join(internal_tests_dir, subdir) + return internal_tests_dir return get_test_data_path diff --git a/internal/tests/pytests/wrappers/pcp_combine/test_pcp_combine_wrapper.py b/internal/tests/pytests/wrappers/pcp_combine/test_pcp_combine_wrapper.py index 5b4ed11317..a0291936ac 100644 --- a/internal/tests/pytests/wrappers/pcp_combine/test_pcp_combine_wrapper.py +++ b/internal/tests/pytests/wrappers/pcp_combine/test_pcp_combine_wrapper.py @@ -9,20 +9,7 @@ from metplus.util import ti_calculate -def get_test_data_dir(config, subdir=None): - top_dir = os.path.join(config.getdir('METPLUS_BASE'), - 'internal', 'tests', 'data') - if subdir: - top_dir = os.path.join(top_dir, subdir) - return top_dir - - -def 
pcp_combine_wrapper(metplus_config, d_type): - """! Returns a default PCPCombineWrapper with /path/to entries in the - metplus_system.conf and metplus_runtime.conf configuration - files. Subsequent tests can customize the final METplus configuration - to over-ride these /path/to values.""" - config = metplus_config +def set_minimum_config_settings(config, d_type): config.set('config', 'FCST_PCP_COMBINE_INPUT_ACCUMS', '6') config.set('config', 'FCST_PCP_COMBINE_INPUT_NAMES', 'P06M_NONE') config.set('config', 'FCST_PCP_COMBINE_INPUT_LEVELS', '"(*,*)"') @@ -56,14 +43,21 @@ def pcp_combine_wrapper(metplus_config, d_type): elif d_type == "OBS": config.set('config', 'OBS_PCP_COMBINE_RUN', True) +def pcp_combine_wrapper(metplus_config, d_type): + """! Returns a default PCPCombineWrapper with /path/to entries in the + metplus_system.conf and metplus_runtime.conf configuration + files. Subsequent tests can customize the final METplus configuration + to over-ride these /path/to values.""" + config = metplus_config + set_minimum_config_settings(config, d_type) return PCPCombineWrapper(config) @pytest.mark.wrapper -def test_get_accumulation_1_to_6(metplus_config): +def test_get_accumulation_1_to_6(metplus_config, get_test_data_dir): data_src = "OBS" pcw = pcp_combine_wrapper(metplus_config, data_src) - input_dir = get_test_data_dir(pcw.config, subdir='accum') + input_dir = get_test_data_dir('accum') task_info = {} task_info['valid'] = datetime.strptime("2016090418", '%Y%m%d%H') time_info = ti_calculate(task_info) @@ -85,10 +79,10 @@ def test_get_accumulation_1_to_6(metplus_config): @pytest.mark.wrapper -def test_get_accumulation_6_to_6(metplus_config): +def test_get_accumulation_6_to_6(metplus_config, get_test_data_dir): data_src = "FCST" pcw = pcp_combine_wrapper(metplus_config, data_src) - input_dir = get_test_data_dir(pcw.config, subdir='accum') + input_dir = get_test_data_dir('accum') task_info = {} task_info['valid'] = datetime.strptime("2016090418", '%Y%m%d%H') time_info 
= ti_calculate(task_info) @@ -107,10 +101,10 @@ def test_get_accumulation_6_to_6(metplus_config): @pytest.mark.wrapper -def test_get_lowest_forecast_file_dated_subdir(metplus_config): +def test_get_lowest_forecast_file_dated_subdir(metplus_config, get_test_data_dir): data_src = "FCST" pcw = pcp_combine_wrapper(metplus_config, data_src) - input_dir = get_test_data_dir(pcw.config, subdir='fcst') + input_dir = get_test_data_dir('fcst') valid_time = datetime.strptime("201802012100", '%Y%m%d%H%M') pcw.c_dict[f'{data_src}_INPUT_DIR'] = input_dir pcw._build_input_accum_list(data_src, {'valid': valid_time}) @@ -120,11 +114,11 @@ def test_get_lowest_forecast_file_dated_subdir(metplus_config): @pytest.mark.wrapper -def test_forecast_constant_init(metplus_config): +def test_forecast_constant_init(metplus_config, get_test_data_dir): data_src = "FCST" pcw = pcp_combine_wrapper(metplus_config, data_src) pcw.c_dict['FCST_CONSTANT_INIT'] = True - input_dir = get_test_data_dir(pcw.config, subdir='fcst') + input_dir = get_test_data_dir('fcst') init_time = datetime.strptime("2018020112", '%Y%m%d%H') valid_time = datetime.strptime("2018020121", '%Y%m%d%H') pcw.c_dict[f'{data_src}_INPUT_DIR'] = input_dir @@ -134,11 +128,11 @@ def test_forecast_constant_init(metplus_config): @pytest.mark.wrapper -def test_forecast_not_constant_init(metplus_config): +def test_forecast_not_constant_init(metplus_config, get_test_data_dir): data_src = "FCST" pcw = pcp_combine_wrapper(metplus_config, data_src) pcw.c_dict['FCST_CONSTANT_INIT'] = False - input_dir = get_test_data_dir(pcw.config, subdir='fcst') + input_dir = get_test_data_dir('fcst') init_time = datetime.strptime("2018020112", '%Y%m%d%H') valid_time = datetime.strptime("2018020121", '%Y%m%d%H') pcw.c_dict[f'{data_src}_INPUT_DIR'] = input_dir @@ -149,10 +143,10 @@ def test_forecast_not_constant_init(metplus_config): @pytest.mark.wrapper -def test_get_lowest_forecast_file_no_subdir(metplus_config): +def 
test_get_lowest_forecast_file_no_subdir(metplus_config, get_test_data_dir): data_src = "FCST" pcw = pcp_combine_wrapper(metplus_config, data_src) - input_dir = get_test_data_dir(pcw.config, subdir='fcst') + input_dir = get_test_data_dir('fcst') valid_time = datetime.strptime("201802012100", '%Y%m%d%H%M') template = "file.{init?fmt=%Y%m%d%H}f{lead?fmt=%HHH}.nc" pcw.c_dict[f'{data_src}_INPUT_TEMPLATE'] = template @@ -163,10 +157,10 @@ def test_get_lowest_forecast_file_no_subdir(metplus_config): @pytest.mark.wrapper -def test_get_lowest_forecast_file_yesterday(metplus_config): +def test_get_lowest_forecast_file_yesterday(metplus_config, get_test_data_dir): data_src = "FCST" pcw = pcp_combine_wrapper(metplus_config, data_src) - input_dir = get_test_data_dir(pcw.config, subdir='fcst') + input_dir = get_test_data_dir('fcst') valid_time = datetime.strptime("201802010600", '%Y%m%d%H%M') template = "file.{init?fmt=%Y%m%d%H}f{lead?fmt=%HHH}.nc" pcw.c_dict[f'{data_src}_INPUT_TEMPLATE'] = template @@ -177,14 +171,14 @@ def test_get_lowest_forecast_file_yesterday(metplus_config): @pytest.mark.wrapper -def test_setup_add_method(metplus_config): +def test_setup_add_method(metplus_config, get_test_data_dir): data_src = "OBS" pcw = pcp_combine_wrapper(metplus_config, data_src) task_info = {} task_info['valid'] = datetime.strptime("2016090418", '%Y%m%d%H') time_info = ti_calculate(task_info) - input_dir = get_test_data_dir(pcw.config, subdir='accum') + input_dir = get_test_data_dir('accum') lookback = 6 * 3600 files_found = pcw.setup_add_method(time_info, lookback, data_src) assert files_found @@ -239,14 +233,14 @@ def test_setup_subtract_method(metplus_config, custom): @pytest.mark.wrapper -def test_pcp_combine_add_subhourly(metplus_config): +def test_pcp_combine_add_subhourly(metplus_config, get_test_data_dir): fcst_name = 'A000500' fcst_level = 'Surface' fcst_output_name = 'A001500' fcst_fmt = f'\'name="{fcst_name}"; level="{fcst_level}";\'' config = metplus_config - 
test_data_dir = get_test_data_dir(config) + test_data_dir = get_test_data_dir() fcst_input_dir = os.path.join(test_data_dir, 'pcp_in', 'add') @@ -285,16 +279,12 @@ def test_pcp_combine_add_subhourly(metplus_config): app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" out_dir = wrapper.c_dict.get('FCST_OUTPUT_DIR') - expected_cmds = [(f"{app_path} {verbosity} " - "-add " - f"{fcst_input_dir}/20190802_i1800_m0_f1815.nc " - f"{fcst_fmt} " - f"{fcst_input_dir}/20190802_i1800_m0_f1810.nc " - f"{fcst_fmt} " - f"{fcst_input_dir}/20190802_i1800_m0_f1805.nc " - f"{fcst_fmt} " + expected_cmds = [(f"{app_path} -add " + f"{fcst_input_dir}/20190802_i1800_m0_f1815.nc {fcst_fmt} " + f"{fcst_input_dir}/20190802_i1800_m0_f1810.nc {fcst_fmt} " + f"{fcst_input_dir}/20190802_i1800_m0_f1805.nc {fcst_fmt} " f'-name "{fcst_output_name}" ' - f"{out_dir}/5min_mem00_lag00.nc"), + f"{out_dir}/5min_mem00_lag00.nc {verbosity}"), ] all_cmds = wrapper.run_all_times() @@ -307,11 +297,11 @@ def test_pcp_combine_add_subhourly(metplus_config): @pytest.mark.wrapper -def test_pcp_combine_bucket(metplus_config): +def test_pcp_combine_bucket(metplus_config, get_test_data_dir): fcst_output_name = 'APCP' config = metplus_config - test_data_dir = get_test_data_dir(config) + test_data_dir = get_test_data_dir() fcst_input_dir = os.path.join(test_data_dir, 'pcp_in', 'bucket') @@ -349,8 +339,7 @@ def test_pcp_combine_bucket(metplus_config): app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" out_dir = wrapper.c_dict.get('FCST_OUTPUT_DIR') - expected_cmds = [(f"{app_path} {verbosity} " - "-add " + expected_cmds = [(f"{app_path} -add " f"{fcst_input_dir}/2012040900_F015.grib " "'name=\"APCP\"; level=\"A03\";' " f"{fcst_input_dir}/2012040900_F012.grib " @@ -358,7 +347,7 @@ def test_pcp_combine_bucket(metplus_config): f"{fcst_input_dir}/2012040900_F006.grib " "'name=\"APCP\"; 
level=\"A06\";' " f'-name "{fcst_output_name}" ' - f"{out_dir}/2012040915_A015.nc"), + f"{out_dir}/2012040915_A015.nc {verbosity}"), ] all_cmds = wrapper.run_all_times() @@ -384,14 +373,14 @@ def test_pcp_combine_bucket(metplus_config): ] ) @pytest.mark.wrapper -def test_pcp_combine_derive(metplus_config, config_overrides, extra_fields): +def test_pcp_combine_derive(metplus_config, get_test_data_dir, config_overrides, extra_fields): stat_list = 'sum,min,max,range,mean,stdev,vld_count' fcst_name = 'APCP' fcst_level = 'A03' fcst_fmt = f'-field \'name="{fcst_name}"; level="{fcst_level}";\'' config = metplus_config - test_data_dir = get_test_data_dir(config) + test_data_dir = get_test_data_dir() fcst_input_dir = os.path.join(test_data_dir, 'pcp_in', 'derive') @@ -437,8 +426,7 @@ def test_pcp_combine_derive(metplus_config, config_overrides, extra_fields): app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" out_dir = wrapper.c_dict.get('FCST_OUTPUT_DIR') - expected_cmds = [(f"{app_path} {verbosity} " - f"-derive {stat_list} " + expected_cmds = [(f"{app_path} -derive {stat_list} " f"{fcst_input_dir}/2005080700/24.tm00_G212 " f"{fcst_input_dir}/2005080700/21.tm00_G212 " f"{fcst_input_dir}/2005080700/18.tm00_G212 " @@ -446,7 +434,7 @@ def test_pcp_combine_derive(metplus_config, config_overrides, extra_fields): f"{fcst_input_dir}/2005080700/12.tm00_G212 " f"{fcst_input_dir}/2005080700/09.tm00_G212 " f"{fcst_fmt} {extra_fields}" - f"{out_dir}/2005080700_f24_A18.nc"), + f"{out_dir}/2005080700_f24_A18.nc {verbosity}"), ] all_cmds = wrapper.run_all_times() @@ -459,12 +447,12 @@ def test_pcp_combine_derive(metplus_config, config_overrides, extra_fields): @pytest.mark.wrapper -def test_pcp_combine_loop_custom(metplus_config): +def test_pcp_combine_loop_custom(metplus_config, get_test_data_dir): fcst_name = 'APCP' ens_list = ['ens1', 'ens2', 'ens3', 'ens4', 'ens5', 'ens6'] config = metplus_config - test_data_dir = 
get_test_data_dir(config) + test_data_dir = get_test_data_dir() fcst_input_dir = os.path.join(test_data_dir, 'pcp_in', 'loop_custom') @@ -505,12 +493,11 @@ def test_pcp_combine_loop_custom(metplus_config): out_dir = wrapper.c_dict.get('FCST_OUTPUT_DIR') expected_cmds = [] for ens in ens_list: - cmd = (f"{app_path} {verbosity} " - f"-add " + cmd = (f"{app_path} -add " f"{fcst_input_dir}/{ens}/2009123112_02400.grib " "'name=\"APCP\"; level=\"A24\";' " f'-name "{fcst_name}" ' - f"{out_dir}/{ens}/2009123112_02400.nc") + f"{out_dir}/{ens}/2009123112_02400.nc {verbosity}") expected_cmds.append(cmd) all_cmds = wrapper.run_all_times() @@ -523,10 +510,10 @@ def test_pcp_combine_loop_custom(metplus_config): @pytest.mark.wrapper -def test_pcp_combine_subtract(metplus_config): +def test_pcp_combine_subtract(metplus_config, get_test_data_dir): config = metplus_config - test_data_dir = get_test_data_dir(config) + test_data_dir = get_test_data_dir() fcst_input_dir = os.path.join(test_data_dir, 'pcp_in', 'derive') @@ -562,14 +549,13 @@ def test_pcp_combine_subtract(metplus_config): app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" out_dir = wrapper.c_dict.get('FCST_OUTPUT_DIR') - expected_cmds = [(f"{app_path} {verbosity} " - f"-subtract " + expected_cmds = [(f"{app_path} -subtract " f"{fcst_input_dir}/2005080700/18.tm00_G212 " "'name=\"APCP\"; level=\"A18\";' " f"{fcst_input_dir}/2005080700/15.tm00_G212 " "'name=\"APCP\"; level=\"A15\";' " '-name "APCP" ' - f"{out_dir}/2005080718_A003.nc"), + f"{out_dir}/2005080718_A003.nc {verbosity}"), ] all_cmds = wrapper.run_all_times() @@ -582,14 +568,14 @@ def test_pcp_combine_subtract(metplus_config): @pytest.mark.wrapper -def test_pcp_combine_sum_subhourly(metplus_config): +def test_pcp_combine_sum_subhourly(metplus_config, get_test_data_dir): fcst_name = 'A000500' fcst_level = 'Surface' fcst_output_name = 'A001500' fcst_fmt = f'-field \'name="{fcst_name}"; 
level="{fcst_level}";\'' config = metplus_config - test_data_dir = get_test_data_dir(config) + test_data_dir = get_test_data_dir() fcst_input_dir = os.path.join(test_data_dir, 'pcp_in', 'add') @@ -628,15 +614,14 @@ def test_pcp_combine_sum_subhourly(metplus_config): app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" out_dir = wrapper.c_dict.get('FCST_OUTPUT_DIR') - expected_cmds = [(f"{app_path} {verbosity} " - "-sum " + expected_cmds = [(f"{app_path} -sum " "20190802_180000 000500 " "20190802_181500 001500 " f"-pcpdir {fcst_input_dir} " f"-pcprx 20190802_i1800_m0_f* " f"{fcst_fmt} " f"-name \"{fcst_output_name}\" " - f"{out_dir}/5min_mem00_lag00.nc"), + f"{out_dir}/5min_mem00_lag00.nc {verbosity}"), ] all_cmds = wrapper.run_all_times() @@ -712,7 +697,7 @@ def test_get_extra_fields(metplus_config, names, levels, expected_args): wrapper = PCPCombineWrapper(config) - wrapper._handle_extra_field_arguments(data_src) + wrapper.set_command_line_arguments(data_src) wrapper._handle_name_argument('', data_src) for index, expected_arg in enumerate(expected_args): assert wrapper.args[index] == expected_arg @@ -720,7 +705,6 @@ def test_get_extra_fields(metplus_config, names, levels, expected_args): @pytest.mark.wrapper def test_add_method_single_file(metplus_config): - data_src = 'FCST' config = metplus_config config.set('config', 'DO_NOT_RUN_EXE', True) config.set('config', 'INPUT_MUST_EXIST', False) @@ -761,21 +745,21 @@ def test_add_method_single_file(metplus_config): in_file = (f"{wrapper.c_dict.get('FCST_INPUT_DIR')}/" "20191002_prec_1hracc_75hrfcst_e00.nc") expected_cmds = [ - (f"{app_path} {verbosity} -add " + (f"{app_path} -add " f"{in_file} 'name=\"rf\"; level=\"(20191003_00,*,*)\";' " f"{in_file} 'name=\"rf\"; level=\"(20191002_23,*,*)\";' " f"{in_file} 'name=\"rf\"; level=\"(20191002_22,*,*)\";' " - f"{out_dir}/2019100300_prec_03hracc_e00.nc"), - (f"{app_path} {verbosity} -add " + 
f"{out_dir}/2019100300_prec_03hracc_e00.nc {verbosity}"), + (f"{app_path} -add " f"{in_file} 'name=\"rf\"; level=\"(20191003_03,*,*)\";' " f"{in_file} 'name=\"rf\"; level=\"(20191003_02,*,*)\";' " f"{in_file} 'name=\"rf\"; level=\"(20191003_01,*,*)\";' " - f"{out_dir}/2019100303_prec_03hracc_e00.nc"), - (f"{app_path} {verbosity} -add " + f"{out_dir}/2019100303_prec_03hracc_e00.nc {verbosity}"), + (f"{app_path} -add " f"{in_file} 'name=\"rf\"; level=\"(20191003_06,*,*)\";' " f"{in_file} 'name=\"rf\"; level=\"(20191003_05,*,*)\";' " f"{in_file} 'name=\"rf\"; level=\"(20191003_04,*,*)\";' " - f"{out_dir}/2019100306_prec_03hracc_e00.nc"), + f"{out_dir}/2019100306_prec_03hracc_e00.nc {verbosity}"), ] assert len(all_cmds) == len(expected_cmds) @@ -817,7 +801,6 @@ def test_subtract_method_zero_accum(metplus_config): config.set('config', 'FCST_PCP_COMBINE_OUTPUT_ACCUM', '1H') config.set('config', 'FCST_PCP_COMBINE_OUTPUT_NAME', input_name) - # NETCDF example should use zero accum, GRIB example should not (use -add) expected_cmds_dict = {} expected_cmds_dict['NETCDF'] = [ @@ -855,10 +838,76 @@ def test_subtract_method_zero_accum(metplus_config): app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" - expected_cmds = [f"{app_path} {verbosity} {item}" + expected_cmds = [f"{app_path} {item} {verbosity}" for item in expected_cmds_dict[data_type]] assert len(all_cmds) == len(expected_cmds) for (cmd, env_vars), expected_cmd in zip(all_cmds, expected_cmds): # ensure commands are generated as expected assert cmd == expected_cmd + + +@pytest.mark.parametrize( + 'input_thresh, vld_thresh, success', [ + (None, None, False), + (0.6, None, True), + (1.0, None, False), + (None, 0.2, False), + (0.6, 0.2, True), + (1.0, 0.2, False), + ] +) +@pytest.mark.wrapper +def test_add_method_missing_input(metplus_config, get_test_data_dir, input_thresh, vld_thresh, success): + data_src = "OBS" + input_dir = get_test_data_dir('accum') 
+ + config = metplus_config + set_minimum_config_settings(config, data_src) + config.set('config', 'LOOP_BY', "VALID") + config.set('config', 'VALID_TIME_FMT', "%Y%m%d%H") + config.set('config', 'VALID_BEG', "2016090415") + config.set('config', 'VALID_END', "2016090415") + config.set('config', 'VALID_INCREMENT', "1d") + config.set('config', f'{data_src}_PCP_COMBINE_INPUT_DIR', input_dir) + config.set('config', f'{data_src}_PCP_COMBINE_OUTPUT_ACCUM', '6H') + config.set('config', f'{data_src}_PCP_COMBINE_INPUT_ACCUMS', '1H') + if input_thresh is not None: + config.set('config', f'{data_src}_PCP_COMBINE_INPUT_THRESH', input_thresh) + if vld_thresh is not None: + config.set('config', f'{data_src}_PCP_COMBINE_VLD_THRESH', vld_thresh) + wrapper = PCPCombineWrapper(config) + + assert wrapper.isOK + + all_cmds = wrapper.run_all_times() + if not success: + assert len(all_cmds) == 0 + return + + field_name = wrapper.config.get('config', f'{data_src}_PCP_COMBINE_INPUT_NAMES') + field_info = f"'name=\"{field_name}\";'" + + app_path = os.path.join(config.getdir('MET_BIN_DIR'), wrapper.app_name) + verbosity = f"-v {wrapper.c_dict['VERBOSITY']}" + out_dir = wrapper.c_dict.get(f'{data_src}_OUTPUT_DIR') + extra_args = '' + if input_thresh: + extra_args += f' -input_thresh {input_thresh}' + if vld_thresh: + extra_args += f' -vld_thresh {vld_thresh}' + expected_cmds = [ + f"{app_path} -add" + f" {input_dir}/20160904/file.2016090415.01h {field_info}" + f" {input_dir}/20160904/file.2016090414.01h {field_info}" + f" {input_dir}/20160904/file.2016090413.01h {field_info}" + f" {input_dir}/20160904/file.2016090412.01h {field_info}" + f" MISSING{input_dir}/20160904/file.2016090411.01h {field_info}" + f" MISSING{input_dir}/20160904/file.2016090410.01h {field_info}" + f"{extra_args} {out_dir}/20160904/outfile.2016090415_A06h {verbosity}" + ] + assert len(all_cmds) == len(expected_cmds) + + for (cmd, env_vars), expected_cmd in zip(all_cmds, expected_cmds): + # ensure commands are generated as 
expected + assert cmd == expected_cmd diff --git a/metplus/util/system_util.py b/metplus/util/system_util.py index 29f4e8bc19..55f2f964ae 100644 --- a/metplus/util/system_util.py +++ b/metplus/util/system_util.py @@ -121,26 +121,19 @@ def prune_empty(output_dir, logger): """ # Check for empty files. - for root, dirs, files in os.walk(output_dir): - # Create a full file path by joining the path - # and filename. - for a_file in files: - a_file = os.path.join(root, a_file) - if os.stat(a_file).st_size == 0: - logger.debug("Empty file: " + a_file + - "...removing") - os.remove(a_file) + for a_file in traverse_dir(output_dir): + if os.stat(a_file).st_size == 0: + logger.debug("Empty file: " + a_file + + "...removing") + os.remove(a_file) # Now check for any empty directories, some # may have been created when removing # empty files. - for root, dirs, files in os.walk(output_dir): - for direc in dirs: - full_dir = os.path.join(root, direc) - if not os.listdir(full_dir): - logger.debug("Empty directory: " + full_dir + - "...removing") - os.rmdir(full_dir) + for full_dir in traverse_dir(output_dir, get_dirs=True): + if not os.listdir(full_dir): + logger.debug("Empty directory: " + full_dir + "...removing") + os.rmdir(full_dir) def get_files(filedir, filename_regex): @@ -353,3 +346,21 @@ def preprocess_file(filename, data_type, config, allow_dir=False): return filename return None + + +def traverse_dir(data_dir, get_dirs=False): + """!Generator used to navigate through and yield full path to all files or + directories under data_dir. + + @param data_dir directory to traverse + @param get_dirs If True, get all directories under data_dir. If False, get + all files under data_dir. Defaults to False (files). 
+ """ + for dir_path, dirs, all_files in os.walk(data_dir, followlinks=True): + if get_dirs: + items = sorted(dirs) + else: + items = sorted(all_files) + + for dir_name in items: + yield os.path.join(dir_path, dir_name) diff --git a/metplus/util/time_util.py b/metplus/util/time_util.py index 6dc305b4b6..88c7646e41 100755 --- a/metplus/util/time_util.py +++ b/metplus/util/time_util.py @@ -13,7 +13,7 @@ from dateutil.relativedelta import relativedelta import re -from .string_manip import split_level, format_thresh +from .string_manip import format_thresh '''!@namespace TimeInfo @brief Utility to handle timing in METplus wrappers diff --git a/metplus/wrappers/command_builder.py b/metplus/wrappers/command_builder.py index cc5d6aedc9..5a18ff9be4 100755 --- a/metplus/wrappers/command_builder.py +++ b/metplus/wrappers/command_builder.py @@ -29,7 +29,7 @@ from ..util import get_wrapper_name, is_python_script from ..util.met_config import add_met_config_dict, handle_climo_dict from ..util import mkdir_p, get_skip_times -from ..util import get_log_path, RunArgs, run_cmd +from ..util import get_log_path, RunArgs, run_cmd, traverse_dir # pylint:disable=pointless-string-statement @@ -449,7 +449,7 @@ def find_obs_offset(self, time_info, mandatory=True, return_list=False): # errors when searching through offset list is_mandatory = mandatory if offsets == [0] else False - self.c_dict['SUPRESS_WARNINGS'] = True + self.c_dict['SUPPRESS_WARNINGS'] = True for offset in offsets: time_info['offset_hours'] = offset time_info = ti_calculate(time_info) @@ -458,10 +458,10 @@ def find_obs_offset(self, time_info, mandatory=True, return_list=False): return_list=return_list) if obs_path is not None: - self.c_dict['SUPRESS_WARNINGS'] = False + self.c_dict['SUPPRESS_WARNINGS'] = False return obs_path, time_info - self.c_dict['SUPRESS_WARNINGS'] = False + self.c_dict['SUPPRESS_WARNINGS'] = False # if no files are found return None # if offsets are specified, log error with list offsets used @@ 
-496,22 +496,6 @@ def find_data(self, time_info, data_type='', mandatory=True, if data_type and not data_type.endswith('_'): data_type_fmt += '_' - # set generic 'level' to level that corresponds to data_type if set - level = time_info.get(f'{data_type_fmt.lower()}level', '0') - - # strip off prefix letter if it exists - level = split_level(level)[1] - - # set level to 0 character if it is not a number, e.g. NetCDF level - if not level.isdigit(): - level = '0' - - # if level is a range, use the first value, i.e. if 250-500 use 250 - level = level.split('-')[0] - - # if level is in hours, convert to seconds - level = get_seconds_from_string(level, 'H') - # arguments for find helper functions arg_dict = {'data_type': data_type_fmt, 'mandatory': mandatory, @@ -522,13 +506,12 @@ def find_data(self, time_info, data_type='', mandatory=True, if (self.c_dict.get(data_type_fmt + 'FILE_WINDOW_BEGIN', 0) == 0 and self.c_dict.get(data_type_fmt + 'FILE_WINDOW_END', 0) == 0): - return self._find_exact_file(**arg_dict, allow_dir=allow_dir, - level=level) + return self._find_exact_file(**arg_dict, allow_dir=allow_dir) # if looking for a file within a time window: return self._find_file_in_window(**arg_dict) - def _find_exact_file(self, level, data_type, time_info, mandatory=True, + def _find_exact_file(self, data_type, time_info, mandatory=True, return_list=False, allow_dir=False): input_template = self.c_dict.get(f'{data_type}INPUT_TEMPLATE', '') data_dir = self.c_dict.get(f'{data_type}INPUT_DIR', '') @@ -550,19 +533,37 @@ def _find_exact_file(self, level, data_type, time_info, mandatory=True, "does not allow multiple files to be provided.") return None - # pop level from time_info to avoid conflict with explicit level - # then add it back after the string sub call - saved_level = time_info.pop('level', None) + # If level is not already set in time_info, set it and remove it later. + # Check if {data_type}level is set, e.g. 
fcst_level, + # otherwise use 0 to prevent error when level is requested in template. + has_level = True if time_info.get('level') else False + if not has_level: + # set generic 'level' to level that corresponds to data_type if set + level = time_info.get(f'{data_type.lower()}level', '0') + + # strip off prefix letter if it exists + level = split_level(level)[1] + + # set level to 0 character if it is not a number, e.g. NetCDF level + if not level.isdigit(): + level = '0' + + # if level is a range, use the first value, i.e. if 250-500 use 250 + level = level.split('-')[0] + + # if level is in hours, convert to seconds + level = get_seconds_from_string(level, 'H') + time_info['level'] = level input_must_exist = self._get_input_must_exist(template_list, data_dir) - check_file_list = self._get_files_to_check(template_list, level, + check_file_list = self._get_files_to_check(template_list, time_info, data_dir, data_type) - # if it was set, add level back to time_info - if saved_level is not None: - time_info['level'] = saved_level + # if it was not set, remove it from time_info + if not has_level: + time_info.pop('level', None) # if multiple files are not supported by the wrapper and multiple # files are found, error and exit @@ -578,17 +579,7 @@ def _find_exact_file(self, level, data_type, time_info, mandatory=True, # return None if no files were found if not check_file_list: msg = f"Could not find any {data_type}INPUT files" - # warn instead of error if it is not mandatory to find files - if (not mandatory - or not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)): - if self.c_dict.get('SUPRESS_WARNINGS', False): - self.logger.debug(msg) - else: - self.logger.warning(msg) - else: - self.log_error(msg) - + self._log_message_dynamic_level(msg, mandatory) return None found_files = self._check_that_files_exist(check_file_list, data_type, @@ -603,6 +594,28 @@ def _find_exact_file(self, level, data_type, time_info, mandatory=True, 
return found_files + def _is_optional_input(self, mandatory): + return (not self.c_dict.get('MANDATORY', True) + or self.c_dict.get('ALLOW_MISSING_INPUTS', False) + or not mandatory) + + def _log_message_dynamic_level(self, msg, mandatory): + """!Log message based on rules. If mandatory input and missing inputs + are not allowed, log an error. Otherwise, log a warning unless warnings + are suppressed, in which case log debug. + + @param msg message to be logged + @param mandatory boolean indicating if input data is mandatory + """ + # warn instead of error if it is not mandatory to find files + if self._is_optional_input(mandatory): + if self.c_dict.get('SUPPRESS_WARNINGS', False): + self.logger.debug(msg) + else: + self.logger.warning(msg) + else: + self.log_error(msg) + def _get_input_must_exist(self, template_list, data_dir): """!Check if input must exist. The config dict setting INPUT_MUST_EXIST can force a False result to skip checks for files existing. Also, if @@ -629,7 +642,7 @@ def _get_input_must_exist(self, template_list, data_dir): return False return True - def _get_files_to_check(self, template_list, level, time_info, data_dir, + def _get_files_to_check(self, template_list, time_info, data_dir, data_type): """!Get list of files to check if they exist. @returns list of tuples containing file path and template used to build @@ -641,7 +654,7 @@ def _get_files_to_check(self, template_list, level, time_info, data_dir, full_template = os.path.join(data_dir, template) # perform string substitution on full path - full_path = do_string_sub(full_template, **time_info, level=level) + full_path = do_string_sub(full_template, **time_info) if os.path.sep not in full_path: self.logger.debug(f"{full_path} is not a file path. 
" @@ -686,20 +699,11 @@ def _check_that_files_exist(self, check_file_list, data_type, allow_dir, if not processed_path: msg = (f"Could not find {data_type}INPUT file {file_path} " f"using template {template}") - if (not mandatory - or not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)): - - if self.c_dict.get('SUPRESS_WARNINGS', False): - self.logger.debug(msg) - else: - self.logger.warning(msg) - - if self.c_dict.get(f'{data_type}FILL_MISSING'): - found_file_list.append(f'MISSING{file_path}') - continue - else: - self.log_error(msg) + self._log_message_dynamic_level(msg, mandatory) + if (self._is_optional_input(mandatory) and + self.c_dict.get(f'{data_type}FILL_MISSING')): + found_file_list.append(f'MISSING{file_path}') + continue return None @@ -737,18 +741,7 @@ def _find_file_in_window(self, data_type, time_info, mandatory=True, if not closest_files: msg = (f"Could not find {data_type}INPUT files under {data_dir} within range " f"[{valid_range_lower},{valid_range_upper}] using template {template}") - if (not mandatory - or not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)): - - if self.c_dict.get('SUPRESS_WARNINGS', False): - self.logger.debug(msg) - else: - self.logger.warning(msg) - - else: - self.log_error(msg) - + self._log_message_dynamic_level(msg, mandatory) return None # remove any files that are the same as another but zipped @@ -796,41 +789,38 @@ def _get_closest_files(self, data_dir, template, valid_time, "%Y%m%d%H%M%S").strftime("%s")) # step through all files under input directory in sorted order - for dirpath, _, all_files in os.walk(data_dir, followlinks=True): - for filename in sorted(all_files): - fullpath = os.path.join(dirpath, filename) - - # remove input data directory to get relative path - rel_path = fullpath.replace(f'{data_dir}/', "") - # extract time information from relative path using template - file_time_info = get_time_from_file(rel_path, template, - 
self.logger) - if file_time_info is None: - continue + for fullpath in traverse_dir(data_dir): + # remove input data directory to get relative path + rel_path = fullpath.replace(f'{data_dir}/', "") + # extract time information from relative path using template + file_time_info = get_time_from_file(rel_path, template, + self.logger) + if file_time_info is None: + continue - # get valid time and check if it is within the time range - file_valid_time = file_time_info['valid'].strftime("%Y%m%d%H%M%S") - # skip if could not extract valid time - if not file_valid_time: - continue - file_valid_dt = datetime.strptime(file_valid_time, "%Y%m%d%H%M%S") - file_valid_seconds = int(file_valid_dt.strftime("%s")) - # skip if outside time range - if file_valid_seconds < lower_limit or file_valid_seconds > upper_limit: - continue + # get valid time and check if it is within the time range + file_valid_time = file_time_info['valid'].strftime("%Y%m%d%H%M%S") + # skip if could not extract valid time + if not file_valid_time: + continue + file_valid_dt = datetime.strptime(file_valid_time, "%Y%m%d%H%M%S") + file_valid_seconds = int(file_valid_dt.strftime("%s")) + # skip if outside time range + if file_valid_seconds < lower_limit or file_valid_seconds > upper_limit: + continue - # if multiple files are allowed, get all files within range - if self.c_dict.get('ALLOW_MULTIPLE_FILES', False): - closest_files.append(fullpath) - continue + # if multiple files are allowed, get all files within range + if self.c_dict.get('ALLOW_MULTIPLE_FILES', False): + closest_files.append(fullpath) + continue - # if only 1 file is allowed, check if file is - # closer to desired valid time than previous match - diff = abs(valid_seconds - file_valid_seconds) - if diff < closest_time: - closest_time = diff - del closest_files[:] - closest_files.append(fullpath) + # if only 1 file is allowed, check if file is + # closer to desired valid time than previous match + diff = abs(valid_seconds - file_valid_seconds) + 
if diff < closest_time: + closest_time = diff + del closest_files[:] + closest_files.append(fullpath) return closest_files @@ -874,11 +864,7 @@ def find_input_files_ensemble(self, time_info, fill_missing=True): input_files = self.find_model(time_info, return_list=True, mandatory=False) if not input_files: msg = "Could not find any input files" - if (not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)): - self.logger.warning(msg) - else: - self.log_error(msg) + self._log_message_dynamic_level(msg, True) return False # if control file is requested, remove it from input list @@ -1029,15 +1015,7 @@ def find_and_check_output_file(self, time_info=None, # get directory that the output file will exist if is_directory: parent_dir = output_path - valid = '*' - lead = '*' - if time_info: - if time_info['valid'] != '*': - valid = time_info['valid'].strftime('%Y%m%d_%H%M%S') - if time_info['lead'] != '*': - lead = seconds_to_met_time(time_info['lead_seconds'], - force_hms=True) - + valid, lead = self._get_valid_and_lead_from_time_info(time_info) prefix = self.get_output_prefix(time_info, set_env_vars=False) prefix = f'{self.app_name}_{prefix}' if prefix else self.app_name search_string = f'{prefix}_{lead}L_{valid}V*' @@ -1077,6 +1055,19 @@ def find_and_check_output_file(self, time_info=None, 'to process') return False + @staticmethod + def _get_valid_and_lead_from_time_info(time_info): + valid = '*' + lead = '*' + if not time_info: + return valid, lead + + if time_info['valid'] != '*': + valid = time_info['valid'].strftime('%Y%m%d_%H%M%S') + if time_info['lead'] != '*': + lead = seconds_to_met_time(time_info['lead_seconds'], force_hms=True) + return valid, lead + def check_for_externals(self): self.check_for_gempak() diff --git a/metplus/wrappers/pcp_combine_wrapper.py b/metplus/wrappers/pcp_combine_wrapper.py index d4617e3053..e00b3153cf 100755 --- a/metplus/wrappers/pcp_combine_wrapper.py +++ b/metplus/wrappers/pcp_combine_wrapper.py @@ 
-7,12 +7,12 @@ import os from datetime import timedelta -from ..util import do_string_sub, getlist, preprocess_file +from ..util import do_string_sub, getlist from ..util import get_seconds_from_string, ti_get_lead_string, ti_calculate from ..util import get_relativedelta, ti_get_seconds_from_relativedelta from ..util import time_string_to_met_time, seconds_to_met_time from ..util import parse_var_list, template_to_regex, split_level -from ..util import add_field_info_to_time_info, sub_var_list +from ..util import add_field_info_to_time_info, sub_var_list, MISSING_DATA_VALUE from . import ReformatGriddedWrapper '''!@namespace PCPCombineWrapper @@ -29,7 +29,7 @@ class PCPCombineWrapper(ReformatGriddedWrapper): RUNTIME_FREQ_SUPPORTED = ['RUN_ONCE_FOR_EACH'] # valid values for [FCST/OBS]_PCP_COMBINE_METHOD - valid_run_methods = ['ADD', 'SUM', 'SUBTRACT', 'DERIVE', 'USER_DEFINED'] + VALID_RUN_METHODS = ['ADD', 'SUM', 'SUBTRACT', 'DERIVE', 'USER_DEFINED'] def __init__(self, config, instance=None): self.app_name = 'pcp_combine' @@ -49,154 +49,169 @@ def create_c_dict(self): c_dict['VERBOSITY'] = self.config.getstr('config', 'LOG_PCP_COMBINE_VERBOSITY', c_dict['VERBOSITY']) - c_dict['ALLOW_MULTIPLE_FILES'] = True - fcst_run = self.config.getbool('config', 'FCST_PCP_COMBINE_RUN', False) - obs_run = self.config.getbool('config', 'OBS_PCP_COMBINE_RUN', False) - if not fcst_run and not obs_run: - self.log_error("Must set either FCST_PCP_COMBINE_RUN or " - "OBS_PCP_COMBINE_RUN") - return c_dict + if c_dict['FCST_RUN']: + c_dict = self._set_fcst_or_obs_dict_items('FCST', c_dict) - if fcst_run: - c_dict = self.set_fcst_or_obs_dict_items('FCST', c_dict) - c_dict['VAR_LIST_FCST'] = parse_var_list( - self.config, - data_type='FCST', - met_tool=self.app_name - ) - if obs_run: - c_dict = self.set_fcst_or_obs_dict_items('OBS', c_dict) - c_dict['VAR_LIST_OBS'] = parse_var_list( - self.config, - data_type='OBS', - met_tool=self.app_name - ) + if c_dict['OBS_RUN']: + c_dict = 
self._set_fcst_or_obs_dict_items('OBS', c_dict) return c_dict - def set_fcst_or_obs_dict_items(self, d_type, c_dict): + def _set_fcst_or_obs_dict_items(self, d_type, c_dict): """! Set c_dict values specific to either forecast (FCST) or observation (OBS) data. - @param d_type data type, either FCST or OBS + @param d_type data type, either 'FCST' or 'OBS' @param c_dict config dictionary to populate @returns c_dict with values for given data type set """ # handle run method - run_method = self.config.getstr( - 'config', - f'{d_type}_PCP_COMBINE_METHOD', '' + run_method = self.config.getraw( + 'config', f'{d_type}_PCP_COMBINE_METHOD' ).upper() # change CUSTOM (deprecated) to USER_DEFINED - if run_method == 'CUSTOM': - run_method = 'USER_DEFINED' + run_method = 'USER_DEFINED' if run_method == 'CUSTOM' else run_method - if run_method not in self.valid_run_methods: + if run_method not in self.VALID_RUN_METHODS: self.log_error(f"Invalid value for {d_type}_PCP_COMBINE_METHOD: " f"{run_method}. Valid options are " - f"{','.join(self.valid_run_methods)}.") + f"{','.join(self.VALID_RUN_METHODS)}.") return c_dict c_dict[f'{d_type}_RUN_METHOD'] = run_method + # if derive method, allow multiple files and read stat list + if c_dict[f'{d_type}_RUN_METHOD'] == "DERIVE": + c_dict[f'{d_type}_STAT_LIST'] = getlist( + self.config.getraw('config', f'{d_type}_PCP_COMBINE_STAT_LIST') + ) + c_dict['ALLOW_MULTIPLE_FILES'] = True + + # handle I/O directories and templates + c_dict[f'{d_type}_INPUT_DIR'] = self.config.getdir( + f'{d_type}_PCP_COMBINE_INPUT_DIR', '' + ) + c_dict[f'{d_type}_INPUT_TEMPLATE'] = self.config.getraw( + 'config', f'{d_type}_PCP_COMBINE_INPUT_TEMPLATE' + ) + + c_dict[f'{d_type}_OUTPUT_DIR'] = self.config.getdir( + f'{d_type}_PCP_COMBINE_OUTPUT_DIR', '' + ) + c_dict[f'{d_type}_OUTPUT_TEMPLATE'] = self.config.getraw( + 'config', f'{d_type}_PCP_COMBINE_OUTPUT_TEMPLATE' + ) + # get lookback from _LOOKBACK or _OUTPUT_ACCUM or _DERIVE_LOOKBACK c_dict[f'{d_type}_LOOKBACK'] = 
self._handle_lookback(c_dict, d_type) c_dict[f'{d_type}_MIN_FORECAST'] = self.config.getstr( - 'config', - f'{d_type}_PCP_COMBINE_MIN_FORECAST', '0' + 'config', f'{d_type}_PCP_COMBINE_MIN_FORECAST', '0' ) c_dict[f'{d_type}_MAX_FORECAST'] = self.config.getstr( - 'config', - f'{d_type}_PCP_COMBINE_MAX_FORECAST', '256H' + 'config', f'{d_type}_PCP_COMBINE_MAX_FORECAST', '256H' ) c_dict[f'{d_type}_INPUT_DATATYPE'] = self.config.getstr( - 'config', - f'{d_type}_PCP_COMBINE_INPUT_DATATYPE', '' + 'config', f'{d_type}_PCP_COMBINE_INPUT_DATATYPE', '' ) c_dict[f'{d_type}_ACCUMS'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_INPUT_ACCUMS', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_INPUT_ACCUMS') ) c_dict[f'{d_type}_NAMES'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_INPUT_NAMES', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_INPUT_NAMES') ) c_dict[f'{d_type}_LEVELS'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_INPUT_LEVELS', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_INPUT_LEVELS') ) c_dict[f'{d_type}_OPTIONS'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_INPUT_OPTIONS', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_INPUT_OPTIONS') ) c_dict[f'{d_type}_OUTPUT_NAME'] = self.config.getstr( - 'config', - f'{d_type}_PCP_COMBINE_OUTPUT_NAME', '' - ) - c_dict[f'{d_type}_INPUT_DIR'] = self.config.getdir( - f'{d_type}_PCP_COMBINE_INPUT_DIR', '' - ) - c_dict[f'{d_type}_INPUT_TEMPLATE'] = self.config.getraw( - 'config', - f'{d_type}_PCP_COMBINE_INPUT_TEMPLATE' - ) - - c_dict[f'{d_type}_OUTPUT_DIR'] = self.config.getdir( - f'{d_type}_PCP_COMBINE_OUTPUT_DIR', '' - ) - c_dict[f'{d_type}_OUTPUT_TEMPLATE'] = self.config.getraw( - 'config', - f'{d_type}_PCP_COMBINE_OUTPUT_TEMPLATE' - ) - - c_dict[f'{d_type}_STAT_LIST'] = getlist( - self.config.getstr('config', - f'{d_type}_PCP_COMBINE_STAT_LIST', '') + 'config', f'{d_type}_PCP_COMBINE_OUTPUT_NAME', '' 
) c_dict[f'{d_type}_BUCKET_INTERVAL'] = self.config.getseconds( - 'config', - f'{d_type}_PCP_COMBINE_BUCKET_INTERVAL', 0 + 'config', f'{d_type}_PCP_COMBINE_BUCKET_INTERVAL', 0 ) c_dict[f'{d_type}_CONSTANT_INIT'] = self.config.getbool( - 'config', - f'{d_type}_PCP_COMBINE_CONSTANT_INIT', False + 'config', f'{d_type}_PCP_COMBINE_CONSTANT_INIT', False ) # read any additional names/levels to add to command c_dict[f'{d_type}_EXTRA_NAMES'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_EXTRA_NAMES', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_EXTRA_NAMES') ) c_dict[f'{d_type}_EXTRA_LEVELS'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_EXTRA_LEVELS', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_EXTRA_LEVELS') ) # fill in missing extra level values with None fill_num = (len(c_dict[f'{d_type}_EXTRA_NAMES']) - len(c_dict[f'{d_type}_EXTRA_LEVELS'])) - if fill_num > 0: - for _ in range(fill_num): - c_dict[f'{d_type}_EXTRA_LEVELS'].append(None) + for _ in range(fill_num): + c_dict[f'{d_type}_EXTRA_LEVELS'].append(None) c_dict[f'{d_type}_EXTRA_OUTPUT_NAMES'] = getlist( - self.config.getraw('config', - f'{d_type}_PCP_COMBINE_EXTRA_OUTPUT_NAMES', '') + self.config.getraw('config', f'{d_type}_PCP_COMBINE_EXTRA_OUTPUT_NAMES') ) c_dict[f'{d_type}_USE_ZERO_ACCUM'] = self.config.getbool( - 'config', - f'{d_type}_PCP_COMBINE_USE_ZERO_ACCUM', False + 'config', f'{d_type}_PCP_COMBINE_USE_ZERO_ACCUM', False + ) + + c_dict[f'VAR_LIST_{d_type}'] = parse_var_list( + self.config, + data_type=d_type, + met_tool=self.app_name ) + self._set_thresholds(c_dict, d_type) + self._error_check_config(c_dict, d_type) + + # skip RuntimeFreq input file logic - remove once integrated + c_dict['FIND_FILES'] = False + return c_dict + + def _set_thresholds(self, c_dict, d_type): + """!Read input_thresh value from METplusConfig and set c_dict. Report + an error if value is not between 0 and 1. 
Set {d_type}_FILL_MISSING to + True if input_thresh is less than 1, meaning missing input is allowed. + + @param c_dict dictionary to set values + @param d_type data type, either 'FCST' or 'OBS' + """ + for t_type in ('VLD_THRESH', 'INPUT_THRESH'): + thresh = ( + self.config.getfloat('config', f'{d_type}_PCP_COMBINE_{t_type}') + ) + if thresh == float(MISSING_DATA_VALUE): + continue + + if thresh < 0 or thresh > 1: + self.log_error(f'{d_type}_PCP_COMBINE_{t_type} must be 0-1') + continue + + c_dict[f'{d_type}_{t_type}'] = thresh + + # if missing input is allowed, add MISSING to path if file is not found + # subtract method does not support missing inputs + if (c_dict.get(f'{d_type}_INPUT_THRESH', 1) < 1 and + c_dict[f'{d_type}_RUN_METHOD'] != "SUBTRACT"): + c_dict[f'{d_type}_FILL_MISSING'] = True + + def _error_check_config(self, c_dict, d_type): + """!Check c_dict values and log errors if anything is not set properly. + + @param c_dict dictionary containing values read from config + @param d_type type of input, either 'FCST' or 'OBS' + """ + run_method = c_dict[f'{d_type}_RUN_METHOD'] if run_method == 'DERIVE' and not c_dict[f'{d_type}_STAT_LIST']: self.log_error('Statistic list is empty. 
Must set ' f'{d_type}_PCP_COMBINE_STAT_LIST if running ' @@ -228,16 +243,12 @@ def set_fcst_or_obs_dict_items(self, d_type, c_dict): self.log_error(f'{d_type}_PCP_COMBINE_INPUT_LEVELS list ' 'should be either empty or the same length as ' f'{d_type}_PCP_COMBINE_INPUT_ACCUMS list.') - # skip RuntimeFreq input file logic - remove once integrated - c_dict['FIND_FILES'] = False - return c_dict def run_at_time_once(self, time_info): var_list = sub_var_list(self.c_dict['VAR_LIST'], time_info) data_src = self.c_dict['DATA_SRC'] - if not var_list: - var_list = [None] + var_list = [None] if not var_list else var_list for var_info in var_list: self.run_at_time_one_field(time_info, var_info, data_src) @@ -263,24 +274,23 @@ def run_at_time_one_field(self, time_info, var_info, data_src): time_info['level'] = lookback_seconds add_field_info_to_time_info(time_info, var_info) - # if method is not USER_DEFINED or DERIVE, - # check that field information is set + can_run = False if method == "USER_DEFINED": can_run = self.setup_user_method(time_info, data_src) - elif method == "DERIVE": - can_run = self.setup_derive_method(time_info, lookback_seconds, - data_src) - elif method == "ADD": - can_run = self.setup_add_method(time_info, lookback_seconds, - data_src) - elif method == "SUM": - can_run = self.setup_sum_method(time_info, lookback_seconds, - data_src) - elif method == "SUBTRACT": - can_run = self.setup_subtract_method(time_info, lookback_seconds, - data_src) else: - can_run = None + self.args.append(f'-{method.lower()}') + if method == "DERIVE": + can_run = self.setup_derive_method(time_info, lookback_seconds, + data_src) + elif method == "ADD": + can_run = self.setup_add_method(time_info, lookback_seconds, + data_src) + elif method == "SUM": + can_run = self.setup_sum_method(time_info, lookback_seconds, + data_src) + elif method == "SUBTRACT": + can_run = self.setup_subtract_method(time_info, lookback_seconds, + data_src) if not can_run: self.log_error("pcp_combine could not 
generate command") @@ -289,7 +299,7 @@ def run_at_time_one_field(self, time_info, var_info, data_src): # set time info level back to lookback seconds time_info['level'] = lookback_seconds - self._handle_extra_field_arguments(data_src, time_info) + self.set_command_line_arguments(data_src, time_info) # add -name argument output_name = self.c_dict.get(f'{data_src}_OUTPUT_NAME') @@ -316,15 +326,12 @@ def setup_user_method(self, time_info, data_src): """! Setup pcp_combine to call user defined command @param time_info dictionary containing timing information - @params data_src data type (FCST or OBS) - @rtype string - @return path to output file + @param data_src data type, either 'FCST' or 'OBS' + @rtype bool + @return True always """ - command_template = self.config.getraw( - 'config', - f'{data_src}_PCP_COMBINE_COMMAND' - ) - user_command = do_string_sub(command_template, **time_info) + temp = self.config.getraw('config', f'{data_src}_PCP_COMBINE_COMMAND') + user_command = do_string_sub(temp, **time_info) self.args.extend(user_command.split()) return True @@ -334,12 +341,10 @@ def setup_subtract_method(self, time_info, accum, data_src): @param time_info object containing timing information @param accum accumulation amount to compute in seconds - @params data_src data type (FCST or OBS) + @param data_src data type, either 'FCST' or 'OBS' @rtype string @return path to output file """ - self.args.append('-subtract') - lead = time_info['lead_seconds'] lead2 = lead - accum @@ -351,18 +356,8 @@ def setup_subtract_method(self, time_info, accum, data_src): files_found = [] - full_template = os.path.join(self.c_dict[f'{data_src}_INPUT_DIR'], - self.c_dict[f'{data_src}_INPUT_TEMPLATE']) - - # get first file - filepath1 = do_string_sub(full_template, **time_info) - file1 = preprocess_file(filepath1, - self.c_dict[data_src+'_INPUT_DATATYPE'], - self.config) - - if file1 is None: - self.log_error(f'Could not find {data_src} file {filepath1} ' - f'using template {full_template}') 
+ file1 = self.find_data(time_info, data_type=data_src) + if not file1: return None # handle field information @@ -376,6 +371,12 @@ def setup_subtract_method(self, time_info, accum, data_src): if self.c_dict.get(f"{data_src}_OPTIONS"): field_args['extra'] = self.c_dict[f"{data_src}_OPTIONS"][0] + field_info1 = self.get_field_string( + time_info=time_info, + search_accum=seconds_to_met_time(lead), + **field_args + ) + # if data is GRIB and second lead is 0, then # run PCPCombine in -add mode with just the first file if lead2 == 0 and not self.c_dict[f'{data_src}_USE_ZERO_ACCUM']: @@ -385,14 +386,9 @@ def setup_subtract_method(self, time_info, accum, data_src): f"{data_src}_PCP_COMBINE_USE_ZERO_ACCUM = True") self.args.clear() self.args.append('-add') - field_info = self.get_field_string( - time_info=time_info, - search_accum=seconds_to_met_time(lead), - **field_args - ) self.args.append(file1) - self.args.append(field_info) - files_found.append((file1, field_info)) + self.args.append(field_info1) + files_found.append((file1, field_info1)) return files_found # else continue building -subtract command @@ -404,21 +400,10 @@ def setup_subtract_method(self, time_info, accum, data_src): time_info2['level'] = accum time_info2['custom'] = time_info.get('custom', '') - filepath2 = do_string_sub(full_template, **time_info2) - file2 = preprocess_file(filepath2, - self.c_dict[data_src+'_INPUT_DATATYPE'], - self.config) - - if file2 is None: - self.log_error(f'Could not find {data_src} file {filepath2} ' - f'using template {full_template}') + file2 = self.find_data(time_info2, data_type=data_src) + if not file2: return None - field_info1 = self.get_field_string( - time_info=time_info, - search_accum=seconds_to_met_time(lead), - **field_args - ) field_info2 = self.get_field_string( time_info=time_info2, search_accum=seconds_to_met_time(lead2), @@ -441,16 +426,13 @@ def setup_sum_method(self, time_info, lookback, data_src): @param time_info object containing timing information 
@param lookback accumulation amount to compute in seconds - @params data_src data type (FCST or OBS) - @rtype string - @return path to output file + @param data_src data type, either 'FCST' or 'OBS' + @rtype bool + @return True always """ - self.args.append('-sum') - + in_accum = 0 if self.c_dict[f"{data_src}_ACCUMS"]: in_accum = self.c_dict[data_src+'_ACCUMS'][0] - else: - in_accum = 0 in_accum = time_string_to_met_time(in_accum, 'H') out_accum = time_string_to_met_time(lookback, 'S') @@ -480,6 +462,8 @@ def setup_sum_method(self, time_info, lookback, data_src): # set -field name and level if set in config self._handle_field_argument(data_src, time_info) + self._handle_input_thresh_argument(data_src) + return True def setup_add_method(self, time_info, lookback, data_src): @@ -491,8 +475,6 @@ def setup_add_method(self, time_info, lookback, data_src): @rtype string @return path to output file """ - self.args.append('-add') - # create list of tuples for input levels and optional field names self._build_input_accum_list(data_src, time_info) @@ -510,6 +492,8 @@ def setup_add_method(self, time_info, lookback, data_src): self.log_error(msg) return False + self._handle_input_thresh_argument(data_src) + return files_found def setup_derive_method(self, time_info, lookback, data_src): @@ -521,8 +505,6 @@ def setup_derive_method(self, time_info, lookback, data_src): @rtype string @return path to output file """ - self.args.append('-derive') - # add list of statistics self.args.append(','.join(self.c_dict[f"{data_src}_STAT_LIST"])) @@ -541,8 +523,7 @@ def setup_derive_method(self, time_info, lookback, data_src): level=accum_dict['level'], extra=accum_dict['extra']) self.run_count += 1 - input_files = self.find_data(time_info, - data_type=data_src, + input_files = self.find_data(time_info, data_type=data_src, return_list=True) if not input_files: self.missing_input_count += 1 @@ -575,6 +556,8 @@ def setup_derive_method(self, time_info, lookback, data_src): # set -field name and 
level from first file field info self.args.append(f'-field {files_found[0][1]}') + self._handle_input_thresh_argument(data_src) + return files_found def _handle_lookback(self, c_dict, d_type): @@ -652,9 +635,11 @@ def get_accumulation(self, time_info, accum, data_src, @param time_info dictionary containing time information @param accum desired accumulation to build in seconds - @param data_src type of data (FCST or OBS) - @rtype bool - @return True if full set of files to build accumulation is found + @param data_src type of data, either 'FCST' or 'OBS' + @param field_info_after_file if True, add field information after each + file in the arguments (defaults to True) + @rtype list + @return list of files to build accumulation or None """ search_time = time_info['valid'] custom = time_info.get('custom', '') @@ -679,18 +664,7 @@ def get_accumulation(self, time_info, accum, data_src, time_info['valid']) # log the input and output accumulation information - search_accum_list = [] - for lev in self.c_dict['ACCUM_DICT_LIST']: - if lev['template'] is not None: - search_accum_list.append(lev['template']) - else: - search_accum_list.append(ti_get_lead_string(lev['amount'], - plural=False)) - - self.logger.debug("Trying to build a " - f"{ti_get_lead_string(total_accum, plural=False)} " - "accumulation using " - f"{' or '.join(search_accum_list)} input data") + self._log_search_accum_list(total_accum) files_found = [] @@ -703,50 +677,17 @@ def get_accumulation(self, time_info, accum, data_src, # look for biggest accum that fits search for accum_dict in self.c_dict['ACCUM_DICT_LIST']: - if (accum_dict['amount'] > total_accum and - accum_dict['template'] is None): - continue - - search_file, lead = self.find_input_file(time_info['init'], - search_time, - accum_dict['amount'], - data_src, - custom) - - if not search_file: + search_file, field_info, accum_amount = ( + self._find_file_for_accum(accum_dict, total_accum, + time_info, search_time, + data_src, custom) + ) + if 
search_file is None: continue - # if found a file, add it to input list with info - # if template is used in accum, find value and - # apply bucket interval is set - if accum_dict['template'] is not None: - accum_amount = self.get_template_accum(accum_dict, - search_time, - lead, - data_src, - custom) - if accum_amount > total_accum: - self.logger.debug("Accumulation amount is bigger " - "than remaining accumulation.") - continue - else: - accum_amount = accum_dict['amount'] - - search_time_info = { - 'valid': search_time, - 'lead': lead, - } - field_info = self.get_field_string( - time_info=search_time_info, - search_accum=time_string_to_met_time(accum_amount), - name=accum_dict['name'], - level=accum_dict['level'], - extra=accum_dict['extra'] - ) # add file to input list and step back to find more data - self.args.append(search_file) - if field_info_after_file: - self.args.append(field_info) + self._add_file_and_field_info_to_args(search_file, field_info, + field_info_after_file) files_found.append((search_file, field_info)) self.logger.debug(f"Adding input file: {search_file} " @@ -771,6 +712,84 @@ def get_accumulation(self, time_info, accum, data_src, return files_found + def _log_search_accum_list(self, total_accum): + """!Format and log information about the desired accumulation to build + and the configurations that define the search accumulations. 
+ + @param total_accum desired accumulation to build + """ + search_accum_list = [] + for lev in self.c_dict['ACCUM_DICT_LIST']: + if lev['template'] is not None: + search_accum_list.append(lev['template']) + else: + search_accum_list.append(ti_get_lead_string(lev['amount'], + plural=False)) + + self.logger.debug("Trying to build a " + f"{ti_get_lead_string(total_accum, plural=False)} " + "accumulation using " + f"{' or '.join(search_accum_list)} input data") + + def _add_file_and_field_info_to_args(self, search_file, field_info, + field_info_after_file): + """!Helper function to add arguments to reduce cognitive complexity + of get_accumulation function. Adds to self.args list. + + @param search_file file path + @param field_info formatted field information associated with file path + @param field_info_after_file boolean if True add field info after file + """ + self.args.append(search_file) + if field_info_after_file: + self.args.append(field_info) + + def _find_file_for_accum(self, accum_dict, total_accum, time_info, + search_time, data_src, custom): + if (accum_dict['amount'] > total_accum and + accum_dict['template'] is None): + return None, None, None + + self.c_dict['SUPPRESS_WARNINGS'] = True + search_file, lead = self.find_input_file(time_info['init'], + search_time, + accum_dict['amount'], + data_src, + custom) + self.c_dict['SUPPRESS_WARNINGS'] = False + if not search_file: + return None, None, None + + # if found a file, add it to input list with info + # if template is used in accum, find value and + # apply bucket interval if set + if accum_dict['template'] is not None: + accum_amount = self.get_template_accum(accum_dict, + search_time, + lead, + data_src, + custom) + if accum_amount > total_accum: + self.logger.debug("Accumulation amount is bigger " + "than remaining accumulation.") + return None, None, None + else: + accum_amount = accum_dict['amount'] + + search_time_info = { + 'valid': search_time, + 'lead': lead, + } + field_info =
self.get_field_string( + time_info=search_time_info, + search_accum=time_string_to_met_time(accum_amount), + name=accum_dict['name'], + level=accum_dict['level'], + extra=accum_dict['extra'] + ) + + return search_file, field_info, accum_amount + def get_lowest_fcst_file(self, valid_time, data_src, custom): """! Find the lowest forecast hour that corresponds to the valid time @@ -813,18 +832,10 @@ def get_lowest_fcst_file(self, valid_time, data_src, custom): } time_info = ti_calculate(input_dict) time_info['custom'] = custom - search_file = os.path.join(self.c_dict[f'{data_src}_INPUT_DIR'], - self.c_dict[data_src+'_INPUT_TEMPLATE']) - search_file = do_string_sub(search_file, **time_info) - self.logger.debug(f"Looking for {search_file}") - - search_file = preprocess_file( - search_file, - self.c_dict[data_src+'_INPUT_DATATYPE'], - self.config) - - if search_file is not None: - return search_file, forecast_lead + search_file = self.find_data(time_info, data_type=data_src, + return_list=True, mandatory=False) + if search_file: + return search_file[0], forecast_lead forecast_lead += smallest_input_accum return None, 0 @@ -850,7 +861,6 @@ def get_field_string(self, time_info=None, search_accum=0, name=None, def find_input_file(self, init_time, valid_time, search_accum, data_src, custom): lead = 0 - in_template = self.c_dict[data_src+'_INPUT_TEMPLATE'] if ('{lead?' 
in in_template or @@ -871,13 +881,12 @@ def find_input_file(self, init_time, valid_time, search_accum, data_src, time_info = ti_calculate(input_dict) time_info['custom'] = custom time_info['level'] = int(search_accum) - input_path = os.path.join(self.c_dict[f'{data_src}_INPUT_DIR'], - in_template) - input_path = do_string_sub(input_path, **time_info) + input_path = self.find_data(time_info, data_type=data_src, + return_list=True, mandatory=False) + if input_path: + input_path = input_path[0] - return preprocess_file(input_path, - self.c_dict[f'{data_src}_INPUT_DATATYPE'], - self.config), lead + return input_path, lead def get_template_accum(self, accum_dict, search_time, lead, data_src, custom): @@ -908,22 +917,30 @@ def get_template_accum(self, accum_dict, search_time, lead, data_src, def get_command(self): - cmd = (f"{self.app_path} -v {self.c_dict['VERBOSITY']} " - f"{' '.join(self.args)} {self.get_output_path()}") + cmd = (f"{self.app_path} {' '.join(self.args)}" + f" {self.get_output_path()} -v {self.c_dict['VERBOSITY']}") return cmd - def _handle_extra_field_arguments(self, data_src, time_info=None): - extra_names = self.c_dict.get(data_src + '_EXTRA_NAMES') - if not extra_names: - return + def set_command_line_arguments(self, data_src, time_info=None): + """!Handle extra field arguments and vld_thresh argument. 
- extra_levels = self.c_dict.get(data_src + '_EXTRA_LEVELS') - for name, level in zip(extra_names, extra_levels): - field_string = self.get_field_string(time_info=time_info, - name=name, - level=level) - field_format = f"-field {field_string}" - self.args.append(field_format) + @param data_src type of data, either 'FCST' or 'OBS' + @param time_info dictionary containing time information or None + (defaults to None) + """ + extra_names = self.c_dict.get(data_src + '_EXTRA_NAMES') + if extra_names: + extra_levels = self.c_dict.get(data_src + '_EXTRA_LEVELS') + for name, level in zip(extra_names, extra_levels): + field_string = self.get_field_string(time_info=time_info, + name=name, + level=level) + field_format = f"-field {field_string}" + self.args.append(field_format) + + vld_thresh = self.c_dict.get(f'{data_src}_VLD_THRESH') + if vld_thresh: + self.args.append(f'-vld_thresh {vld_thresh}') def _handle_field_argument(self, data_src, time_info): if not self.c_dict[f'{data_src}_NAMES']: @@ -958,6 +975,13 @@ def _handle_name_argument(self, output_name, data_src): name_format = f'-name "{name_format}"' self.args.append(name_format) + def _handle_input_thresh_argument(self, data_src): + input_thresh = self.c_dict.get(f'{data_src}_INPUT_THRESH') + if not input_thresh: + return + + self.args.append(f'-input_thresh {input_thresh}') + def _build_input_accum_list(self, data_src, time_info): accum_list = self.c_dict[data_src + '_ACCUMS'] level_list = self.c_dict[data_src + '_LEVELS'] diff --git a/metplus/wrappers/reformat_gridded_wrapper.py b/metplus/wrappers/reformat_gridded_wrapper.py index 92aa3ce162..13522a2327 100755 --- a/metplus/wrappers/reformat_gridded_wrapper.py +++ b/metplus/wrappers/reformat_gridded_wrapper.py @@ -10,10 +10,6 @@ Condition codes: 0 for success, 1 for failure ''' -import os - -from ..util import get_lead_sequence -from ..util import time_util, skip_time from . 
import LoopTimesWrapper # pylint:disable=pointless-string-statement @@ -34,6 +30,22 @@ class ReformatGriddedWrapper(LoopTimesWrapper): def __init__(self, config, instance=None): super().__init__(config, instance=instance) + def create_c_dict(self): + c_dict = super().create_c_dict() + + # check if FCST or OBS should be run + app = self.app_name.upper() + for fcst_or_obs in ('FCST', 'OBS'): + c_dict[f'{fcst_or_obs}_RUN'] = ( + self.config.getbool('config', f'{fcst_or_obs}_{app}_RUN', False) + ) + + if not c_dict['FCST_RUN'] and not c_dict['OBS_RUN']: + self.log_error(f'Must set either FCST_{app}_RUN or OBS_{app}_RUN') + return c_dict + + return c_dict + def run_at_time(self, input_dict): """! Runs the MET application for a given run time. Processing forecast or observation data is determined by conf variables. @@ -42,22 +54,12 @@ def run_at_time(self, input_dict): @param input_dict dictionary containing init or valid time info """ - app_name_caps = self.app_name.upper() run_list = [] - if self.config.getbool('config', 'FCST_'+app_name_caps+'_RUN', False): + if self.c_dict['FCST_RUN']: run_list.append("FCST") - if self.config.getbool('config', 'OBS_'+app_name_caps+'_RUN', False): + if self.c_dict['OBS_RUN']: run_list.append("OBS") - if not run_list: - class_name = self.__class__.__name__[0: -7] - self.log_error(f"{class_name} specified in process_list, but " - f"FCST_{app_name_caps}_RUN and " - f"OBS_{app_name_caps}_RUN are both False. 
" - f"Set one or both to true or remove {class_name} " - "from the process_list") - return - for to_run in run_list: self.logger.info("Processing {} data".format(to_run)) self.c_dict['VAR_LIST'] = self.c_dict.get(f'VAR_LIST_{to_run}') diff --git a/metplus/wrappers/regrid_data_plane_wrapper.py b/metplus/wrappers/regrid_data_plane_wrapper.py index 940743800e..ace6a6aeb8 100755 --- a/metplus/wrappers/regrid_data_plane_wrapper.py +++ b/metplus/wrappers/regrid_data_plane_wrapper.py @@ -47,95 +47,39 @@ def create_c_dict(self): f'{app}_ONCE_PER_FIELD', True) - c_dict['FCST_INPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - f'FCST_{app}_INPUT_TEMPLATE', - '') - - if not c_dict['FCST_INPUT_TEMPLATE']: - c_dict['FCST_INPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - f'FCST_{app}_TEMPLATE', - '') - - c_dict['OBS_INPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - 'OBS_REGRID_DATA_PLANE_INPUT_TEMPLATE', - '') - - if not c_dict['OBS_INPUT_TEMPLATE']: - c_dict['OBS_INPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - 'OBS_REGRID_DATA_PLANE_TEMPLATE', - '') - - c_dict['FCST_OUTPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - 'FCST_REGRID_DATA_PLANE_OUTPUT_TEMPLATE', - '') - - if not c_dict['FCST_OUTPUT_TEMPLATE']: - c_dict['FCST_OUTPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - 'FCST_REGRID_DATA_PLANE_TEMPLATE', - '') - - c_dict['OBS_OUTPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - 'OBS_REGRID_DATA_PLANE_OUTPUT_TEMPLATE', - '') - - if not c_dict['OBS_OUTPUT_TEMPLATE']: - c_dict['OBS_OUTPUT_TEMPLATE'] = \ - self.config.getraw('filename_templates', - 'OBS_REGRID_DATA_PLANE_TEMPLATE', - '') - window_types = [] - if self.config.getbool('config', 'FCST_REGRID_DATA_PLANE_RUN', False): - window_types.append('FCST') - c_dict['FCST_INPUT_DIR'] = \ - self.config.getdir('FCST_REGRID_DATA_PLANE_INPUT_DIR', '') - - c_dict['FCST_OUTPUT_DIR'] = \ - 
self.config.getdir('FCST_REGRID_DATA_PLANE_OUTPUT_DIR', '') - - if not c_dict['FCST_INPUT_TEMPLATE']: - self.log_error("FCST_REGRID_DATA_PLANE_INPUT_TEMPLATE must be set if " - "FCST_REGRID_DATA_PLANE_RUN is True") - - - if not c_dict['FCST_OUTPUT_TEMPLATE']: - self.log_error("FCST_REGRID_DATA_PLANE_OUTPUT_TEMPLATE must be set if " - "FCST_REGRID_DATA_PLANE_RUN is True") - - c_dict['VAR_LIST_FCST'] = parse_var_list( - self.config, - data_type='FCST', - met_tool=self.app_name - ) - - if self.config.getbool('config', 'OBS_REGRID_DATA_PLANE_RUN', False): - window_types.append('OBS') - c_dict['OBS_INPUT_DIR'] = \ - self.config.getdir('OBS_REGRID_DATA_PLANE_INPUT_DIR', '') - - c_dict['OBS_OUTPUT_DIR'] = \ - self.config.getdir('OBS_REGRID_DATA_PLANE_OUTPUT_DIR', '') - - if not c_dict['OBS_INPUT_TEMPLATE']: - self.log_error("OBS_REGRID_DATA_PLANE_INPUT_TEMPLATE must be set if " - "OBS_REGRID_DATA_PLANE_RUN is True") - - if not c_dict['OBS_OUTPUT_TEMPLATE']: - self.log_error("OBS_REGRID_DATA_PLANE_OUTPUT_TEMPLATE must be set if " - "OBS_REGRID_DATA_PLANE_RUN is True") - - c_dict['VAR_LIST_OBS'] = parse_var_list( - self.config, - data_type='OBS', - met_tool=self.app_name - ) + for fcst_or_obs in ('FCST', 'OBS'): + if not c_dict[f'{fcst_or_obs}_RUN']: + continue + + window_types.append(fcst_or_obs) + + for in_or_out in ('INPUT', 'OUTPUT'): + # read FCST/OBS_INPUT/OUTPUT_DIR + c_dict[f'{fcst_or_obs}_{in_or_out}_DIR'] = ( + self.config.getdir(f'{fcst_or_obs}_{app}_{in_or_out}_DIR') + ) + + # read FCST/OBS_INPUT/OUTPUT_TEMPLATE + name = self.config.get_mp_config_name( + [f'{fcst_or_obs}_{app}_{in_or_out}_TEMPLATE', + f'{fcst_or_obs}_{app}_TEMPLATE'] + ) + if not name: + self.log_error(f"{fcst_or_obs}_{app}_{in_or_out}_TEMPLATE " + f"must be set if {fcst_or_obs}_{app}_RUN") + continue + + c_dict[f'{fcst_or_obs}_{in_or_out}_TEMPLATE'] = ( + self.config.getraw('config', name) + ) + + # set list of variables (fields) + c_dict[f'VAR_LIST_{fcst_or_obs}'] = parse_var_list( + 
self.config, + data_type=fcst_or_obs, + met_tool=self.app_name + ) self.handle_file_window_variables(c_dict, data_types=window_types) diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_add.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_add.conf index 529e6999ca..5c41b22184 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_add.conf +++ b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_add.conf @@ -70,3 +70,6 @@ FCST_PCP_COMBINE_INPUT_LEVELS = Surface FCST_PCP_COMBINE_OUTPUT_ACCUM = 15M FCST_PCP_COMBINE_OUTPUT_NAME = A001500 + +#FCST_PCP_COMBINE_INPUT_THRESH = +#FCST_PCP_COMBINE_VLD_THRESH = diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_bucket.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_bucket.conf index bd8ca11286..6325df895b 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_bucket.conf +++ b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_bucket.conf @@ -66,3 +66,6 @@ FCST_PCP_COMBINE_INPUT_ACCUMS = {lead} FCST_PCP_COMBINE_OUTPUT_ACCUM = 15H FCST_PCP_COMBINE_OUTPUT_NAME = APCP + +#FCST_PCP_COMBINE_INPUT_THRESH = +#FCST_PCP_COMBINE_VLD_THRESH = diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_derive.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_derive.conf index b2f8d6f637..831f636ba7 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_derive.conf +++ b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_derive.conf @@ -79,3 +79,6 @@ FCST_PCP_COMBINE_OUTPUT_NAME = #FCST_PCP_COMBINE_EXTRA_NAMES = #FCST_PCP_COMBINE_EXTRA_LEVELS = #FCST_PCP_COMBINE_EXTRA_OUTPUT_NAMES = + +#FCST_PCP_COMBINE_INPUT_THRESH = +#FCST_PCP_COMBINE_VLD_THRESH = diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_loop_custom.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_loop_custom.conf index 64d449d8ba..720d6ccb43 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_loop_custom.conf +++ 
b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_loop_custom.conf @@ -70,3 +70,6 @@ FCST_PCP_COMBINE_INPUT_ACCUMS = 24H FCST_PCP_COMBINE_OUTPUT_ACCUM = 24H FCST_PCP_COMBINE_OUTPUT_NAME = APCP + +#FCST_PCP_COMBINE_INPUT_THRESH = +#FCST_PCP_COMBINE_VLD_THRESH = diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_python_embedding.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_python_embedding.conf index d9cd56f96e..ebb7a0127e 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_python_embedding.conf +++ b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_python_embedding.conf @@ -64,6 +64,9 @@ OBS_PCP_COMBINE_INPUT_DATATYPE = PYTHON_NUMPY OBS_PCP_COMBINE_INPUT_ACCUMS = 6 OBS_PCP_COMBINE_INPUT_NAMES = {PARM_BASE}/use_cases/met_tool_wrapper/PCPCombine/sum_IMERG_V06_HDF5.py {OBS_PCP_COMBINE_INPUT_DIR} IRprecipitation {valid?fmt=%Y%m%d%H} 02 +#OBS_PCP_COMBINE_INPUT_THRESH = +#OBS_PCP_COMBINE_VLD_THRESH = + [user_env_vars] # uncomment and change this to the path of a version of python that has the h5py package installed #MET_PYTHON_EXE = /path/to/python/with/h5-py/and/numpy/packages/bin/python diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_subtract.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_subtract.conf index caf0890409..59174513d1 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_subtract.conf +++ b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_subtract.conf @@ -69,3 +69,5 @@ FCST_PCP_COMBINE_OUTPUT_ACCUM = 3H FCST_PCP_COMBINE_OUTPUT_NAME = APCP_03 FCST_PCP_COMBINE_USE_ZERO_ACCUM = False + +#FCST_PCP_COMBINE_VLD_THRESH = diff --git a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_sum.conf b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_sum.conf index bdfa337ebd..9012e7fb3b 100644 --- a/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_sum.conf +++ b/parm/use_cases/met_tool_wrapper/PCPCombine/PCPCombine_sum.conf @@ -66,3 +66,6 @@ 
FCST_PCP_COMBINE_INPUT_LEVELS = Surface FCST_PCP_COMBINE_OUTPUT_ACCUM = 15M FCST_PCP_COMBINE_OUTPUT_NAME = A001500 + +#FCST_PCP_COMBINE_INPUT_THRESH = +#FCST_PCP_COMBINE_VLD_THRESH = diff --git a/ush/run_metplus.py b/ush/run_metplus.py index 9886e1119b..eb374b4427 100755 --- a/ush/run_metplus.py +++ b/ush/run_metplus.py @@ -28,6 +28,7 @@ import produtil.setup from metplus.util import pre_run_setup, run_metplus, post_run_cleanup +from metplus import __version__ as metplus_version '''!@namespace run_metplus Main script the processes all the tasks in the PROCESS_LIST @@ -57,15 +58,12 @@ def main(): def usage(): """!How to call this script.""" - print (''' -Usage: %s arg1 arg2 arg3 - -h|--help Display this usage statement - -Arguments: -/path/to/parmfile.conf -- Specify custom configuration file to use -section.option=value -- override conf options on the command line - -'''%(basename(__file__))) + print(f"Running METplus v{metplus_version}\n" + f"Usage: {basename(__file__)} arg1 arg2 arg3\n" + " -h|--help Display this usage statement\n\n" + "Arguments:\n" + "/path/to/parmfile.conf -- Specify custom configuration file to use\n" + "section.option=value -- override conf options on the command line") sys.exit(2) @@ -83,9 +81,8 @@ def get_config_inputs_from_command_line(): # print usage statement and exit if help arg is found help_args = ('-h', '--help', '-help') - for help_arg in help_args: - if help_arg in sys.argv: - usage() + if any(arg in sys.argv for arg in help_args): + usage() # pull out command line arguments config_inputs = []