From bd6a344326d279796d7cdca0e63c2478361a9e78 Mon Sep 17 00:00:00 2001
From: "Chan-Hoo.Jeon-NOAA" <60152248+chan-hoo@users.noreply.github.com>
Date: Fri, 22 Jul 2022 13:40:58 -0400
Subject: [PATCH] Fix issue on get_extrn_lbcs when FCST_LEN_HRS>=40 with netcdf (#814)

* activate b file on hpss for >40h

* add a new we2e test for fcst_len_hrs>40

* reduce fcst time for we2e
---
 ...S_lbcs_FV3GFS_fmt_netcdf_2022060112_48h.sh | 26 +++++++++++++++++
 ush/retrieve_data.py                          | 29 +++++++++++++++----
 2 files changed, 49 insertions(+), 6 deletions(-)
 create mode 100644 tests/WE2E/test_configs/wflow_features/config.get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2022060112_48h.sh

diff --git a/tests/WE2E/test_configs/wflow_features/config.get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2022060112_48h.sh b/tests/WE2E/test_configs/wflow_features/config.get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2022060112_48h.sh
new file mode 100644
index 0000000000..c119306516
--- /dev/null
+++ b/tests/WE2E/test_configs/wflow_features/config.get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2022060112_48h.sh
@@ -0,0 +1,26 @@
+#
+# TEST PURPOSE/DESCRIPTION:
+# ------------------------
+#
+# This test checks the capability of the workflow to retrieve from NOAA
+# HPSS netcdf-formatted output files generated by the FV3GFS external
+# model (FCST_LEN_HRS>=40).
+#
+
+RUN_ENVIR="community"
+PREEXISTING_DIR_METHOD="rename"
+
+PREDEF_GRID_NAME="RRFS_CONUS_25km"
+CCPP_PHYS_SUITE="FV3_GFS_v16"
+
+EXTRN_MDL_NAME_ICS="FV3GFS"
+FV3GFS_FILE_FMT_LBCS="netcdf"
+EXTRN_MDL_NAME_LBCS="FV3GFS"
+FV3GFS_FILE_FMT_ICS="netcdf"
+
+DATE_FIRST_CYCL="20220601"
+DATE_LAST_CYCL="20220601"
+CYCL_HRS=( "12" )
+
+FCST_LEN_HRS="48"
+LBC_SPEC_INTVL_HRS="12"
diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py
index d68c082370..4267d6faaf 100755
--- a/ush/retrieve_data.py
+++ b/ush/retrieve_data.py
@@ -214,6 +214,8 @@ def find_archive_files(paths, file_names, cycle_date):
 
     zipped_archive_file_paths = zip(paths, file_names)
 
+    existing_archive={}
+
     # Narrow down which HPSS files are available for this date
     for list_item, (archive_path, archive_file_names) in \
         enumerate(zipped_archive_file_paths):
@@ -226,9 +228,14 @@ def find_archive_files(paths, file_names, cycle_date):
 
         file_path = os.path.join(archive_path, archive_file_names[0])
         file_path = fill_template(file_path, cycle_date)
-        existing_archive = hsi_single_file(file_path)
+        existing_archive[0] = hsi_single_file(file_path)
+
+        if len(archive_file_names)>1:
+            file_path_b = os.path.join(archive_path, archive_file_names[1])
+            file_path_b = fill_template(file_path_b, cycle_date)
+            existing_archive[1] = hsi_single_file(file_path_b)
 
-        if existing_archive:
+        if existing_archive[0]:
             logging.info(f'Found HPSS file: {file_path}')
             return existing_archive, list_item
 
@@ -387,6 +394,7 @@ def hpss_requested_files(cla, file_names, store_specs):
     source_paths = []
     for fcst_hr in cla.fcst_hrs:
         for file_name in file_names:
+
             source_paths.append(fill_template(
                 os.path.join(archive_internal_dir, file_name),
                 cla.cycle_date,
@@ -395,13 +403,13 @@ def hpss_requested_files(cla, file_names, store_specs):
 
     if store_specs.get('archive_format', 'tar') == 'zip':
         # Get the entire file from HPSS
-        existing_archive = hsi_single_file(existing_archive, mode='get')
+        existing_archive[0] = hsi_single_file(existing_archive[0], mode='get')
 
         # Grab only the necessary files from the archive
-        cmd = f'unzip -o {os.path.basename(existing_archive)} {" ".join(source_paths)}'
+        cmd = f'unzip -o {os.path.basename(existing_archive[0])} {" ".join(source_paths)}'
 
     else:
-        cmd = f'htar -xvf {existing_archive} {" ".join(source_paths)}'
+        cmd = f'htar -xvf {existing_archive[0]} {" ".join(source_paths)}'
 
     logging.info(f'Running command \n {cmd}')
     subprocess.run(cmd,
@@ -409,13 +417,22 @@ def hpss_requested_files(cla, file_names, store_specs):
                    shell=True,
                    )
 
+    if len(existing_archive)>1:
+        cmd = f'htar -xvf {existing_archive[1]} {" ".join(source_paths)}'
+        logging.info(f'Running command \n {cmd}')
+        subprocess.run(cmd,
+                       check=True,
+                       shell=True,
+                       )
+
     # Check that files exist and Remove any data transfer artifacts.
     unavailable = clean_up_output_dir(
         expected_subdir=archive_internal_dir,
-        local_archive=os.path.basename(existing_archive),
+        local_archive=os.path.basename(existing_archive[0]),
         output_path=output_path,
         source_paths=source_paths,
         )
+
     if not unavailable:
         return unavailable
 