From 6023fd25aa53e3a4d1b463c0c2cd092b5a5d2c32 Mon Sep 17 00:00:00 2001
From: AndrewEichmann-NOAA <58948505+AndrewEichmann-NOAA@users.noreply.github.com>
Date: Wed, 31 Jan 2024 13:35:15 -0500
Subject: [PATCH] Runs netcdf ioda converters in parallel (#888)

Sets up the ocean obs prep task to run the NetCDF to IODA converter executable in parallel by obs space, node permitting. Also tries to detect and notify of failures.

Tested with ctests on Hera, including pointing `DMPDIR` at `/scratch1/NCEPDEV/stmp4/Shastri.Paturi/forAndrew/`. Runtime was reduced only from 16 minutes to 15 minutes, but that is probably because only rads and metop b GHRSST are processed, with the latter being the bottleneck.

Partially addresses https://github.com/NOAA-EMC/GDASApp/issues/886
---
 scripts/exglobal_prep_ocean_obs.py | 31 ++++++++++++++++++++++++++++--
 ush/soca/run_jjobs.py              |  2 +-
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/scripts/exglobal_prep_ocean_obs.py b/scripts/exglobal_prep_ocean_obs.py
index aaa6f485a..2ba05ba34 100755
--- a/scripts/exglobal_prep_ocean_obs.py
+++ b/scripts/exglobal_prep_ocean_obs.py
@@ -2,6 +2,7 @@
 # exglobal_prep_ocean_obs.py
 # Prepares observations for marine DA
 from datetime import datetime, timedelta
+from multiprocessing import Process
 import os
 import subprocess
 from soca import prep_marine_obs
@@ -34,7 +35,20 @@
     logger.critical(f"OBSPREP_YAML file {OBSPREP_YAML} does not exist")
     raise FileNotFoundError
 
+
+def run_netcdf_to_ioda(obsspace_to_convert):
+    name, iodaYamlFilename = obsspace_to_convert
+    try:
+        subprocess.run([OCNOBS2IODAEXEC, iodaYamlFilename], check=True)
+        logger.info(f"ran ioda converter on obs space {name} successfully")
+    except subprocess.CalledProcessError as e:
+        logger.info(f"ioda converter failed with error {e}, \
+            return code {e.returncode}")
+        return e.returncode
+
+
 files_to_save = []
+obsspaces_to_convert = []
 
 try:
     for observer in obsConfig['observers']:
@@ -46,6 +60,8 @@
             logger.warning("Ill-formed observer yaml file, skipping")
             continue
 
+        # find match to the obs space from OBS_YAML in OBSPREP_YAML
+        # this is awkward and unpythonic, so feel free to improve
         for observation in obsprepConfig['observations']:
             obsprepSpace = observation['obs space']
             obsprepSpaceName = obsprepSpace['name']
@@ -82,16 +98,27 @@
                 iodaYamlFilename = obsprepSpaceName + '2ioda.yaml'
                 save_as_yaml(obsprepSpace, iodaYamlFilename)
 
-                subprocess.run([OCNOBS2IODAEXEC, iodaYamlFilename], check=True)
-
                 files_to_save.append([obsprepSpace['output file'],
                                       os.path.join(COMOUT_OBS, obsprepSpace['output file'])])
                 files_to_save.append([iodaYamlFilename,
                                       os.path.join(COMOUT_OBS, iodaYamlFilename)])
+
+                obsspaces_to_convert.append((obs_space_name, iodaYamlFilename))
+
 except TypeError:
     logger.critical("Ill-formed OBS_YAML or OBSPREP_YAML file, exiting")
     raise
 
+processes = []
+for obsspace_to_convert in obsspaces_to_convert:
+    process = Process(target=run_netcdf_to_ioda, args=(obsspace_to_convert,))
+    process.start()
+    processes.append(process)
+
+# Wait for all processes to finish
+for process in processes:
+    process.join()
+
 if not os.path.exists(COMOUT_OBS):
     os.makedirs(COMOUT_OBS)
 
diff --git a/ush/soca/run_jjobs.py b/ush/soca/run_jjobs.py
index 2008c3042..b38786a56 100755
--- a/ush/soca/run_jjobs.py
+++ b/ush/soca/run_jjobs.py
@@ -196,7 +196,7 @@ def copy_bkgs(self):
             self.f.write(f"cp -r {ensbkgs} $ROTDIR \n")
         else:
             print('Aborting, ensemble backgrounds not found')
-            sys.exit()
+            sys.exit(1)
 
     def fixconfigs(self):
         """