Skip to content

Commit

Permalink
Add facility to take obs from multiple dmpdir cycles, forward and back (
Browse files Browse the repository at this point in the history
#882)

Number of cycles forward and back are set in new entries in
`parm/soca/obsprep/obsprep_config.yaml`, with a hardcoded default of 0.

More cycles of obs (adt and metop b sst) have been added to the ctests
and `test/soca/testdata/` had been reorganized to a structure more
resembling the `dmpdir` in order to keep everything straight.

Most of the "Files changed" are cdl files added to the testdata.

At least partially addresses
#837

Now also fixes #881

Tested with `DMPDIR: /scratch1/NCEPDEV/stmp4/Shastri.Paturi/forAndrew`
as well - `JGLOBAL_PREP_OCEAN_OBS` take 16 minutes
  • Loading branch information
AndrewEichmann-NOAA authored Jan 27, 2024
1 parent 5549447 commit 0ceff32
Show file tree
Hide file tree
Showing 42 changed files with 144,480 additions and 99 deletions.
2 changes: 1 addition & 1 deletion parm/soca/obs/obs_list.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
observers:
- !INC ${OBS_YAML_DIR}/adt_rads_all.yaml
#- !INC ${OBS_YAML_DIR}/sst_metopb_l3u.yaml
- !INC ${OBS_YAML_DIR}/sst_metopb_l3u.yaml
- !INC ${OBS_YAML_DIR}/icec_amsr2_north.yaml
- !INC ${OBS_YAML_DIR}/icec_amsr2_south.yaml
6 changes: 6 additions & 0 deletions parm/soca/obsprep/obsprep_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ observations:
dmpdir regex: rads_adt_*.nc
provider: RADS
output file: adt_rads_all.nc4
window:
back: 8 # look back 8 six-hourly obs dumps
forward: 1 # look forward 1 six-hourly bin
- obs space:
name: icec_amsr2_north
provider: AMSR2
Expand All @@ -39,6 +42,9 @@ observations:
units: C
min: -3.0
max: 50.0
window:
back: 4
forward: 0
binning:
stride: 5
min number of obs: 10
59 changes: 32 additions & 27 deletions scripts/exglobal_prep_ocean_obs.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#!/usr/bin/env python3
# exglobal_prep_ocean_obs.py
# Pepares observations for marine DA
# Prepares observations for marine DA
from datetime import datetime, timedelta
import os
from soca import prep_marine_obs
import subprocess
from soca import prep_marine_obs
from wxflow import YAMLFile, save_as_yaml, FileHandler, Logger

logger = Logger()

cyc = os.getenv('cyc')
PDY = os.getenv('PDY')

# set the window times
# Set the window times
cdateDatetime = datetime.strptime(PDY + cyc, '%Y%m%d%H')
windowBeginDatetime = cdateDatetime - timedelta(hours=3)
windowEndDatetime = cdateDatetime + timedelta(hours=3)
Expand All @@ -23,8 +23,7 @@
COMOUT_OBS = os.getenv('COMOUT_OBS')

OBS_YAML = os.getenv('OBS_YAML')
# this will fail with FileNotFoundError if all yaml files in OBS_YAML are not
# present in OBS_YAML_DIR

obsConfig = YAMLFile(OBS_YAML)

OBSPREP_YAML = os.getenv('OBSPREP_YAML')
Expand All @@ -35,59 +34,65 @@
logger.critical(f"OBSPREP_YAML file {OBSPREP_YAML} does not exist")
raise FileNotFoundError

filesToSave = []
files_to_save = []

# TODO (AFE): needs more error handling (missing sources, missing files)
try:
# For each of the observation sources (observers) specificed in the OBS_YAML...
for observer in obsConfig['observers']:

try:
obsSpaceName = observer['obs space']['name']
logger.info(f"obsSpaceName: {obsSpaceName}")
obs_space_name = observer['obs space']['name']
logger.info(f"obsSpaceName: {obs_space_name}")
except KeyError:
logger.warning(f"observer: {observer}")
logger.warning("Ill-formed observer yaml file, skipping")
continue # to next observer
continue

# ...look through the observations in OBSPREP_YAML...
for observation in obsprepConfig['observations']:

obsprepSpace = observation['obs space']
obsprepSpaceName = obsprepSpace['name']

# ...for a matching name, and process the observation source
if obsprepSpaceName == obsSpaceName:
if obsprepSpaceName == obs_space_name:
logger.info(f"obsprepSpaceName: {obs_space_name}")

pdyDatetime = datetime.strptime(PDY + cyc, '%Y%m%d%H')
cycles = []

try:
obsWindowBack = obsprepSpace['window']['back']
obsWindowForward = obsprepSpace['window']['forward']
except KeyError:
obsWindowBack = 0
obsWindowForward = 0

logger.info(f"obsprepSpaceName: {obsSpaceName}")
for i in range(-obsWindowBack, obsWindowForward + 1):
interval = timedelta(hours=6 * i)
cycles.append(pdyDatetime + interval)

# fetch the obs files from DMPDIR to RUNDIR
matchingFiles = prep_marine_obs.obs_fetch(obsprepSpace)
matchingFiles = prep_marine_obs.obs_fetch(obsprepSpace, cycles)

if not matchingFiles:
logger.warning("No files found for obs source , skipping")
break # to next observation source in OBS_YAML
logger.warning("No files found for obs source, skipping")
break

obsprepSpace['input files'] = matchingFiles
obsprepSpace['window begin'] = windowBegin
obsprepSpace['window end'] = windowEnd
outputFilename = f"gdas.t{cyc}z.{obsSpaceName}.{PDY}{cyc}.nc4"
outputFilename = f"gdas.t{cyc}z.{obs_space_name}.{PDY}{cyc}.nc4"
obsprepSpace['output file'] = outputFilename

iodaYamlFilename = obsprepSpaceName + '2ioda.yaml'
save_as_yaml(obsprepSpace, iodaYamlFilename)

subprocess.run([OCNOBS2IODAEXEC, iodaYamlFilename], check=True)

filesToSave.append([obsprepSpace['output file'],
os.path.join(COMOUT_OBS, obsprepSpace['output file'])])
filesToSave.append([iodaYamlFilename,
os.path.join(COMOUT_OBS, iodaYamlFilename)])
files_to_save.append([obsprepSpace['output file'],
os.path.join(COMOUT_OBS, obsprepSpace['output file'])])
files_to_save.append([iodaYamlFilename,
os.path.join(COMOUT_OBS, iodaYamlFilename)])
except TypeError:
logger.critical("Ill-formed OBS_YAML or OBSPREP_YAML file, exiting")
raise

if not os.path.exists(COMOUT_OBS):
os.makedirs(COMOUT_OBS)

FileHandler({'copy': filesToSave}).sync()
FileHandler({'copy': files_to_save}).sync()
41 changes: 0 additions & 41 deletions test/soca/gw/prepdata.sh

This file was deleted.

42 changes: 34 additions & 8 deletions test/soca/gw/setup_obsprep.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,43 @@
#!/bin/bash
set -ex

project_source_dir=$1
# working directory as set in cmake assumed to be ${PROJECT_BINARY_DIR}/test/soca/gw/obsprep
# which is the soca ctest's fake dmpdir

# working directory should be ${PROJECT_BINARY_DIR}/test/soca/gw/obsprep, set in ctest command
test_dmpdir="gdas.20180415/12"

rm -rf ${test_dmpdir}
mkdir -p ${test_dmpdir}
# Ensure project source directory is provided as argument
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <project_source_dir>"
exit 1
fi

cd ${test_dmpdir}
project_source_dir="$1"
testdatadir="${project_source_dir}/test/soca/testdata"

mkdir -p ocean/sss ocean/adt ocean/icec ocean/sst
#clean up previous attempts
rm -rf gdas.20180414 gdas.20180415

${project_source_dir}/test/soca/gw/prepdata.sh ${project_source_dir}
# Define PDYs, cycs, and obstypes
PDYs=("20180414" "20180415")
cycs=("00" "06" "12" "18")
obstypes=("SSS" "adt" "icec" "sst")

# Convert cdl files into nc for all cycles and obstypes
for PDY in "${PDYs[@]}"; do
PDYdir="gdas.${PDY}"
for cyc in "${cycs[@]}"; do
for obstype in "${obstypes[@]}"; do
fullsubdir="$PDYdir/$cyc/ocean/$obstype"
mkdir -p "$fullsubdir"

indir="${testdatadir}/${fullsubdir}"
for file in "$indir"/*.cdl; do
if [ -f "$file" ]; then
filename=$(basename -- "$file")
filename_noext="${filename%.cdl}"
ncgen -o "$fullsubdir/${filename_noext}.nc" "$file"
fi
done
done
done
done
100 changes: 100 additions & 0 deletions test/soca/testdata/gdas.20180414/00/ocean/adt/rads_adt_3a_2018104.cdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
netcdf rads_adt_3a_2018104.tmp {
dimensions:
time = UNLIMITED ; // (11 currently)
variables:
int adt_egm2008(time) ;
adt_egm2008:_FillValue = 2147483647 ;
adt_egm2008:long_name = "absolute dynamic topography (EGM2008)" ;
adt_egm2008:standard_name = "absolute_dynamic_topography_egm2008" ;
adt_egm2008:units = "m" ;
adt_egm2008:scale_factor = 0.0001 ;
adt_egm2008:coordinates = "lon lat" ;
int adt_xgm2016(time) ;
adt_xgm2016:_FillValue = 2147483647 ;
adt_xgm2016:long_name = "absolute dynamic topography (XGM2016)" ;
adt_xgm2016:standard_name = "absolute_dynamic_topography_xgm2016" ;
adt_xgm2016:units = "m" ;
adt_xgm2016:scale_factor = 0.0001 ;
adt_xgm2016:coordinates = "lon lat" ;
int cycle(time) ;
cycle:_FillValue = 2147483647 ;
cycle:long_name = "cycle number" ;
cycle:field = 9905s ;
int lat(time) ;
lat:_FillValue = 2147483647 ;
lat:long_name = "latitude" ;
lat:standard_name = "latitude" ;
lat:units = "degrees_north" ;
lat:scale_factor = 1.e-06 ;
lat:field = 201s ;
lat:comment = "Positive latitude is North latitude, negative latitude is South latitude" ;
int lon(time) ;
lon:_FillValue = 2147483647 ;
lon:long_name = "longitude" ;
lon:standard_name = "longitude" ;
lon:units = "degrees_east" ;
lon:scale_factor = 1.e-06 ;
lon:field = 301s ;
lon:comment = "East longitude relative to Greenwich meridian" ;
int pass(time) ;
pass:_FillValue = 2147483647 ;
pass:long_name = "pass number" ;
pass:field = 9906s ;
short sla(time) ;
sla:_FillValue = 32767s ;
sla:long_name = "sea level anomaly" ;
sla:standard_name = "sea_surface_height_above_sea_level" ;
sla:units = "m" ;
sla:quality_flag = "swh sig0 range_rms range_numval flags swh_rms sig0_rms" ;
sla:scale_factor = 0.0001 ;
sla:coordinates = "lon lat" ;
sla:field = 0s ;
sla:comment = "Sea level determined from satellite altitude - range - all altimetric corrections" ;
double time_mjd(time) ;
time_mjd:long_name = "Modified Julian Days" ;
time_mjd:standard_name = "time" ;
time_mjd:units = "days since 1858-11-17 00:00:00 UTC" ;
time_mjd:field = 105s ;
time_mjd:comment = "UTC time of measurement expressed in Modified Julian Days" ;

// global attributes:
:Conventions = "CF-1.7" ;
:title = "RADS 4 pass file" ;
:institution = "EUMETSAT / NOAA / TU Delft" ;
:source = "radar altimeter" ;
:references = "RADS Data Manual, Version 4.2 or later" ;
:featureType = "trajectory" ;
:ellipsoid = "TOPEX" ;
:ellipsoid_axis = 6378136.3 ;
:ellipsoid_flattening = 0.00335281317789691 ;
:filename = "rads_adt_3a_2018104.nc" ;
:mission_name = "SNTNL-3A" ;
:mission_phase = "a" ;
:log01 = "2019-01-11 | rads2nc --ymd=180414000000,180415000000 -C1,1000 -S3a -Vadt_egm2008,adt_xgm2016,sla,time_mjd,lon,lat,cycle,pass -Xxgm2016 -Xadt.xml -o/ftp/rads/adt//2018/rads_adt_3a_2018104.nc: RAW data from" ;
:history = "Fri Jan 26 15:44:21 2024: ncks -d time,0,10 /scratch1/NCEPDEV/stmp4/Shastri.Paturi/forAndrew/gdas.20180414/00/adt/rads_adt_3a_2018104.nc rads_adt_3a_2018104.tmp.nc\n",
"2019-01-11 12:29:34 : rads2nc --ymd=180414000000,180415000000 -C1,1000 -S3a -Vadt_egm2008,adt_xgm2016,sla,time_mjd,lon,lat,cycle,pass -Xxgm2016 -Xadt.xml -o/ftp/rads/adt//2018/rads_adt_3a_2018104.nc" ;
:NCO = "netCDF Operators version 5.0.6 (Homepage = http://nco.sf.net, Code = http://github.com/nco/nco)" ;
data:

adt_egm2008 = -10834, -11080, -11752, -12215, -12322, -12631, -12835,
-13106, -13520, -13476, -13197 ;

adt_xgm2016 = -11476, -10955, -12265, -13723, -13874, -13752, -13406,
-13319, -13383, -13127, -12743 ;

cycle = 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 ;

lat = -68340968, -68286987, -68232981, -68178950, -68124894, -68070813,
-68016708, -67962578, -67908424, -67854245, -67800043 ;

lon = -8586260, -8654949, -8723334, -8791417, -8859200, -8926685, -8993875,
-9060771, -9127375, -9193690, -9259717 ;

pass = 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175 ;

sla = 347, 463, 323, 207, 153, 107, 174, -96, -252, -8, 527 ;

time_mjd = 58222.0026388889, 58222.002650463, 58222.002662037,
58222.0026736111, 58222.0026851852, 58222.0026967593, 58222.0027083333,
58222.0027199074, 58222.0027314815, 58222.0027430556, 58222.0027546296 ;
}
Loading

0 comments on commit 0ceff32

Please sign in to comment.