Skip to content

Commit

Permalink
Combine CI infrastructure, use get_expts_status etc to simplify Jenkins.
Browse files Browse the repository at this point in the history
  • Loading branch information
danielabdi-noaa committed Sep 25, 2022
1 parent 177193f commit 3965b56
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 290 deletions.
7 changes: 4 additions & 3 deletions .cicd/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pipeline {
axes {
axis {
name 'SRW_PLATFORM'
values 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1' //, 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'hera' // 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1' //, 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand Down Expand Up @@ -97,6 +97,7 @@ pipeline {
BRANCH_NAME_ESCAPED = env.BRANCH_NAME.replace('/', '_')
BUILD_VERSION = "${env.SRW_PLATFORM}-${env.SRW_COMPILER}-${env.BRANCH_NAME_ESCAPED}-${env.BUILD_NUMBER}"
BUILD_NAME = "ufs-srweather-app_${env.BUILD_VERSION}"
INSTALL_NAME = "install_${env.SRW_COMPILER}"
}

stages {
Expand Down Expand Up @@ -127,7 +128,7 @@ pipeline {

post {
success {
sh 'tar --create --gzip --verbose --file "${WORKSPACE}/${BUILD_NAME}.tgz" bin include lib share'
sh 'tar --create --gzip --verbose --file "${WORKSPACE}/${BUILD_NAME}.tgz" "${INSTALL_NAME}"'
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: true, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.BUILD_NAME}.tgz", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: true, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "build/srw_build-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.log", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
}
}
Expand All @@ -140,7 +141,7 @@ pipeline {
}

environment {
SRW_WE2E_EXPERIMENT_BASE_DIR = "${env.WORKSPACE}/experiments"
SRW_WE2E_EXPERIMENT_BASE_DIR = "${env.WORKSPACE}/../expt_dirs"
}

steps {
Expand Down
20 changes: 8 additions & 12 deletions .cicd/scripts/srw_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,13 @@ else
platform="${SRW_PLATFORM}"
fi

build_dir="${workspace}/build"
# Build and install
cd ${workspace}/test
./build.sh ${platform} ${SRW_COMPILER}
cd -

# Set build related environment variables and load required modules.
source "${workspace}/etc/lmod-setup.sh" "${platform}"
module use "${workspace}/modulefiles"
module load "build_${platform}_${SRW_COMPILER}"
# Create combined log file for upload to s3
build_dir="${workspace}/build_${SRW_COMPILER}"
cat ${build_dir}/log.cmake ${build_dir}/log.make \
>${build_dir}/srw_build-${platform}-${SRW_COMPILER}.log

# Compile SRW application and install to repository root.
mkdir "${build_dir}"
pushd "${build_dir}"
build_log_file="${build_dir}/srw_build-${platform}-${SRW_COMPILER}.log"
cmake -DCMAKE_INSTALL_PREFIX="${workspace}" "${workspace}" | tee "${build_log_file}"
make -j "${MAKE_JOBS}" | tee --append "${build_log_file}"
popd
223 changes: 23 additions & 200 deletions .cicd/scripts/srw_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,219 +25,42 @@ else
platform="${SRW_PLATFORM}"
fi

declare we2e_experiment_base_dir
if [[ -n "${SRW_WE2E_EXPERIMENT_BASE_DIR}" ]]; then
we2e_experiment_base_dir="${SRW_WE2E_EXPERIMENT_BASE_DIR}"
else
we2e_experiment_base_dir="${workspace}/experiments"
fi

# Test directories
we2e_experiment_base_dir="${workspace}/../expt_dirs"
we2e_test_dir="${workspace}/tests/WE2E"

we2e_test_file="${we2e_test_dir}/experiments.txt"

# The fundamental set of end-to-end tests to run.
declare -a we2e_fundamental_tests
we2e_fundamental_tests=('grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta'
'grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_RAP_suite_HRRR'
'grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_HRRR'
'grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_RRFS_v1beta'
'grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_HRRR'
'grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_HRRR'
'grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta'
'grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_RAP_suite_HRRR'
'grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta'
'nco_grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_HRRR'
'community_ensemble_2mems'
'custom_ESGgrid'
'deactivate_tasks'
'inline_post'
'nco_ensemble'
'specify_DOT_OR_USCORE'
'specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE'
'specify_RESTART_INTERVAL'
'specify_template_filenames')

if [[ "${platform}" != 'gaea' && "${platform}" != 'noaacloud' ]]; then
we2e_fundamental_tests+=('MET_ensemble_verification'
'MET_verification'
'pregen_grid_orog_sfc_climo')
fi

# The comprehensive set of end-to-end tests to run.
declare -a we2e_comprehensive_tests
we2e_comprehensive_tests=('community_ensemble_008mems'
'custom_GFDLgrid'
'custom_GFDLgrid__GFDLgrid_USE_NUM_CELLS_IN_FILENAMES_eq_FALSE'
'custom_GFDLgrid__GFDLgrid_USE_NUM_CELLS_IN_FILENAMES_eq_TRUE'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_grib2_2019061200'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_grib2_2019101818'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_grib2_2020022518'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_grib2_2020022600'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_grib2_2021010100'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2019061200'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2019101818'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2020022518'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2020022600'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2021010100'
'get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2021062000'
'get_from_HPSS_ics_GSMGFS_lbcs_GSMGFS'
'get_from_HPSS_ics_HRRR_lbcs_RAP'
'get_from_HPSS_ics_RAP_lbcs_RAP'
'get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio'
'grid_CONUS_25km_GFDLgrid_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_CONUS_3km_GFDLgrid_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_AK_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2'
'grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR'
'grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta'
'grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_2017_gfdlmp'
'grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_2017_gfdlmp_regional'
'grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2'
'grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR'
'grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_2017_gfdlmp'
'grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_v15p2'
'grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_v16'
'grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km'
'grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2'
'grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR'
'grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta'
'grid_RRFS_CONUScompact_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_HRRR'
'grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta'
'grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_RRFS_v1beta'
'grid_RRFS_CONUScompact_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_GFS_v15p2'
'grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_HRRR'
'grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta'
'grid_RRFS_NA_13km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta'
'grid_RRFS_NA_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta'
'grid_RRFS_SUBCONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'grid_RRFS_SUBCONUS_3km_ics_HRRR_lbcs_RAP_suite_GFS_v15p2'
'nco_grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16'
'nco_grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km')

declare -a we2e_tests
we2e_tests=("${we2e_fundamental_tests[@]}")
# Run the end-to-end tests.
if "${SRW_WE2E_COMPREHENSIVE_TESTS}"; then
we2e_tests+=("${we2e_comprehensive_tests[@]}")

# Add additional tests for Hera.
if [[ "${platform}" == 'hera' ]]; then
we2e_tests+=('specify_EXTRN_MDL_SYSBASEDIR_ICS_LBCS')
fi
fi

# Parses the launch log of a single test and reports its workflow status.
#
# Globals:
#   we2e_experiment_base_dir (read) - directory that holds one
#                                     sub-directory per test
# Arguments:
#   $1 - name of the test (used as the sub-directory name)
# Outputs:
#   Writes exactly one of 'IN PROGRESS', 'SUCCESS', 'FAILURE', 'UNKNOWN'
#   or 'NOT FOUND' to stdout.
function workflow_status() {
  local test="$1"

  local test_dir="${we2e_experiment_base_dir}/${test}"
  local log_file="${test_dir}/log.launch_FV3LAM_wflow"

  if [[ ! -f "${log_file}" ]]; then
    echo 'NOT FOUND'
    return
  fi

  # Keep the value of the LAST "Workflow status" line and strip whitespace
  # from BOTH ends. Trailing blanks (or a CR) must not turn an otherwise
  # valid status into 'UNKNOWN'.
  local status
  status="$(awk -F: '
    $1 ~ /^[[:space:]]+Workflow status/ { last = $2 }
    END {
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", last)
      print last
    }' "${log_file}")"

  case "${status}" in
    'IN PROGRESS' | 'SUCCESS' | 'FAILURE') echo "${status}" ;;
    *) echo 'UNKNOWN' ;;
  esac
}

# Counts the tests whose workflow is still running.
#
# Globals:
#   we2e_tests (read) - array of test names to inspect
# Outputs:
#   Prints "Tests remaining: <count>" to stdout while at least one test
#   reports 'IN PROGRESS'; prints nothing otherwise.
# Returns:
#   0 while at least one test is in progress, 1 once none are, so it can
#   drive a `while check_progress; do ...; done` polling loop.
function check_progress() {
  local remaining=0
  local name
  local status

  for name in "${we2e_tests[@]}"; do
    status="$(workflow_status "${name}")"
    if [[ "${status}" == 'IN PROGRESS' ]]; then
      # Plain assignment instead of (( remaining++ )): the post-increment
      # evaluates to 0 on the first hit, which yields a non-zero exit
      # status and would abort the script under `set -e` when this
      # function is called outside a conditional.
      remaining=$(( remaining + 1 ))
    fi
  done

  if (( remaining == 0 )); then
    return 1
  fi

  echo "Tests remaining: ${remaining}"
}

# Emits one "<test name> <status>" line on stdout for every entry of the
# we2e_tests array, with the status obtained from workflow_status.
function get_results() {
  local name
  local state

  for name in "${we2e_tests[@]}"; do
    state="$(workflow_status "${name}")"
    printf '%s %s\n' "${name}" "${state}"
  done
}

# Verify that there is a non-zero sized weather model executable.
[[ -s "${workspace}/bin/ufs_model" ]] || [[ -s "${workspace}/bin/NEMS.exe" ]]

# Set test related environment variables and load required modules.
source "${workspace}/etc/lmod-setup.sh" "${platform}"
module use "${workspace}/modulefiles"
module load "build_${platform}_${SRW_COMPILER}"
module load "wflow_${platform}"

if [[ "${platform}" == 'cheyenne' ]]; then
export PATH="/glade/p/ral/jntp/UFS_CAM/ncar_pylib_20200427/bin:${PATH}"
test_type="comprehensive"
else
if [[ "${platform}" == 'noaacloud' && -z "${PROJ_LIB-}" ]]; then
PROJ_LIB=''
fi

conda activate regional_workflow
test_type="fundamental"
fi

# Create the experiments/tests base directory.
mkdir "${we2e_experiment_base_dir}"

# Generate the experiments/tests file.
for test in "${we2e_tests[@]}"; do
echo "${test}" >> "${we2e_test_file}"
done

# Run the end-to-end tests.
"${we2e_test_dir}/run_WE2E_tests.sh" \
tests_file="${we2e_test_file}" \
machine="${platform}" \
account="${SRW_PROJECT}" \
expt_basedir="${we2e_experiment_base_dir}" \
compiler="${SRW_COMPILER}"
cd ${we2e_test_dir}
./setup_WE2E_tests.sh ${platform} ${SRW_PROJECT} ${SRW_COMPILER} #${test_type}

# Allow the tests to start before checking for status.
# TODO: Create a parameter that sets the initial start delay.
sleep 180

# Wait for all tests to complete.
while check_progress; do
while true; do
# Check status of all experiments
progress_file="${workspace}/we2e_test_results-${platform}-${SRW_COMPILER}.txt"
./get_expts_status.sh expts_basedir="${we2e_experiment_base_dir}" \
verbose="FALSE" num_log_lines=24000 | tee ${progress_file}

# Exit the loop only if there are no tests in progress
set +e
grep -q "Workflow status: IN PROGRESS" ${progress_file}
exit_code=$?
set -e

if [[ $exit_code -ne 0 ]]; then
break
fi

# TODO: Create a parameter that sets the poll frequency.
sleep 60
done

# Get test results and write to a file.
results="$(get_results |\
tee "${workspace}/we2e_test_results-${platform}-${SRW_COMPILER}.txt")"

# Check that the number of tests equals the number of successes, otherwise
# exit with a non-zero code that equals the difference.
successes="$(awk '$2 == "SUCCESS" {print $1}' <<< "${results}" | wc -l)"
exit "$(( ${#we2e_tests[@]} - ${successes} ))"
Loading

0 comments on commit 3965b56

Please sign in to comment.