Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --cpus-per-task to srun commands for Hera and Orion #2077

Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add --cpus-per-task flag to APRUN commands
- Update to SLURM on Orion now requires jobs to explicitly
state thread needs in APRUN/srun command at runtime, instead
of exporting to environment and getting it from there.
- Add "--cpus-per-task" flag with thread # to APRUN commands.

Refs #2044
KateFriedman-NOAA committed Nov 14, 2023
commit 4b492f46464a2a97da26b2e02c3addc2d3af734c
40 changes: 20 additions & 20 deletions env/ORION.env
Original file line number Diff line number Diff line change
@@ -59,15 +59,15 @@ elif [[ "${step}" = "atmanlrun" ]]; then

export NTHREADS_ATMANL=${nth_atmanlrun:-${nth_max}}
[[ ${NTHREADS_ATMANL} -gt ${nth_max} ]] && export NTHREADS_ATMANL=${nth_max}
export APRUN_ATMANL="${launcher} -n ${npe_atmanlrun}"
export APRUN_ATMANL="${launcher} -n ${npe_atmanlrun} --cpus-per-task=${NTHREADS_ATMANL}"

elif [[ "${step}" = "atmensanlrun" ]]; then

nth_max=$((npe_node_max / npe_node_atmensanlrun))

export NTHREADS_ATMENSANL=${nth_atmensanlrun:-${nth_max}}
[[ ${NTHREADS_ATMENSANL} -gt ${nth_max} ]] && export NTHREADS_ATMENSANL=${nth_max}
export APRUN_ATMENSANL="${launcher} -n ${npe_atmensanlrun}"
export APRUN_ATMENSANL="${launcher} -n ${npe_atmensanlrun} --cpus-per-task=${NTHREADS_ATMENSANL}"

elif [[ "${step}" = "aeroanlrun" ]]; then

@@ -77,15 +77,15 @@ elif [[ "${step}" = "aeroanlrun" ]]; then

export NTHREADS_AEROANL=${nth_aeroanlrun:-${nth_max}}
[[ ${NTHREADS_AEROANL} -gt ${nth_max} ]] && export NTHREADS_AEROANL=${nth_max}
export APRUN_AEROANL="${launcher} -n ${npe_aeroanlrun}"
export APRUN_AEROANL="${launcher} -n ${npe_aeroanlrun} --cpus-per-task=${NTHREADS_AEROANL}"

elif [[ "${step}" = "landanl" ]]; then

nth_max=$((npe_node_max / npe_node_landanl))

export NTHREADS_LANDANL=${nth_landanl:-${nth_max}}
[[ ${NTHREADS_LANDANL} -gt ${nth_max} ]] && export NTHREADS_LANDANL=${nth_max}
export APRUN_LANDANL="${launcher} -n ${npe_landanl}"
export APRUN_LANDANL="${launcher} -n ${npe_landanl} --cpus-per-task=${NTHREADS_LANDANL}"

export APRUN_APPLY_INCR="${launcher} -n 6"

@@ -97,7 +97,7 @@ elif [[ "${step}" = "ocnanalbmat" ]]; then

export NTHREADS_OCNANAL=${nth_ocnanalbmat:-${nth_max}}
[[ ${NTHREADS_OCNANAL} -gt ${nth_max} ]] && export NTHREADS_OCNANAL=${nth_max}
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalbmat}"
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalbmat} --cpus-per-task=${NTHREADS_OCNANAL}"

elif [[ "${step}" = "ocnanalrun" ]]; then

@@ -107,7 +107,7 @@ elif [[ "${step}" = "ocnanalrun" ]]; then

export NTHREADS_OCNANAL=${nth_ocnanalrun:-${nth_max}}
[[ ${NTHREADS_OCNANAL} -gt ${nth_max} ]] && export NTHREADS_OCNANAL=${nth_max}
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalrun}"
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalrun} --cpus-per-task=${NTHREADS_OCNANAL}"

elif [[ "${step}" = "ocnanalchkpt" ]]; then

@@ -117,7 +117,7 @@ elif [[ "${step}" = "ocnanalchkpt" ]]; then

export NTHREADS_OCNANAL=${nth_ocnanalchkpt:-${nth_max}}
[[ ${NTHREADS_OCNANAL} -gt ${nth_max} ]] && export NTHREADS_OCNANAL=${nth_max}
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalchkpt}"
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalchkpt} --cpus-per-task=${NTHREADS_OCNANAL}"

elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then

@@ -153,7 +153,7 @@ elif [[ "${step}" = "sfcanl" ]]; then
export NTHREADS_CYCLE=${nth_sfcanl:-14}
[[ ${NTHREADS_CYCLE} -gt ${npe_node_max} ]] && export NTHREADS_CYCLE=${npe_node_max}
npe_sfcanl=${ntiles:-6}
export APRUN_CYCLE="${launcher} -n ${npe_sfcanl}"
export APRUN_CYCLE="${launcher} -n ${npe_sfcanl} --cpus-per-task=${NTHREADS_CYCLE}"

elif [[ "${step}" = "eobs" ]]; then

@@ -168,7 +168,7 @@ elif [[ "${step}" = "eobs" ]]; then

export NTHREADS_GSI=${nth_eobs:-${nth_max}}
[[ ${NTHREADS_GSI} -gt ${nth_max} ]] && export NTHREADS_GSI=${nth_max}
export APRUN_GSI="${launcher} -n ${npe_gsi:-${npe_eobs}}"
export APRUN_GSI="${launcher} -n ${npe_gsi:-${npe_eobs}} --cpus-per-task=${NTHREADS_GSI}"

elif [[ "${step}" = "eupd" ]]; then

@@ -180,7 +180,7 @@ elif [[ "${step}" = "eupd" ]]; then

export NTHREADS_ENKF=${nth_eupd:-${nth_max}}
[[ ${NTHREADS_ENKF} -gt ${nth_max} ]] && export NTHREADS_ENKF=${nth_max}
export APRUN_ENKF="${launcher} -n ${npe_enkf:-${npe_eupd}}"
export APRUN_ENKF="${launcher} -n ${npe_enkf:-${npe_eupd}} --cpus-per-task=${NTHREADS_ENKF}"

elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

@@ -204,56 +204,56 @@ elif [[ "${step}" = "upp" ]]; then

export NTHREADS_UPP=${nth_upp:-1}
[[ ${NTHREADS_UPP} -gt ${nth_max} ]] && export NTHREADS_UPP=${nth_max}
export APRUN_UPP="${launcher} -n ${npe_upp}"
export APRUN_UPP="${launcher} -n ${npe_upp} --cpus-per-task=${NTHREADS_UPP}"

elif [[ "${step}" = "post" ]]; then

nth_max=$((npe_node_max / npe_node_post))

export NTHREADS_NP=${nth_np:-1}
[[ ${NTHREADS_NP} -gt ${nth_max} ]] && export NTHREADS_NP=${nth_max}
export APRUN_NP="${launcher} -n ${npe_post}"
export APRUN_NP="${launcher} -n ${npe_post} --cpus-per-task=${NTHREADS_NP}"

export USE_CFP="YES" # Use MPMD for downstream product generation on Orion
export NTHREADS_DWN=${nth_dwn:-1}
[[ ${NTHREADS_DWN} -gt ${nth_max} ]] && export NTHREADS_DWN=${nth_max}
export APRUN_DWN="${launcher} -n ${npe_dwn}"
export APRUN_DWN="${launcher} -n ${npe_dwn} --cpus-per-task=${NTHREADS_DWN}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))

export NTHREADS_ECEN=${nth_ecen:-${nth_max}}
[[ ${NTHREADS_ECEN} -gt ${nth_max} ]] && export NTHREADS_ECEN=${nth_max}
export APRUN_ECEN="${launcher} -n ${npe_ecen}"
export APRUN_ECEN="${launcher} -n ${npe_ecen} --cpus-per-task=${NTHREADS_ECEN}"

export NTHREADS_CHGRES=${nth_chgres:-12}
[[ ${NTHREADS_CHGRES} -gt ${npe_node_max} ]] && export NTHREADS_CHGRES=${npe_node_max}
export APRUN_CHGRES="time"

export NTHREADS_CALCINC=${nth_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${nth_max} ]] && export NTHREADS_CALCINC=${nth_max}
export APRUN_CALCINC="${launcher} -n ${npe_ecen}"
export APRUN_CALCINC="${launcher} -n ${npe_ecen} --cpus-per-task=${NTHREADS_CALCINC}"

elif [[ "${step}" = "esfc" ]]; then

nth_max=$((npe_node_max / npe_node_esfc))

export NTHREADS_ESFC=${nth_esfc:-${nth_max}}
[[ ${NTHREADS_ESFC} -gt ${nth_max} ]] && export NTHREADS_ESFC=${nth_max}
export APRUN_ESFC="${launcher} -n ${npe_esfc}"
export APRUN_ESFC="${launcher} -n ${npe_esfc} --cpus-per-task=${NTHREADS_ESFC}"

export NTHREADS_CYCLE=${nth_cycle:-14}
[[ ${NTHREADS_CYCLE} -gt ${npe_node_max} ]] && export NTHREADS_CYCLE=${npe_node_max}
export APRUN_CYCLE="${launcher} -n ${npe_esfc}"
export APRUN_CYCLE="${launcher} -n ${npe_esfc} --cpus-per-task=${NTHREADS_CYCLE}"

elif [[ "${step}" = "epos" ]]; then

nth_max=$((npe_node_max / npe_node_epos))

export NTHREADS_EPOS=${nth_epos:-${nth_max}}
[[ ${NTHREADS_EPOS} -gt ${nth_max} ]] && export NTHREADS_EPOS=${nth_max}
export APRUN_EPOS="${launcher} -n ${npe_epos}"
export APRUN_EPOS="${launcher} -n ${npe_epos} --cpus-per-task=${NTHREADS_EPOS}"

elif [[ "${step}" = "postsnd" ]]; then

@@ -263,7 +263,7 @@ elif [[ "${step}" = "postsnd" ]]; then

export NTHREADS_POSTSND=${nth_postsnd:-1}
[[ ${NTHREADS_POSTSND} -gt ${nth_max} ]] && export NTHREADS_POSTSND=${nth_max}
export APRUN_POSTSND="${launcher} -n ${npe_postsnd}"
export APRUN_POSTSND="${launcher} -n ${npe_postsnd} --cpus-per-task=${NTHREADS_POSTSND}"

export NTHREADS_POSTSNDCFP=${nth_postsndcfp:-1}
[[ ${NTHREADS_POSTSNDCFP} -gt ${nth_max} ]] && export NTHREADS_POSTSNDCFP=${nth_max}
@@ -298,6 +298,6 @@ elif [[ "${step}" = "fit2obs" ]]; then

export NTHREADS_FIT2OBS=${nth_fit2obs:-1}
[[ ${NTHREADS_FIT2OBS} -gt ${nth_max} ]] && export NTHREADS_FIT2OBS=${nth_max}
export MPIRUN="${launcher} -n ${npe_fit2obs}"
export MPIRUN="${launcher} -n ${npe_fit2obs} --cpus-per-task=${NTHREADS_FIT2OBS}"

fi