From 4ac2ee78afe2aac31bc29d741f5dd8ca275d8a31 Mon Sep 17 00:00:00 2001 From: Patrick Worley Date: Sun, 6 Aug 2017 17:12:48 -0400 Subject: [PATCH 1/2] Check for NaNs and INFs in input to shr_reprosum_calc In recent development, NaNs and INFs are often first identified when passed to the shr_reprosum_calc routine, where they lead either to segmentation faults or to very slow performance (due to the interaction of NaNs and INFs with the reproducible sum logic). To more readily identify this error condition (and to prevent false attributions of error to shr_reprosum_calc), the input array is checked for the presence of NaNs and INFs. If any are found, an error message reporting the number of NaNs and INFs and the MPI process ID is written to the log, and the job is aborted. [BFB] *********1*********2*********3*********4*********5*********6*********7** Longer commit message body describing the commit. Can contain lists as follows: * Item 1 * Item 2 * Item 3 A good commit message should be written like an email, a subject followed by a blank line, followed by a more descriptive body. Can also contain a tag at the bottom describing what type of commit this is. [BFB] - Bit-For-Bit [FCC] - Flag Climate Changing [Non-BFB] - Non Bit-For-Bit [CC] - Climate Changing [NML] - Namelist Changing See Confluence for a more detailed description about these tags. 
--- cime/src/share/util/shr_reprosum_mod.F90 | 39 ++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/cime/src/share/util/shr_reprosum_mod.F90 b/cime/src/share/util/shr_reprosum_mod.F90 index ec4654fe2d29..c568cfe97b00 100644 --- a/cime/src/share/util/shr_reprosum_mod.F90 +++ b/cime/src/share/util/shr_reprosum_mod.F90 @@ -38,6 +38,7 @@ module shr_reprosum_mod use shr_log_mod, only: s_loglev => shr_log_Level use shr_log_mod, only: s_logunit => shr_log_Unit use shr_sys_mod, only: shr_sys_abort + use shr_infnan_mod,only: shr_infnan_isnan, shr_infnan_isinf use perf_mod !----------------------------------------------------------------------- @@ -338,8 +339,14 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & logical :: validate ! flag indicating need to ! verify gmax and max_levels ! are accurate/sufficient + integer :: nan_check, inf_check ! flag on whether there are + ! NaNs and INFs in input array + + integer :: num_nans, num_infs ! count of NaNs and INFs in + ! input array integer :: omp_nthreads ! number of OpenMP threads integer :: mpi_comm ! MPI subcommunicator + integer :: mypid ! MPI process ID (COMM_WORLD) integer :: tasks ! number of MPI processes integer :: ierr ! MPI error return integer :: ifld, isum, ithread ! loop variables @@ -389,6 +396,38 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & ! !----------------------------------------------------------------------- ! +! check whether input contains NaNs or INFs, and abort if so + + call t_startf('shr_reprosum_NaN_INF_Chk') + nan_check = .false. + inf_check = .false. + num_nans = 0 + num_infs = 0 + + nan_check = any(shr_infnan_isnan(arr)) + inf_check = any(shr_infnan_isinf(arr)) + if (nan_check .or. 
inf_check) then + do ifld=1,nflds + do isum=1,nsummands + if (shr_infnan_isnan(arr(isum,ifld))) then + num_nans = num_nans + 1 + endif + if (shr_infnan_isinf(arr(isum,ifld))) then + num_infs = num_infs + 1 + endif + end do + end do + endif + call t_stopf('shr_reprosum_NaN_INF_Chk') + + if ((num_nans > 0) .or. (num_infs > 0)) then + call mpi_comm_rank(MPI_COMM_WORLD, mypid, ierr) + write(s_logunit,37) real(num_nans,r8), real(num_infs,r8), mypid +37 format("SHR_REPROSUM_CALC: Input contains ",e12.5, & + " NaNs and ", e12.5, " INFs on process ", i7) + call shr_sys_abort("shr_reprosum_calc ERROR: NaNs or INFs in input") + endif + ! check whether should use shr_reprosum_ddpdd algorithm use_ddpdd_sum = repro_sum_use_ddpdd if ( present(ddpdd_sum) ) then From b53732437963214e77d08f8fca00d11c655e7724 Mon Sep 17 00:00:00 2001 From: Patrick Worley Date: Mon, 7 Aug 2017 19:14:08 -0400 Subject: [PATCH 2/2] Fix mistyping of variables New logical variables nan_check and inf_check were mistakenly declared as integer. --- cime/src/share/util/shr_reprosum_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime/src/share/util/shr_reprosum_mod.F90 b/cime/src/share/util/shr_reprosum_mod.F90 index c568cfe97b00..8e6e8f260993 100644 --- a/cime/src/share/util/shr_reprosum_mod.F90 +++ b/cime/src/share/util/shr_reprosum_mod.F90 @@ -339,7 +339,7 @@ subroutine shr_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & logical :: validate ! flag indicating need to ! verify gmax and max_levels ! are accurate/sufficient - integer :: nan_check, inf_check ! flag on whether there are + logical :: nan_check, inf_check ! flag on whether there are ! NaNs and INFs in input array integer :: num_nans, num_infs ! count of NaNs and INFs in