From f31cfc99e29bcef50d4e3b46c129c838a8a72ee8 Mon Sep 17 00:00:00 2001 From: Tony Craig Date: Tue, 9 Jun 2020 08:42:29 -0700 Subject: [PATCH] remove INTEL20_WORKAROUND and port to mustang (#462) --- .../cicedynB/dynamics/ice_transport_remap.F90 | 136 +----------------- configuration/scripts/cice.batch.csh | 3 +- configuration/scripts/cice.launch.csh | 2 +- .../scripts/machines/Macros.mustang_intel18 | 46 ++++++ .../scripts/machines/Macros.mustang_intel19 | 46 ++++++ .../scripts/machines/Macros.mustang_intel20 | 46 ++++++ .../scripts/machines/env.izumi_intel | 2 +- .../scripts/machines/env.mustang_intel18 | 44 ++++++ .../scripts/machines/env.mustang_intel19 | 44 ++++++ .../scripts/machines/env.mustang_intel20 | 44 ++++++ 10 files changed, 276 insertions(+), 137 deletions(-) create mode 100644 configuration/scripts/machines/Macros.mustang_intel18 create mode 100644 configuration/scripts/machines/Macros.mustang_intel19 create mode 100644 configuration/scripts/machines/Macros.mustang_intel20 create mode 100755 configuration/scripts/machines/env.mustang_intel18 create mode 100755 configuration/scripts/machines/env.mustang_intel19 create mode 100755 configuration/scripts/machines/env.mustang_intel20 diff --git a/cicecore/cicedynB/dynamics/ice_transport_remap.F90 b/cicecore/cicedynB/dynamics/ice_transport_remap.F90 index 2c333e3a8..070f3b7ad 100644 --- a/cicecore/cicedynB/dynamics/ice_transport_remap.F90 +++ b/cicecore/cicedynB/dynamics/ice_transport_remap.F90 @@ -31,13 +31,13 @@ module ice_transport_remap use ice_kinds_mod use ice_blocks, only: nx_block, ny_block - use ice_communicate, only: my_task, ice_barrier + use ice_communicate, only: my_task use ice_constants, only: c0, c1, c2, c12, p333, p4, p5, p6, & eps13, eps16, & field_loc_center, field_type_scalar, & field_loc_NEcorner, field_type_vector use ice_domain_size, only: max_blocks, ncat - use ice_fileunits, only: nu_diag, flush_fileunit + use ice_fileunits, only: nu_diag use ice_exit, only: abort_ice use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted use icepack_intfc, only: icepack_query_parameters @@ -381,127 +381,57 @@ subroutine horizontal_remap (dt, ntrace, & ilo,ihi,jlo,jhi,&! beginning and end of physical domain n, m ! ice category, tracer indices -! tcraig, the intel 20.0.1 compiler generates a segfault when entering this subroutine -! at runtime. -! This is probably a compiler bug and a workaround is to allocate the temporary data -! rather than define it statically. Initial results don't show any slowdown, but -! to keep the issue highlighted, an ifdef was created as a workaround. - -#ifdef INTEL20_WORKAROUND - integer (kind=int_kind), dimension(:,:), allocatable :: & -#else integer (kind=int_kind), dimension(0:ncat,max_blocks) :: & -#endif icellsnc ! number of cells with ice -#ifdef INTEL20_WORKAROUND - integer (kind=int_kind), dimension(:,:), allocatable :: & -#else integer (kind=int_kind), dimension(nx_block*ny_block,0:ncat) :: & -#endif indxinc, indxjnc ! compressed i/j indices -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension(:,:), allocatable :: & -#else real (kind=dbl_kind), dimension(nx_block,ny_block) :: & -#endif edgearea_e ,&! area of departure regions for east edges edgearea_n ! area of departure regions for north edges -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & -#endif dpx ,&! x coordinates of departure points at cell corners dpy ! y coordinates of departure points at cell corners -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension(:,:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension(nx_block,ny_block,0:ncat,max_blocks) :: & -#endif mc ,&! mass at geometric center of cell mx, my ! limited derivative of mass wrt x and y -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension(:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension(nx_block,ny_block,0:ncat) :: & -#endif mmask ! = 1. if mass is present, = 0. otherwise -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat,max_blocks) :: & -#endif tc ,&! tracer values at geometric center of cell tx, ty ! limited derivative of tracer wrt x and y -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat) :: & -#endif tmask ! = 1. if tracer is present, = 0. otherwise -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,0:ncat) :: & -#endif mflxe, mflxn ! mass transports across E and N cell edges -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat) :: & -#endif mtflxe, mtflxn ! mass*tracer transports across E and N cell edges -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,ngroups) :: & -#endif triarea ! area of east-edge departure triangle -#ifdef INTEL20_WORKAROUND - real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: & -#else real (kind=dbl_kind), dimension (nx_block,ny_block,0:nvert,ngroups) :: & -#endif xp, yp ! x and y coordinates of special triangle points ! (need 4 points for triangle integrals) -#ifdef INTEL20_WORKAROUND - integer (kind=int_kind), dimension (:,:,:), allocatable :: & -#else integer (kind=int_kind), dimension (nx_block,ny_block,ngroups) :: & -#endif iflux ,&! i index of cell contributing transport jflux ! j index of cell contributing transport -#ifdef INTEL20_WORKAROUND - integer (kind=int_kind), dimension(:,:), allocatable :: & -#else integer (kind=int_kind), dimension(ngroups,max_blocks) :: & -#endif icellsng ! number of cells with ice -#ifdef INTEL20_WORKAROUND - integer (kind=int_kind), dimension(:,:), allocatable :: & -#else integer (kind=int_kind), dimension(nx_block*ny_block,ngroups) :: & -#endif indxing, indxjng ! compressed i/j indices -#ifdef INTEL20_WORKAROUND - integer (kind=int_kind), dimension(:,:,:), allocatable :: & -#else integer (kind=int_kind), dimension(nx_block,ny_block,max_blocks) :: & -#endif halomask ! temporary mask for fast halo updates logical (kind=log_kind) :: & @@ -525,37 +455,6 @@ subroutine horizontal_remap (dt, ntrace, & !---! Remap the open water area (without tracers). !---!------------------------------------------------------------------- -#ifdef INTEL20_WORKAROUND - allocate(icellsnc(0:ncat,max_blocks)) - allocate(indxinc(nx_block*ny_block,0:ncat)) - allocate(indxjnc(nx_block*ny_block,0:ncat)) - allocate(edgearea_e(nx_block,ny_block)) - allocate(edgearea_n(nx_block,ny_block)) - allocate(dpx(nx_block,ny_block,max_blocks)) - allocate(dpy(nx_block,ny_block,max_blocks)) - allocate(mc(nx_block,ny_block,0:ncat,max_blocks)) - allocate(mx(nx_block,ny_block,0:ncat,max_blocks)) - allocate(my(nx_block,ny_block,0:ncat,max_blocks)) - allocate(mmask(nx_block,ny_block,0:ncat)) - allocate(tc(nx_block,ny_block,ntrace,ncat,max_blocks)) - allocate(tx(nx_block,ny_block,ntrace,ncat,max_blocks)) - allocate(ty(nx_block,ny_block,ntrace,ncat,max_blocks)) - allocate(tmask(nx_block,ny_block,ntrace,ncat)) - allocate(mflxe(nx_block,ny_block,0:ncat)) - allocate(mflxn(nx_block,ny_block,0:ncat)) - allocate(mtflxe(nx_block,ny_block,ntrace,ncat)) - allocate(mtflxn(nx_block,ny_block,ntrace,ncat)) - allocate(triarea(nx_block,ny_block,ngroups)) - allocate(xp(nx_block,ny_block,0:nvert,ngroups)) - allocate(yp(nx_block,ny_block,0:nvert,ngroups)) - allocate(iflux(nx_block,ny_block,ngroups)) - allocate(jflux(nx_block,ny_block,ngroups)) - allocate(icellsng(ngroups,max_blocks)) - allocate(indxing(nx_block*ny_block,ngroups)) - allocate(indxjng(nx_block*ny_block,ngroups)) - allocate(halomask(nx_block,ny_block,max_blocks)) -#endif - !--- tcraig, tcx, this omp loop leads to a seg fault in gnu !--- need to check private variables and debug further !$TCXOMP PARALLEL DO PRIVATE(iblk,ilo,ihi,jlo,jhi,this_block,n,m, & @@ -948,37 +847,6 @@ subroutine horizontal_remap (dt, ntrace, & enddo ! iblk !$TCXOMP END PARALLEL DO -#ifdef INTEL20_WORKAROUND - deallocate(icellsnc) - deallocate(indxinc) - deallocate(indxjnc) - deallocate(edgearea_e) - deallocate(edgearea_n) - deallocate(dpx) - deallocate(dpy) - deallocate(mc) - deallocate(mx) - deallocate(my) - deallocate(mmask) - deallocate(tc) - deallocate(tx) - deallocate(ty) - deallocate(tmask) - deallocate(mflxe) - deallocate(mflxn) - deallocate(mtflxe) - deallocate(mtflxn) - deallocate(triarea) - deallocate(xp) - deallocate(yp) - deallocate(iflux) - deallocate(jflux) - deallocate(icellsng) - deallocate(indxing) - deallocate(indxjng) - deallocate(halomask) -#endif - end subroutine horizontal_remap !======================================================================= diff --git a/configuration/scripts/cice.batch.csh b/configuration/scripts/cice.batch.csh index 07e6d86ec..79edd4bbe 100755 --- a/configuration/scripts/cice.batch.csh +++ b/configuration/scripts/cice.batch.csh @@ -91,7 +91,7 @@ cat >> ${jobfile} << EOFB #PBS -l walltime=${batchtime} EOFB -else if (${ICE_MACHINE} =~ thunder* || ${ICE_MACHINE} =~ gordon* || ${ICE_MACHINE} =~ conrad* || ${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr*) then +else if (${ICE_MACHINE} =~ thunder* || ${ICE_MACHINE} =~ gordon* || ${ICE_MACHINE} =~ conrad* || ${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr* || ${ICE_MACHINE} =~ mustang) then cat >> ${jobfile} << EOFB #PBS -N ${shortcase} #PBS -q ${queue} @@ -99,6 +99,7 @@ cat >> ${jobfile} << EOFB #PBS -l select=${nnodes}:ncpus=${maxtpn}:mpiprocs=${taskpernode} #PBS -l walltime=${batchtime} #PBS -j oe +#PBS -W umask=022 ###PBS -M username@domain.com ###PBS -m be EOFB diff --git a/configuration/scripts/cice.launch.csh b/configuration/scripts/cice.launch.csh index 297f1df39..e1189e23d 100755 --- a/configuration/scripts/cice.launch.csh +++ b/configuration/scripts/cice.launch.csh @@ -58,7 +58,7 @@ EOFR endif #======= -else if (${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr*) then +else if (${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr* || ${ICE_MACHINE} =~ mustang*) then if (${ICE_COMMDIR} =~ serial*) then cat >> ${jobfile} << EOFR ./cice >&! \$ICE_RUNLOG_FILE diff --git a/configuration/scripts/machines/Macros.mustang_intel18 b/configuration/scripts/machines/Macros.mustang_intel18 new file mode 100644 index 000000000..5d1849488 --- /dev/null +++ b/configuration/scripts/machines/Macros.mustang_intel18 @@ -0,0 +1,46 @@ +#============================================================================== +# Macros file for AFRL mustang, intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -O2 -fp-model precise -xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created +else + FFLAGS += -O2 +endif + +SCC := icc +SFC := ifort +MPICC := icc +MPIFC := ifort + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +#defined by env +#NETCDF_PATH := $(NETCDF_PATH) + +INCLDIR += -I$(NETCDF_PATH)/include +LIB_NETCDF := $(NETCDF_PATH)/lib +SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -lmpi + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -qopenmp + CFLAGS += -qopenmp + FFLAGS += -qopenmp +endif + diff --git a/configuration/scripts/machines/Macros.mustang_intel19 b/configuration/scripts/machines/Macros.mustang_intel19 new file mode 100644 index 000000000..5d1849488 --- /dev/null +++ b/configuration/scripts/machines/Macros.mustang_intel19 @@ -0,0 +1,46 @@ +#============================================================================== +# Macros file for AFRL mustang, intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -O2 -fp-model precise -xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created +else + FFLAGS += -O2 +endif + +SCC := icc +SFC := ifort +MPICC := icc +MPIFC := ifort + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +#defined by env +#NETCDF_PATH := $(NETCDF_PATH) + +INCLDIR += -I$(NETCDF_PATH)/include +LIB_NETCDF := $(NETCDF_PATH)/lib +SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -lmpi + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -qopenmp + CFLAGS += -qopenmp + FFLAGS += -qopenmp +endif + diff --git a/configuration/scripts/machines/Macros.mustang_intel20 b/configuration/scripts/machines/Macros.mustang_intel20 new file mode 100644 index 000000000..5d1849488 --- /dev/null +++ b/configuration/scripts/machines/Macros.mustang_intel20 @@ -0,0 +1,46 @@ +#============================================================================== +# Macros file for AFRL mustang, intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -O2 -fp-model precise -xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created +else + FFLAGS += -O2 +endif + +SCC := icc +SFC := ifort +MPICC := icc +MPIFC := ifort + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +#defined by env +#NETCDF_PATH := $(NETCDF_PATH) + +INCLDIR += -I$(NETCDF_PATH)/include +LIB_NETCDF := $(NETCDF_PATH)/lib +SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -lmpi + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -qopenmp + CFLAGS += -qopenmp + FFLAGS += -qopenmp +endif + diff --git a/configuration/scripts/machines/env.izumi_intel b/configuration/scripts/machines/env.izumi_intel index 4c7c7a648..55eccaf11 100755 --- a/configuration/scripts/machines/env.izumi_intel +++ b/configuration/scripts/machines/env.izumi_intel @@ -19,7 +19,7 @@ endif setenv ICE_MACHINE_ENVNAME izumi setenv ICE_MACHINE_COMPILER intel setenv ICE_MACHINE_MAKE gmake -setenv ICE_MACHINE_CPPDEFS '"-DINTEL20_WORKAROUND"' +setenv ICE_MACHINE_CPPDEFS "" setenv ICE_MACHINE_WKDIR /scratch/cluster/$user/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /fs/cgd/csm/inputdata setenv ICE_MACHINE_BASELINE /scratch/cluster/$user/CICE_BASELINE diff --git a/configuration/scripts/machines/env.mustang_intel18 b/configuration/scripts/machines/env.mustang_intel18 new file mode 100755 index 000000000..5ebe9ec70 --- /dev/null +++ b/configuration/scripts/machines/env.mustang_intel18 @@ -0,0 +1,44 @@ +#!/bin/csh -f + +set inp = "undefined" +if ($#argv == 1) then + set inp = $1 +endif + +if ("$inp" != "-nomodules") then + +source ${MODULESHOME}/init/csh + +module unload compiler +module unload mpt +module unload netcdf-fortran + +module load costinit +module load git +module load compiler/intel/2018.3.222 +module load mpt/2.18 +module load netcdf-fortran/intel/4.4.2 + +setenv NETCDF_PATH /app/COST/netcdf-fortran/4.4.2/intel + +#setenv OMP_STACKSIZE 256M +#setenv MP_LABELIO yes +#setenv MP_INFOLEVEL 2 +#setenv MP_SHARED_MEMORY yes +#setenv MP_EUILIB us +#setenv MP_EAGER_LIMIT 0 + +endif + +setenv ICE_MACHINE_ENVNAME mustang +setenv ICE_MACHINE_COMPILER intel18 +setenv ICE_MACHINE_MAKE gmake +setenv ICE_MACHINE_WKDIR $WORKDIR/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /p/work1/projects/RASM/cice-consortium +setenv ICE_MACHINE_BASELINE $WORKDIR/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT "qsub " +setenv ICE_MACHINE_ACCT P00000000 +setenv ICE_MACHINE_QUEUE "debug" +setenv ICE_MACHINE_TPNODE 48 # tasks per node +setenv ICE_MACHINE_BLDTHRDS 4 +setenv ICE_MACHINE_QSTAT "qstat " diff --git a/configuration/scripts/machines/env.mustang_intel19 b/configuration/scripts/machines/env.mustang_intel19 new file mode 100755 index 000000000..b402e3ff4 --- /dev/null +++ b/configuration/scripts/machines/env.mustang_intel19 @@ -0,0 +1,44 @@ +#!/bin/csh -f + +set inp = "undefined" +if ($#argv == 1) then + set inp = $1 +endif + +if ("$inp" != "-nomodules") then + +source ${MODULESHOME}/init/csh + +module unload compiler +module unload mpt +module unload netcdf-fortran + +module load costinit +module load git +module load compiler/intel/2019.3.199 +module load mpt/2.20 +module load netcdf-fortran/intel/4.4.2 + +setenv NETCDF_PATH /app/COST/netcdf-fortran/4.4.2/intel + +#setenv OMP_STACKSIZE 256M +#setenv MP_LABELIO yes +#setenv MP_INFOLEVEL 2 +#setenv MP_SHARED_MEMORY yes +#setenv MP_EUILIB us +#setenv MP_EAGER_LIMIT 0 + +endif + +setenv ICE_MACHINE_ENVNAME mustang +setenv ICE_MACHINE_COMPILER intel19 +setenv ICE_MACHINE_MAKE gmake +setenv ICE_MACHINE_WKDIR $WORKDIR/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /p/work1/projects/RASM/cice-consortium +setenv ICE_MACHINE_BASELINE $WORKDIR/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT "qsub " +setenv ICE_MACHINE_ACCT P00000000 +setenv ICE_MACHINE_QUEUE "debug" +setenv ICE_MACHINE_TPNODE 48 # tasks per node +setenv ICE_MACHINE_BLDTHRDS 4 +setenv ICE_MACHINE_QSTAT "qstat " diff --git a/configuration/scripts/machines/env.mustang_intel20 b/configuration/scripts/machines/env.mustang_intel20 new file mode 100755 index 000000000..c664f3ca0 --- /dev/null +++ b/configuration/scripts/machines/env.mustang_intel20 @@ -0,0 +1,44 @@ +#!/bin/csh -f + +set inp = "undefined" +if ($#argv == 1) then + set inp = $1 +endif + +if ("$inp" != "-nomodules") then + +source ${MODULESHOME}/init/csh + +module unload compiler +module unload mpt +module unload netcdf-fortran + +module load costinit +module load git +module load compiler/intel/2020.0.1 +module load mpt/2.20 +module load netcdf-fortran/intel/4.4.2 + +setenv NETCDF_PATH /app/COST/netcdf-fortran/4.4.2/intel + +#setenv OMP_STACKSIZE 256M +#setenv MP_LABELIO yes +#setenv MP_INFOLEVEL 2 +#setenv MP_SHARED_MEMORY yes +#setenv MP_EUILIB us +#setenv MP_EAGER_LIMIT 0 + +endif + +setenv ICE_MACHINE_ENVNAME mustang +setenv ICE_MACHINE_COMPILER intel20 +setenv ICE_MACHINE_MAKE gmake +setenv ICE_MACHINE_WKDIR $WORKDIR/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /p/work1/projects/RASM/cice-consortium +setenv ICE_MACHINE_BASELINE $WORKDIR/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT "qsub " +setenv ICE_MACHINE_ACCT P00000000 +setenv ICE_MACHINE_QUEUE "debug" +setenv ICE_MACHINE_TPNODE 48 # tasks per node +setenv ICE_MACHINE_BLDTHRDS 4 +setenv ICE_MACHINE_QSTAT "qstat "