From 4f2d3f4b8772e7d14651ec4813b22675d07a509b Mon Sep 17 00:00:00 2001 From: noel Date: Mon, 28 Aug 2017 16:55:27 -0700 Subject: [PATCH 1/2] For Intel builds, there are 7 HOMME Fortran files we build with higher optimization flags. Add -fp-model fast to those 7 builds as this avoids the slow pow function on KNL, and may generally improve performance for those files. Passes acme_developer and a HOMME test with create_test -g and -c --- config/acme/machines/Depends.intel | 3 ++- config/acme/machines/Depends.intel18 | 36 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 config/acme/machines/Depends.intel18 diff --git a/config/acme/machines/Depends.intel b/config/acme/machines/Depends.intel index b706186930d..468ee8a4917 100644 --- a/config/acme/machines/Depends.intel +++ b/config/acme/machines/Depends.intel @@ -30,7 +30,8 @@ kissvec.o ifeq ($(DEBUG),FALSE) $(PERFOBJS): %.o: %.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< +# $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div $< $(REDUCED_OPT_OBJS): %.o: %.F90 $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O1 $< $(REDUCED_PRECISION_OBJS): %.o: %.F90 diff --git a/config/acme/machines/Depends.intel18 b/config/acme/machines/Depends.intel18 new file mode 100644 index 00000000000..ad2af6fc5db --- /dev/null +++ b/config/acme/machines/Depends.intel18 @@ -0,0 +1,36 @@ +# +PERFOBJS=\ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ +prim_advance_mod.o \ +uwshcu.o + +# shr_wv_sat_mod does not need to have better than ~0.1% precision, and benefits +# enormously from a lower precision in the vector functions. 
+REDUCED_PRECISION_OBJS=\ +shr_wv_sat_mod.o + +SHR_RANDNUM_FORT_OBJS=\ +kissvec_mod.o \ +mersennetwister_mod.o \ +dSFMT_interface.o \ +shr_RandNum_mod.o + +SHR_RANDNUM_C_OBJS=\ +dSFMT.o \ +dSFMT_utils.o \ +kissvec.o + +ifeq ($(DEBUG),FALSE) + $(PERFOBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div $< + $(REDUCED_PRECISION_OBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -fimf-precision=low -fp-model fast $< + $(SHR_RANDNUM_FORT_OBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div -no-prec-sqrt -qoverride-limits $< + $(SHR_RANDNUM_C_OBJS): %.o: %.c + $(CC) -c $(INCLDIR) $(INCS) $(CFLAGS) -O3 -fp-model fast $< +endif From b3d0c93e35dd779f050c0214c754d7e49cf65164 Mon Sep 17 00:00:00 2001 From: noel Date: Mon, 28 Aug 2017 17:04:30 -0700 Subject: [PATCH 2/2] Whoops, I had the change I wanted commented out. --- config/acme/machines/Depends.intel | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config/acme/machines/Depends.intel b/config/acme/machines/Depends.intel index 468ee8a4917..0ce945caded 100644 --- a/config/acme/machines/Depends.intel +++ b/config/acme/machines/Depends.intel @@ -30,8 +30,7 @@ kissvec.o ifeq ($(DEBUG),FALSE) $(PERFOBJS): %.o: %.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< -# $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div $< + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div $< $(REDUCED_OPT_OBJS): %.o: %.F90 $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O1 $< $(REDUCED_PRECISION_OBJS): %.o: %.F90