diff --git a/Makefile b/Makefile index 880288ac..382ea496 100644 --- a/Makefile +++ b/Makefile @@ -186,7 +186,6 @@ else ifeq ($(arch),knl) FB_TARGET ?= 512 def_block_args ?= -b 96 def_block_threads ?= 8 - streaming_stores ?= 0 SUB_BLOCK_LOOP_INNER_MODS ?= prefetch(L1) else ifeq ($(arch),skx) @@ -224,6 +223,7 @@ else ifeq ($(arch),snb) else ifeq ($(arch),intel64) ISA ?= -xHOST + GCXX_ISA ?= -march=native FB_TARGET ?= cpp else @@ -233,7 +233,7 @@ $(error Architecture not recognized; use arch=knl, knc, skx, hsw, ivb, snb, or i endif # arch-specific. # general defaults for vars if not set above. -streaming_stores ?= 1 +streaming_stores ?= 0 omp_par_for ?= omp parallel for omp_region_schedule ?= dynamic,1 omp_block_schedule ?= static,1 @@ -282,13 +282,13 @@ else endif LD := $(CXX) MAKE := make -CXXOPT := -O3 +CXXOPT ?= -O3 CXXFLAGS += -g -std=c++11 -Wall $(CXXOPT) OMPFLAGS += -fopenmp LFLAGS += -lrt -FB_CXX := g++ # faster than icpc for the foldBuilder. +FB_EXEC := bin/foldBuilder.exe +FB_CXX ?= g++ # faster than icpc for the foldBuilder. FB_CXXFLAGS += -g -O0 -std=c++11 -Wall # low opt to reduce compile time. -EXTRA_FB_CXXFLAGS = FB_FLAGS += -st $(stencil) -cluster $(cluster) -fold $(fold) ST_MACRO_FILE := stencil_macros.hpp ST_CODE_FILE := stencil_code.hpp @@ -382,6 +382,28 @@ else # not Intel compiler endif # compiler. +# Compile with model_cache=1 or 2 to check prefetching. +ifeq ($(model_cache),1) + MACROS += MODEL_CACHE=1 + OMPFLAGS = -qopenmp-stubs +else ifeq ($(model_cache),2) + MACROS += MODEL_CACHE=2 + OMPFLAGS = -qopenmp-stubs +endif + +# Add in OMP flags and user-added flags. +CXXFLAGS += $(OMPFLAGS) $(EXTRA_CXXFLAGS) + +# Some file names. +TAG := $(stencil).$(arch) +STENCIL_BASES := stencil_main stencil_calc realv_grids utils +STENCIL_OBJS := $(addprefix src/,$(addsuffix .$(TAG).o,$(STENCIL_BASES))) +STENCIL_CXX := $(addprefix src/,$(addsuffix .$(TAG).i,$(STENCIL_BASES))) +EXEC_NAME := bin/yask.$(TAG).exe +MAKE_REPORT_FILE := make-report.$(TAG).txt +CXXFLAGS_FILE := cxx-flags.$(TAG).txt +LFLAGS_FILE := ld-flags.$(TAG).txt + # gen-loops.pl args: # Rank loops break up the whole rank into smaller regions. In order for @@ -446,40 +468,24 @@ HALO_LOOP_OUTER_VARS ?= wv,xv,yv,zv HALO_LOOP_CODE ?= $(HALO_LOOP_OUTER_MODS) loop($(HALO_LOOP_OUTER_VARS)) \ $(HALO_LOOP_INNER_MODS) { calc(halo(t)); } -# compile with model_cache=1 or 2 to check prefetching. -ifeq ($(model_cache),1) - MACROS += MODEL_CACHE=1 - OMPFLAGS = -qopenmp-stubs -else ifeq ($(model_cache),2) - MACROS += MODEL_CACHE=2 - OMPFLAGS = -qopenmp-stubs -endif - -CXXFLAGS += $(OMPFLAGS) $(EXTRA_CXXFLAGS) +#### Targets and rules #### -STENCIL_BASES := stencil_main stencil_calc realv_grids utils -STENCIL_OBJS := $(addprefix src/,$(addsuffix .$(arch).o,$(STENCIL_BASES))) -STENCIL_CXX := $(addprefix src/,$(addsuffix .$(arch).i,$(STENCIL_BASES))) -STENCIL_EXEC_NAME := stencil.$(arch).exe -MAKE_REPORT_FILE := make-report.txt -CXXFLAGS_FILE := cxx-flags.txt -LFLAGS_FILE := ld-flags.txt - -all: $(STENCIL_EXEC_NAME) $(MAKE_REPORT_FILE) +all: $(EXEC_NAME) $(MAKE_REPORT_FILE) echo $(CXXFLAGS) > $(CXXFLAGS_FILE) echo $(LFLAGS) > $(LFLAGS_FILE) @cat $(MAKE_REPORT_FILE) - @echo $(STENCIL_EXEC_NAME) "has been built." + @echo $(EXEC_NAME) "has been built. Use bin/yask.sh to run it." -$(MAKE_REPORT_FILE): $(STENCIL_EXEC_NAME) +$(MAKE_REPORT_FILE): $(EXEC_NAME) @echo MAKEFLAGS="\"$(MAKEFLAGS)"\" > $@ 2>&1 $(MAKE) -j1 $(CODE_STATS) echo-settings >> $@ 2>&1 echo-settings: @echo - @echo "Build environment for" $(STENCIL_EXEC_NAME) on `date` - @echo arch=$(arch) + @echo "Build environment for" $(EXEC_NAME) on `date` + @echo host=`hostname` @echo stencil=$(stencil) + @echo arch=$(arch) @echo def_thread_divisor=$(def_thread_divisor) @echo def_block_threads=$(def_block_threads) @echo def_rank_args=$(def_rank_args) @@ -509,7 +515,10 @@ echo-settings: @echo ISA=$(ISA) @echo OMPFLAGS="\"$(OMPFLAGS)\"" @echo EXTRA_CXXFLAGS="\"$(EXTRA_CXXFLAGS)\"" + @echo CXX=$(CXX) + @echo CXXOPT=$(CXXOPT) @echo CXXFLAGS="\"$(CXXFLAGS)\"" + @$(CXX) -v; $(CXX_VER_CMD) @echo RANK_LOOP_OPTS="\"$(RANK_LOOP_OPTS)\"" @echo RANK_LOOP_OUTER_MODS="\"$(RANK_LOOP_OUTER_MODS)\"" @echo RANK_LOOP_OUTER_VARS="\"$(RANK_LOOP_OUTER_VARS)\"" @@ -536,44 +545,41 @@ echo-settings: @echo HALO_LOOP_OUTER_VARS="\"$(HALO_LOOP_OUTER_VARS)\"" @echo HALO_LOOP_INNER_MODS="\"$(HALO_LOOP_INNER_MODS)\"" @echo HALO_LOOP_CODE="\"$(HALO_LOOP_CODE)\"" - @echo CXX=$(CXX) - @echo CXXOPT=$(CXXOPT) - @$(CXX) -v; $(CXX_VER_CMD) code_stats: @echo @echo "Code stats for stencil computation:" - ./get-loop-stats.pl -t='sub_block_loops' *.s + bin/get-loop-stats.pl -t='sub_block_loops' *.s -$(STENCIL_EXEC_NAME): $(STENCIL_OBJS) +$(EXEC_NAME): $(STENCIL_OBJS) $(LD) -o $@ $(STENCIL_OBJS) $(CXXFLAGS) $(LFLAGS) preprocess: $(STENCIL_CXX) -src/stencil_rank_loops.hpp: gen-loops.pl Makefile - ./$< -output $@ $(RANK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_RANK_LOOP_OPTS) "$(RANK_LOOP_CODE)" +src/stencil_rank_loops.hpp: bin/gen-loops.pl Makefile + $< -output $@ $(RANK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_RANK_LOOP_OPTS) "$(RANK_LOOP_CODE)" -src/stencil_region_loops.hpp: gen-loops.pl Makefile - ./$< -output $@ $(REGION_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_REGION_LOOP_OPTS) "$(REGION_LOOP_CODE)" +src/stencil_region_loops.hpp: bin/gen-loops.pl Makefile + $< -output $@ $(REGION_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_REGION_LOOP_OPTS) "$(REGION_LOOP_CODE)" -src/stencil_block_loops.hpp: gen-loops.pl Makefile - ./$< -output $@ $(BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_BLOCK_LOOP_OPTS) "$(BLOCK_LOOP_CODE)" +src/stencil_block_loops.hpp: bin/gen-loops.pl Makefile + $< -output $@ $(BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_BLOCK_LOOP_OPTS) "$(BLOCK_LOOP_CODE)" -src/stencil_sub_block_loops.hpp: gen-loops.pl Makefile - ./$< -output $@ $(SUB_BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_SUB_BLOCK_LOOP_OPTS) "$(SUB_BLOCK_LOOP_CODE)" +src/stencil_sub_block_loops.hpp: bin/gen-loops.pl Makefile + $< -output $@ $(SUB_BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_SUB_BLOCK_LOOP_OPTS) "$(SUB_BLOCK_LOOP_CODE)" -src/stencil_halo_loops.hpp: gen-loops.pl Makefile - ./$< -output $@ $(HALO_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_HALO_LOOP_OPTS) "$(HALO_LOOP_CODE)" +src/stencil_halo_loops.hpp: bin/gen-loops.pl Makefile + $< -output $@ $(HALO_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_HALO_LOOP_OPTS) "$(HALO_LOOP_CODE)" -src/layout_macros.hpp: gen-layouts.pl - ./$< -m > $@ +src/layout_macros.hpp: bin/gen-layouts.pl + $< -m > $@ -src/layouts.hpp: gen-layouts.pl - ./$< -d > $@ +src/layouts.hpp: bin/gen-layouts.pl + $< -d > $@ # Compile the stencil compiler. # TODO: move this to its own makefile. -foldBuilder: src/foldBuilder/*.*pp src/foldBuilder/stencils/*.*pp $(FB_STENCIL_LIST) +$(FB_EXEC): src/foldBuilder/*.*pp src/foldBuilder/stencils/*.*pp $(FB_STENCIL_LIST) $(FB_CXX) $(FB_CXXFLAGS) -Isrc/foldBuilder/stencils -o $@ src/foldBuilder/*.cpp $(EXTRA_FB_CXXFLAGS) $(FB_STENCIL_LIST): src/foldBuilder/stencils/*.hpp @@ -584,8 +590,8 @@ $(FB_STENCIL_LIST): src/foldBuilder/stencils/*.hpp # Run the stencil compiler and post-process its output files. # Use the gmake pattern-rule trick to specify simultaneous targets. -%/$(ST_MACRO_FILE) %/$(ST_CODE_FILE): foldBuilder - ./$< $(FB_FLAGS) $(EXTRA_FB_FLAGS) \ +%/$(ST_MACRO_FILE) %/$(ST_CODE_FILE): $(FB_EXEC) + $< $(FB_FLAGS) $(EXTRA_FB_FLAGS) \ -pm $*/$(ST_MACRO_FILE) -p$(FB_TARGET) $*/$(ST_CODE_FILE) echo >> $*/$(ST_MACRO_FILE) echo '// Settings from YASK Makefile' >> $*/$(ST_MACRO_FILE) @@ -599,20 +605,21 @@ $(FB_STENCIL_LIST): src/foldBuilder/stencils/*.hpp headers: $(GEN_HEADERS) $(FB_STENCIL_LIST) @ echo 'Header files generated.' -%.$(arch).o: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS) +%.$(TAG).o: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS) $(CXX) $(CXXFLAGS) -c -o $@ $< -%.$(arch).i: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS) +%.$(TAG).i: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS) $(CXX) $(CXXFLAGS) -E $< > $@ tags: rm -f TAGS ; find . -name '*.[ch]pp' | xargs etags -C -a clean: - rm -fv src/*.[io] *.optrpt src/*.optrpt *.s $(GEN_HEADERS) $(MAKE_REPORT_FILE) + rm -fv src/*.[io] *.optrpt */*.optrpt *.s $(GEN_HEADERS) $(MAKE_REPORT_FILE) realclean: clean - rm -fv stencil*.exe foldBuilder TAGS $(MAKE_REPORT_FILE) $(CXXFLAGS_FILE) $(LFLAGS_FILE) $(FB_STENCIL_LIST) + rm -fv bin/yask*.exe make-report*.txt cxx-flags*.txt ld-flags.*txt $(FB_EXEC) $(FB_STENCIL_LIST) TAGS + rm -fv stencil*.exe stencil-tuner-summary.csh stencil-tuner.pl gen-layouts.pl gen-loops.pl get-loop-stats.pl find . -name '*~' | xargs -r rm -v help: diff --git a/gen-layouts.pl b/bin/gen-layouts.pl similarity index 100% rename from gen-layouts.pl rename to bin/gen-layouts.pl diff --git a/gen-loops.pl b/bin/gen-loops.pl similarity index 100% rename from gen-loops.pl rename to bin/gen-loops.pl diff --git a/get-loop-stats.pl b/bin/get-loop-stats.pl similarity index 100% rename from get-loop-stats.pl rename to bin/get-loop-stats.pl diff --git a/stencil-tuner-summary.csh b/bin/yask-tuner-summary.csh similarity index 89% rename from stencil-tuner-summary.csh rename to bin/yask-tuner-summary.csh index 95be4d92..a73929e2 100755 --- a/stencil-tuner-summary.csh +++ b/bin/yask-tuner-summary.csh @@ -26,15 +26,15 @@ # Purpose: find best result from each GA search csv file. if ( "-$1" == "-" ) then - if ( `echo stencil-tuner*.csv | wc -l` > 0 ) then - $0 stencil-tuner*.csv + if ( `echo yask-tuner*.csv | wc -l` > 0 ) then + $0 yask-tuner*.csv else - echo "usage: $0 " + echo "usage: $0 " endif exit endif -echo "Summary of stencil-tuner results:" +echo "Summary of yask-tuner results:" head -n1 $1 foreach f ($*) echo '==========' diff --git a/stencil-tuner.pl b/bin/yask-tuner.pl similarity index 98% rename from stencil-tuner.pl rename to bin/yask-tuner.pl index 4d09d775..c75a3f5e 100755 --- a/stencil-tuner.pl +++ b/bin/yask-tuner.pl @@ -97,7 +97,7 @@ sub usage { " -sde Run binary on SDE (for testing only).\n". " -makePrefix= Prefix make command with .\n". " -makeArgs= Pass additional to make command.\n". - " -runArgs= Pass additional to stencil-run command.\n". + " -runArgs= Pass additional to bin/yask.sh command.\n". " -ranks= Number of ranks to use on host (x-dimension only).\n". "\nstencil options:\n". " -stencil= Specify stencil: iso3dfd, 3axis, 9axis, 3plane, cube, ave, awp, ... (required).\n". @@ -120,7 +120,7 @@ sub usage { " -dw= Set size of 'w' dim to (only for 4D problems).\n". " -mem=- Set allowable est. memory usage between and GiB (default is $minGB-$maxGB).\n". " -maxVecsInCluster= Maximum vectors allowed in cluster (default is $maxVecsInCluster).\n". - " -noPrefetch Disable any prefetching (shortcut for '-pfdl1=0 -pfdl2=0').\n". + " -noPrefetch Disable any prefetching (shortcut for '-pfd_l1=0 -pfd_l2=0').\n". " -noFolding Allow only 1D vectorization (in any direction).\n". " -zLoop Force inner loop in 'z' direction.\n". " -zLayout Force inner memory layout in 'z' direction.\n". @@ -209,8 +209,8 @@ sub usage { $stencil = $1; } elsif ($opt eq '-noprefetch') { - $geneRanges{$autoKey.'pfdl1'} = [ 0 ]; - $geneRanges{$autoKey.'pfdl2'} = [ 0 ]; + $geneRanges{$autoKey.'pfd_l1'} = [ 0 ]; + $geneRanges{$autoKey.'pfd_l2'} = [ 0 ]; } elsif ($opt =~ '^-maxvecsincluster=(\d+)$') { $maxVecsInCluster = $1; @@ -304,7 +304,7 @@ sub usage { my $hostStr = defined $host ? $host : hostname(); my $timeStamp=`date +%Y-%m-%d_%H-%M-%S`; chomp $timeStamp; -my $outFile = "stencil-tuner$searchTypeStr.$stencil.$arch.$hostStr.$timeStamp.csv"; +my $outFile = "yask-tuner$searchTypeStr.$stencil.$arch.$hostStr.$timeStamp.csv"; print "Output will be saved in '$outFile'.\n"; $outFile = '/dev/null' if $checking; @@ -504,8 +504,8 @@ sub usage { # prefetch distances for l1 and l2. # all non-pos numbers => no prefetching, so ~50% chance of being enabled. - [ -$maxPfdl1, $maxPfdl1, 1, 'pfdl1' ], - [ -$maxPfdl2, $maxPfdl2, 1, 'pfdl2' ], + [ -$maxPfdl1, $maxPfdl1, 1, 'pfd_l1' ], + [ -$maxPfdl2, $maxPfdl2, 1, 'pfd_l2' ], # other build options. [ 0, 100, 1, 'exprSize' ], # expression-size threshold. @@ -734,7 +734,7 @@ ($$) my $makeCmd = "$makePrefix make clean; ". "$makePrefix make -j all EXTRA_MACROS='$macros' ". - "arch=$arch real_bytes=$realBytes stencil=$stencil radius=$radius $margs $makeArgs"; + "stencil=$stencil arch=$arch real_bytes=$realBytes radius=$radius $margs $makeArgs"; $makeCmd = "echo 'build disabled'" if !$doBuild; return $makeCmd; } @@ -744,14 +744,14 @@ () my $exePrefix = 'time'; $exePrefix .= " sde -$arch --" if $sde; - my $runCmd = "./stencil-run.sh"; + my $runCmd = "bin/yask.sh"; if (defined $mic) { $runCmd .= " -mic $mic"; } else { $exePrefix .= " numactl -p 1" if $arch eq 'knl' && !$sde; # TODO: fix for cache mode. $runCmd .= " -host $host" if defined $host; } - $runCmd .= " -exe_prefix '$exePrefix' -arch $arch $runArgs"; + $runCmd .= " -exe_prefix '$exePrefix' -stencil $stencil -arch $arch $runArgs"; return $runCmd; } @@ -1208,8 +1208,8 @@ sub fitness { my $thread_divisor_exp = readHash($h, 'thread_divisor_exp', 0); my $bthreads_exp = readHash($h, 'bthreads_exp', 0); my $layout = readHash($h, 'layout', 1); - my $pfdl1 = readHash($h, 'pfdl1', 1); - my $pfdl2 = readHash($h, 'pfdl2', 1); + my $pfdl1 = readHash($h, 'pfd_l1', 1); + my $pfdl2 = readHash($h, 'pfd_l2', 1); my $ompRegionSchedule = readHash($h, 'ompRegionSchedule', 1); my $ompBlockSchedule = readHash($h, 'ompBlockSchedule', 1); @@ -1357,8 +1357,8 @@ sub fitness { # make sure pfld2 > pfld1. $pfdl2 = $pfdl1 + 1 if $pfdl1 >= $pfdl2; } - $macros .= " PFDL1=$pfdl1" if $pfdl1 > 0; - $macros .= " PFDL2=$pfdl2" if $pfdl2 > 0; + $mvars .= " pfd_l1=$pfdl1" if $pfdl1 > 0; + $mvars .= " pfd_l2=$pfdl2" if $pfdl2 > 0; # cluster & fold. $mvars .= " cluster=x=$cvs[0],y=$cvs[1],z=$cvs[2]"; diff --git a/bin/yask.sh b/bin/yask.sh new file mode 100755 index 00000000..2ba8ef86 --- /dev/null +++ b/bin/yask.sh @@ -0,0 +1,247 @@ +#!/bin/bash + +############################################################################## +## YASK: Yet Another Stencil Kernel +## Copyright (c) 2014-2017, Intel Corporation +## +## Permission is hereby granted, free of charge, to any person obtaining a copy +## of this software and associated documentation files (the "Software"), to +## deal in the Software without restriction, including without limitation the +## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +## sell copies of the Software, and to permit persons to whom the Software is +## furnished to do so, subject to the following conditions: +## +## * The above copyright notice and this permission notice shall be included in +## all copies or substantial portions of the Software. +## +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +## IN THE SOFTWARE. +############################################################################## + +# Purpose: run stencil kernel in specified environment. +invo="Invocation: $0 $@" +echo $invo + +# Env vars to set. +envs="OMP_DISPLAY_ENV=VERBOSE OMP_PLACES=cores" +envs="$envs KMP_VERSION=1 KMP_HOT_TEAMS_MODE=1 KMP_HOT_TEAMS_MAX_LEVEL=2" +envs="$envs I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5" + +# Extra options for exe. +opts="" + +unset arch # Don't want to inherit from env. +while true; do + + if [[ ! -n ${1+set} ]]; then + break + + elif [[ "$1" == "-h" || "$1" == "-help" ]]; then + opts="$opts -h" + shift + echo "$0 is a wrapper around the stencil executable to set up the proper environment." + echo "usage: $0 -stencil -arch [script-options] [--] [exe-options]" + echo "required parameters to specify the executable:" + echo " -stencil " + echo " Corresponds to stencil= used during compilation" + echo " -arch " + echo " Corresponds to arch= used during compilation" + echo "script-options:" + echo " -h" + echo " Print this help." + echo " To get executable help, run '$0 -stencil -arch -- -help'" + echo " -host |-mic " + echo " Specify host to run executable on." + echo " 'ssh ' will be pre-pended to the sh_prefix command." + echo " If -arch 'knl' is given, it implies the following (which can be overridden):" + echo " -exe_prefix 'numactl --preferred=1'" + echo " If -mic is given, it implies the following (which can be overridden):" + echo " -arch 'knc'" + echo " -host "`hostname`"-mic" + echo " -sh_prefix " + echo " Add command-prefix before the sub-shell." + echo " -exe_prefix " + echo " Add command-prefix before the executable." + echo " -ranks " + echo " Simplified MPI run (x-dimension partition only)." + echo " 'mpirun -n -ppn ' is prepended to the exe_prefix command," + echo " and '-nrx' is passed to the executable." + echo " If a different MPI command or config is needed, use -exe_prefix " + echo " explicitly and -nr* options as needed (and do not use '-ranks')." + echo " -log " + echo " Write copy of output to ." + echo " Default is based on stencil, arch, host-name, and time-stamp." + echo " Use '/dev/null' to avoid making a log." + echo " " + echo " Set environment variable to ." + echo " Repeat as necessary to set multiple vars." + echo " " + exit 1 + + elif [[ "$1" == "-stencil" && -n ${2+set} ]]; then + stencil=$2 + shift + shift + + elif [[ "$1" == "-arch" && -n ${2+set} ]]; then + arch=$2 + shift + shift + + elif [[ "$1" == "-sh_prefix" && -n ${2+set} ]]; then + sh_prefix=$2 + shift + shift + + elif [[ "$1" == "-exe_prefix" && -n ${2+set} ]]; then + exe_prefix=$2 + shift + shift + + elif [[ "$1" == "-log" && -n ${2+set} ]]; then + logfile=$2 + shift + shift + + elif [[ "$1" == "-host" && -n ${2+set} ]]; then + host=$2 + shift + shift + + elif [[ "$1" == "-mic" && -n ${2+set} ]]; then + arch="knc" + host=`hostname`-mic$2 + shift + shift + + elif [[ "$1" == "-ranks" && -n ${2+set} ]]; then + nranks=$2 + opts="$opts -nrx $nranks" + shift + shift + + elif [[ "$1" =~ ^[A-Za-z0-9_]+= ]]; then + envs="$envs $1" + shift + + elif [[ "$1" == "--" ]]; then + shift + + # will pass remaining options to executable. + break + + else + # will pass remaining options to executable. + break + fi + +done # parsing options. + +# Check required opts. +if [[ -z ${stencil:+ok} ]]; then + if [[ -z ${arch:+ok} ]]; then + echo "error: missing required options: -stencil -arch " + exit 1 + fi + echo "error: missing required option: -stencil " + exit 1 +fi +if [[ -z ${arch:+ok} ]]; then + echo "error: missing required option: -arch " + exit 1 +fi + +# Set defaults for KNL. +# TODO: run numactl [on host] to determine if in flat mode. +if [[ "$arch" == "knl" ]]; then + true ${exe_prefix='numactl --preferred=1'} +fi + +# Simplified MPI in x-dim only. +if [[ -n "$nranks" ]]; then + exe_prefix="mpirun -n $nranks -ppn $nranks $exe_prefix" +fi + +# Bail on errors past this point. +set -e + +# Actual host. +exe_host=${host:-`hostname`} + +# Init log file. +true ${logfile=logs/yask.$stencil.$arch.$exe_host.`date +%Y-%m-%d_%H-%M-%S`.log} +echo "Writing log to '$logfile'." +mkdir -p `dirname $logfile` +echo $invo > $logfile + +# These values must match the ones in Makefile. +tag=$stencil.$arch +exe="bin/yask.$tag.exe" +make_report=make-report.$tag.txt + +# Try to build exe if needed. +if [[ ! -x $exe ]]; then + echo "'$exe' not found or not executable; trying to build with default settings..." + make clean; make -j stencil=$stencil arch=$arch 2>&1 | tee -a $logfile + +# Or, save most recent make report to log if it exists. +elif [[ -e $make_report ]]; then + echo "Build log from '$make_report':" >> $logfile + cat $make_report >> $logfile +fi + +# Double-check that exe exists. +if [[ ! -x $exe ]]; then + echo "error: '$exe' not found or not executable." | tee -a $logfile + exit 1 +fi + +# Additional setup for KNC. +if [[ $arch == "knc" && -n "$host" ]]; then + dir=/tmp/$USER + icc=`which icc` + iccdir=`dirname $icc`/../.. + libpath=":$iccdir/compiler/lib/mic" + ssh $host "rm -rf $dir; mkdir -p $dir/bin" + scp $exe $host:$dir/bin +else + dir=`pwd` + libpath=":$HOME/lib" +fi + +# Setup to run on specified host. +if [[ -n "$host" ]]; then + sh_prefix="ssh $host $sh_prefix" + envs="$envs PATH=$PATH LD_LIBRARY_PATH=./lib:$LD_LIBRARY_PATH$libpath" + + nm=1 + while true; do + echo "Verifying access to '$host'..." + ping -c 1 $host && ssh $host uname -a && break + echo "Waiting $nm min before trying again..." + sleep $(( nm++ * 60 )) + done +else + envs="$envs LD_LIBRARY_PATH=./lib:$LD_LIBRARY_PATH$libpath" +fi + +# Command sequence. +cmds="cd $dir; uname -a; lscpu; numactl -H; ldd $exe; env $envs $exe_prefix $exe $opts $@" + +date | tee -a $logfile +echo "===================" | tee -a $logfile + +if [[ -z "$sh_prefix" ]]; then + sh -c -x "$cmds" 2>&1 | tee -a $logfile +else + echo "Running shell under '$sh_prefix'..." + $sh_prefix "sh -c -x '$cmds'" 2>&1 | tee -a $logfile +fi + +date | tee -a $logfile +echo "Log saved in '$logfile'." diff --git a/docs/YASK-intro.pdf b/docs/YASK-intro.pdf index da4d0607..d01fe801 100755 Binary files a/docs/YASK-intro.pdf and b/docs/YASK-intro.pdf differ diff --git a/src/mem_macros.hpp b/src/mem_macros.hpp index ecf3e836..c620ef9b 100644 --- a/src/mem_macros.hpp +++ b/src/mem_macros.hpp @@ -38,12 +38,12 @@ IN THE SOFTWARE. #define YASK_PAD (17) // cache-lines between data buffers. #define YASK_ALIGNMENT (2 * 1024 * 1024) // 2MiB-page - // Set MODEL_CACHE to 1 or 2 to model L1 or L2. +// Set MODEL_CACHE to 1 or 2 to model L1 or L2. #ifdef MODEL_CACHE #include "cache_model.hpp" #endif - // L1 and L2 hints +// L1 and L2 hints #define L1 _MM_HINT_T0 #define L2 _MM_HINT_T1 @@ -57,16 +57,16 @@ IN THE SOFTWARE. #endif #endif - ////// Default prefetch distances. - // These are only used if and when prefetch code is generated - // by gen-loops.pl. +////// Default prefetch distances. +// These are only used if and when prefetch code is generated by +// gen-loops.pl. - // how far to prefetch ahead for L1. +// How far to prefetch ahead for L1. #ifndef PFDL1 #define PFDL1 1 #endif - // how far to prefetch ahead for L2. +// How far to prefetch ahead for L2. #ifndef PFDL2 #define PFDL2 2 #endif diff --git a/src/realv.hpp b/src/realv.hpp index fbf2f5d8..6865c281 100644 --- a/src/realv.hpp +++ b/src/realv.hpp @@ -163,13 +163,20 @@ namespace yask { for (int i=0; i(begin_sbw, VLEN_W); - const idx_t begin_sbxv = idiv_flr(begin_sbx, VLEN_X); - const idx_t begin_sbyv = idiv_flr(begin_sby, VLEN_Y); - const idx_t begin_sbzv = idiv_flr(begin_sbz, VLEN_Z); - const idx_t end_sbtv = sbt + CLEN_T; - const idx_t end_sbwv = idiv_flr(end_sbw, VLEN_W); - const idx_t end_sbxv = idiv_flr(end_sbx, VLEN_X); - const idx_t end_sbyv = idiv_flr(end_sby, VLEN_Y); - const idx_t end_sbzv = idiv_flr(end_sbz, VLEN_Z); - - // Evaluate sub-block of clusters. - calc_sub_block_of_clusters(begin_sbtv, ARG_W(begin_sbwv) - begin_sbxv, begin_sbyv, begin_sbzv, - end_sbtv, ARG_W(end_sbwv) end_sbxv, end_sbyv, end_sbzv); + // Full rectangular polytope: use optimized code. + else { + + // Divide indices by vector lengths. Use idiv_flr() instead of '/' + // because begin/end vars may be negative (if in halo). + const idx_t begin_sbtv = sbt; + const idx_t begin_sbwv = idiv_flr(begin_sbw, VLEN_W); + const idx_t begin_sbxv = idiv_flr(begin_sbx, VLEN_X); + const idx_t begin_sbyv = idiv_flr(begin_sby, VLEN_Y); + const idx_t begin_sbzv = idiv_flr(begin_sbz, VLEN_Z); + const idx_t end_sbtv = sbt + CLEN_T; + const idx_t end_sbwv = idiv_flr(end_sbw, VLEN_W); + const idx_t end_sbxv = idiv_flr(end_sbx, VLEN_X); + const idx_t end_sbyv = idiv_flr(end_sby, VLEN_Y); + const idx_t end_sbzv = idiv_flr(end_sbz, VLEN_Z); + + // Evaluate sub-block of clusters. + calc_sub_block_of_clusters(begin_sbtv, ARG_W(begin_sbwv) + begin_sbxv, begin_sbyv, begin_sbzv, + end_sbtv, ARG_W(end_sbwv) end_sbxv, end_sbyv, end_sbzv); + } + + // Make sure stores are visible for later loads. + make_stores_visible(); } // Init MPI-related vars and other vars related to my rank's place in diff --git a/src/utils.hpp b/src/utils.hpp index 8b54b513..7ee082ad 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -47,7 +47,6 @@ IN THE SOFTWARE. #include #ifdef WIN32 -#define _mm_clevict(p,h) ((void)0) #define _Pragma(x) #endif diff --git a/stencil-run.sh b/stencil-run.sh index 56be33d2..c3c0be2a 100755 --- a/stencil-run.sh +++ b/stencil-run.sh @@ -23,169 +23,6 @@ ## IN THE SOFTWARE. ############################################################################## -# Purpose: run stencil kernel in specified environment. - -# Env vars to set. -envs="OMP_DISPLAY_ENV=VERBOSE OMP_PLACES=cores" -envs="$envs KMP_VERSION=1 KMP_HOT_TEAMS_MODE=1 KMP_HOT_TEAMS_MAX_LEVEL=2" - -# Extra options for exe. -opts="" - -unset arch -while true; do - - if [[ ! -n ${1+set} ]]; then - break - - elif [[ "$1" == "-h" || "$1" == "-help" ]]; then - opts="$opts -h" - shift - echo "$0 is a wrapper around the stencil executable to facilitate setting up the proper environment." - echo "usage: $0 -arch [-mic |-host ] [-sh_prefix ] [-exe_prefix ] [-ranks ] [...] [[--] ]" - echo " " - if [[ -z ${arch:+ok} ]]; then - echo "To see executable options, run '$0 -arch -- -help'." - else - echo "To see executable options, run '$0 -arch $arch -- -help'." - fi - echo " " - echo "All options to be passed to the executable must be at the end of the command line." - echo "The sh_prefix command is used to prefix a sub-shell." - echo "The exe_prefix command is used to prefix the executable (set to 'true' to avoid actual run)." - echo "If '-host ' is given, 'ssh ' will be pre-pended to the sh_prefix command." - echo "The '-ranks' option is for simple one-socket x-dimension partitioning only." - echo " If -ranks is given, 'mpirun -n -ppn ' is pre-pended to the exe_prefix command," - echo " and -nrx is passed to the executable." - echo " If a different MPI command or config is needed, use -exe_prefix explicitly" - echo " and -nr* options as needed." - echo "If -arch 'knl' is given, it implies the following (which can be overridden):" - echo " -exe_prefix 'numactl --preferred=1'" - echo "If -mic is given, it implies the following (which can be overridden):" - echo " -arch 'knc'" - echo " -host "`hostname`"-mic" - exit 1 - - elif [[ "$1" == "-sh_prefix" && -n ${2+set} ]]; then - sh_prefix=$2 - shift - shift - - elif [[ "$1" == "-exe_prefix" && -n ${2+set} ]]; then - exe_prefix=$2 - shift - shift - - elif [[ "$1" == "-host" && -n ${2+set} ]]; then - host=$2 - shift - shift - - elif [[ "$1" == "-mic" && -n ${2+set} ]]; then - arch="knc" - host=`hostname`-mic$2 - shift - shift - - elif [[ "$1" == "-arch" && -n ${2+set} ]]; then - arch=$2 - shift - shift - - elif [[ "$1" == "-ranks" && -n ${2+set} ]]; then - nranks=$2 - opts="$opts -nrx $nranks" - shift - shift - - elif [[ "$1" =~ ^[A-Za-z0-9_]+= ]]; then - envs="$envs $1" - shift - - elif [[ "$1" == "--" ]]; then - shift - - # will pass remaining options to executable. - break - - else - # will pass remaining options to executable. - break - fi - -done # parsing options. - -# check arch. -if [[ -z ${arch:+ok} ]]; then - echo "error: must use -arch " - exit 1 -fi - -# set defaults for KNL. -if [[ "$arch" == "knl" ]]; then - true ${exe_prefix='numactl --preferred=1'} -fi - -# MPI -if [[ -n "$nranks" ]]; then - exe_prefix="mpirun -n $nranks -ppn $nranks $exe_prefix" - envs="$envs I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5" -fi - -# bail on errors past this point. -set -e - -exe="stencil.$arch.exe" -if [[ ! -x $exe ]]; then - echo "error: '$exe' not found or not executable." - exit 1 -fi - -# additional settings w/special cases for KNC when no host specified. -if [[ $arch == "knc" && -z ${host+ok} ]]; then - dir=/tmp/$USER - icc=`which icc` - iccdir=`dirname $icc`/../.. - libpath=":$iccdir/compiler/lib/mic" - ssh $host "rm -rf $dir; mkdir -p $dir" - scp $exe $host:$dir -else - dir=`pwd` - libpath=":$HOME/lib" -fi - -# run on specified host -if [[ -n "$host" ]]; then - sh_prefix="ssh $host $sh_prefix" - envs="$envs PATH=$PATH LD_LIBRARY_PATH=./lib:$LD_LIBRARY_PATH$libpath" - - nm=1 - while true; do - echo "Verifying access to '$host'..." - ping -c 1 $host && ssh $host uname -a && break - echo "Waiting $nm min before trying again..." - sleep $(( nm++ * 60 )) - done -else - envs="$envs LD_LIBRARY_PATH=./lib:$LD_LIBRARY_PATH$libpath" -fi - -# echo make report if it exists. -if [[ -e make-report.txt ]]; then - cat make-report.txt -fi - -# command sequence. -cmds="cd $dir; uname -a; lscpu; numactl -H; ldd ./$exe; env $envs $exe_prefix ./$exe $opts $*" - -date -echo "===================" - -if [[ -z "$sh_prefix" ]]; then - sh -c -x "$cmds" -else - echo "Running shell under '$sh_prefix'..." - $sh_prefix "sh -c -x '$cmds'" -fi - -date +echo "$0 has been replaced with bin/yask.sh." +echo "yask.sh requires an additional '-stencil ' argument." +exit 1