Skip to content

Commit

Permalink
Merge pull request #38 from 01org/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
chuckyount authored Apr 22, 2017
2 parents aa94dfb + e31df94 commit 29bde53
Show file tree
Hide file tree
Showing 13 changed files with 369 additions and 268 deletions.
115 changes: 61 additions & 54 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ else ifeq ($(arch),knl)
FB_TARGET ?= 512
def_block_args ?= -b 96
def_block_threads ?= 8
streaming_stores ?= 0
SUB_BLOCK_LOOP_INNER_MODS ?= prefetch(L1)

else ifeq ($(arch),skx)
Expand Down Expand Up @@ -224,6 +223,7 @@ else ifeq ($(arch),snb)
else ifeq ($(arch),intel64)

ISA ?= -xHOST
GCXX_ISA ?= -march=native
FB_TARGET ?= cpp

else
Expand All @@ -233,7 +233,7 @@ $(error Architecture not recognized; use arch=knl, knc, skx, hsw, ivb, snb, or i
endif # arch-specific.

# general defaults for vars if not set above.
streaming_stores ?= 1
streaming_stores ?= 0
omp_par_for ?= omp parallel for
omp_region_schedule ?= dynamic,1
omp_block_schedule ?= static,1
Expand Down Expand Up @@ -282,13 +282,13 @@ else
endif
LD := $(CXX)
MAKE := make
CXXOPT := -O3
CXXOPT ?= -O3
CXXFLAGS += -g -std=c++11 -Wall $(CXXOPT)
OMPFLAGS += -fopenmp
LFLAGS += -lrt
FB_CXX := g++ # faster than icpc for the foldBuilder.
FB_EXEC := bin/foldBuilder.exe
FB_CXX ?= g++ # faster than icpc for the foldBuilder.
FB_CXXFLAGS += -g -O0 -std=c++11 -Wall # low opt to reduce compile time.
EXTRA_FB_CXXFLAGS =
FB_FLAGS += -st $(stencil) -cluster $(cluster) -fold $(fold)
ST_MACRO_FILE := stencil_macros.hpp
ST_CODE_FILE := stencil_code.hpp
Expand Down Expand Up @@ -382,6 +382,28 @@ else # not Intel compiler

endif # compiler.

# Compile with model_cache=1 or 2 to check prefetching.
ifeq ($(model_cache),1)
MACROS += MODEL_CACHE=1
OMPFLAGS = -qopenmp-stubs
else ifeq ($(model_cache),2)
MACROS += MODEL_CACHE=2
OMPFLAGS = -qopenmp-stubs
endif

# Add in OMP flags and user-added flags.
CXXFLAGS += $(OMPFLAGS) $(EXTRA_CXXFLAGS)

# Some file names.
TAG := $(stencil).$(arch)
STENCIL_BASES := stencil_main stencil_calc realv_grids utils
STENCIL_OBJS := $(addprefix src/,$(addsuffix .$(TAG).o,$(STENCIL_BASES)))
STENCIL_CXX := $(addprefix src/,$(addsuffix .$(TAG).i,$(STENCIL_BASES)))
EXEC_NAME := bin/yask.$(TAG).exe
MAKE_REPORT_FILE := make-report.$(TAG).txt
CXXFLAGS_FILE := cxx-flags.$(TAG).txt
LFLAGS_FILE := ld-flags.$(TAG).txt

# gen-loops.pl args:

# Rank loops break up the whole rank into smaller regions. In order for
Expand Down Expand Up @@ -446,40 +468,24 @@ HALO_LOOP_OUTER_VARS ?= wv,xv,yv,zv
HALO_LOOP_CODE ?= $(HALO_LOOP_OUTER_MODS) loop($(HALO_LOOP_OUTER_VARS)) \
$(HALO_LOOP_INNER_MODS) { calc(halo(t)); }

# compile with model_cache=1 or 2 to check prefetching.
ifeq ($(model_cache),1)
MACROS += MODEL_CACHE=1
OMPFLAGS = -qopenmp-stubs
else ifeq ($(model_cache),2)
MACROS += MODEL_CACHE=2
OMPFLAGS = -qopenmp-stubs
endif

CXXFLAGS += $(OMPFLAGS) $(EXTRA_CXXFLAGS)
#### Targets and rules ####

STENCIL_BASES := stencil_main stencil_calc realv_grids utils
STENCIL_OBJS := $(addprefix src/,$(addsuffix .$(arch).o,$(STENCIL_BASES)))
STENCIL_CXX := $(addprefix src/,$(addsuffix .$(arch).i,$(STENCIL_BASES)))
STENCIL_EXEC_NAME := stencil.$(arch).exe
MAKE_REPORT_FILE := make-report.txt
CXXFLAGS_FILE := cxx-flags.txt
LFLAGS_FILE := ld-flags.txt

all: $(STENCIL_EXEC_NAME) $(MAKE_REPORT_FILE)
all: $(EXEC_NAME) $(MAKE_REPORT_FILE)
echo $(CXXFLAGS) > $(CXXFLAGS_FILE)
echo $(LFLAGS) > $(LFLAGS_FILE)
@cat $(MAKE_REPORT_FILE)
@echo $(STENCIL_EXEC_NAME) "has been built."
@echo $(EXEC_NAME) "has been built. Use bin/yask.sh to run it."

$(MAKE_REPORT_FILE): $(STENCIL_EXEC_NAME)
$(MAKE_REPORT_FILE): $(EXEC_NAME)
@echo MAKEFLAGS="\"$(MAKEFLAGS)"\" > $@ 2>&1
$(MAKE) -j1 $(CODE_STATS) echo-settings >> $@ 2>&1

echo-settings:
@echo
@echo "Build environment for" $(STENCIL_EXEC_NAME) on `date`
@echo arch=$(arch)
@echo "Build environment for" $(EXEC_NAME) on `date`
@echo host=`hostname`
@echo stencil=$(stencil)
@echo arch=$(arch)
@echo def_thread_divisor=$(def_thread_divisor)
@echo def_block_threads=$(def_block_threads)
@echo def_rank_args=$(def_rank_args)
Expand Down Expand Up @@ -509,7 +515,10 @@ echo-settings:
@echo ISA=$(ISA)
@echo OMPFLAGS="\"$(OMPFLAGS)\""
@echo EXTRA_CXXFLAGS="\"$(EXTRA_CXXFLAGS)\""
@echo CXX=$(CXX)
@echo CXXOPT=$(CXXOPT)
@echo CXXFLAGS="\"$(CXXFLAGS)\""
@$(CXX) -v; $(CXX_VER_CMD)
@echo RANK_LOOP_OPTS="\"$(RANK_LOOP_OPTS)\""
@echo RANK_LOOP_OUTER_MODS="\"$(RANK_LOOP_OUTER_MODS)\""
@echo RANK_LOOP_OUTER_VARS="\"$(RANK_LOOP_OUTER_VARS)\""
Expand All @@ -536,44 +545,41 @@ echo-settings:
@echo HALO_LOOP_OUTER_VARS="\"$(HALO_LOOP_OUTER_VARS)\""
@echo HALO_LOOP_INNER_MODS="\"$(HALO_LOOP_INNER_MODS)\""
@echo HALO_LOOP_CODE="\"$(HALO_LOOP_CODE)\""
@echo CXX=$(CXX)
@echo CXXOPT=$(CXXOPT)
@$(CXX) -v; $(CXX_VER_CMD)

code_stats:
@echo
@echo "Code stats for stencil computation:"
./get-loop-stats.pl -t='sub_block_loops' *.s
bin/get-loop-stats.pl -t='sub_block_loops' *.s

$(STENCIL_EXEC_NAME): $(STENCIL_OBJS)
$(EXEC_NAME): $(STENCIL_OBJS)
$(LD) -o $@ $(STENCIL_OBJS) $(CXXFLAGS) $(LFLAGS)

preprocess: $(STENCIL_CXX)

src/stencil_rank_loops.hpp: gen-loops.pl Makefile
./$< -output $@ $(RANK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_RANK_LOOP_OPTS) "$(RANK_LOOP_CODE)"
src/stencil_rank_loops.hpp: bin/gen-loops.pl Makefile
$< -output $@ $(RANK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_RANK_LOOP_OPTS) "$(RANK_LOOP_CODE)"

src/stencil_region_loops.hpp: gen-loops.pl Makefile
./$< -output $@ $(REGION_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_REGION_LOOP_OPTS) "$(REGION_LOOP_CODE)"
src/stencil_region_loops.hpp: bin/gen-loops.pl Makefile
$< -output $@ $(REGION_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_REGION_LOOP_OPTS) "$(REGION_LOOP_CODE)"

src/stencil_block_loops.hpp: gen-loops.pl Makefile
./$< -output $@ $(BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_BLOCK_LOOP_OPTS) "$(BLOCK_LOOP_CODE)"
src/stencil_block_loops.hpp: bin/gen-loops.pl Makefile
$< -output $@ $(BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_BLOCK_LOOP_OPTS) "$(BLOCK_LOOP_CODE)"

src/stencil_sub_block_loops.hpp: gen-loops.pl Makefile
./$< -output $@ $(SUB_BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_SUB_BLOCK_LOOP_OPTS) "$(SUB_BLOCK_LOOP_CODE)"
src/stencil_sub_block_loops.hpp: bin/gen-loops.pl Makefile
$< -output $@ $(SUB_BLOCK_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_SUB_BLOCK_LOOP_OPTS) "$(SUB_BLOCK_LOOP_CODE)"

src/stencil_halo_loops.hpp: gen-loops.pl Makefile
./$< -output $@ $(HALO_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_HALO_LOOP_OPTS) "$(HALO_LOOP_CODE)"
src/stencil_halo_loops.hpp: bin/gen-loops.pl Makefile
$< -output $@ $(HALO_LOOP_OPTS) $(EXTRA_LOOP_OPTS) $(EXTRA_HALO_LOOP_OPTS) "$(HALO_LOOP_CODE)"

src/layout_macros.hpp: gen-layouts.pl
./$< -m > $@
src/layout_macros.hpp: bin/gen-layouts.pl
$< -m > $@

src/layouts.hpp: gen-layouts.pl
./$< -d > $@
src/layouts.hpp: bin/gen-layouts.pl
$< -d > $@

# Compile the stencil compiler.
# TODO: move this to its own makefile.
foldBuilder: src/foldBuilder/*.*pp src/foldBuilder/stencils/*.*pp $(FB_STENCIL_LIST)
$(FB_EXEC): src/foldBuilder/*.*pp src/foldBuilder/stencils/*.*pp $(FB_STENCIL_LIST)
$(FB_CXX) $(FB_CXXFLAGS) -Isrc/foldBuilder/stencils -o $@ src/foldBuilder/*.cpp $(EXTRA_FB_CXXFLAGS)

$(FB_STENCIL_LIST): src/foldBuilder/stencils/*.hpp
Expand All @@ -584,8 +590,8 @@ $(FB_STENCIL_LIST): src/foldBuilder/stencils/*.hpp

# Run the stencil compiler and post-process its output files.
# Use the gmake pattern-rule trick to specify simultaneous targets.
%/$(ST_MACRO_FILE) %/$(ST_CODE_FILE): foldBuilder
./$< $(FB_FLAGS) $(EXTRA_FB_FLAGS) \
%/$(ST_MACRO_FILE) %/$(ST_CODE_FILE): $(FB_EXEC)
$< $(FB_FLAGS) $(EXTRA_FB_FLAGS) \
-pm $*/$(ST_MACRO_FILE) -p$(FB_TARGET) $*/$(ST_CODE_FILE)
echo >> $*/$(ST_MACRO_FILE)
echo '// Settings from YASK Makefile' >> $*/$(ST_MACRO_FILE)
Expand All @@ -599,20 +605,21 @@ $(FB_STENCIL_LIST): src/foldBuilder/stencils/*.hpp
headers: $(GEN_HEADERS) $(FB_STENCIL_LIST)
@ echo 'Header files generated.'

%.$(arch).o: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS)
%.$(TAG).o: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS)
$(CXX) $(CXXFLAGS) -c -o $@ $<

%.$(arch).i: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS)
%.$(TAG).i: %.cpp src/*.hpp src/foldBuilder/*.hpp $(GEN_HEADERS)
$(CXX) $(CXXFLAGS) -E $< > $@

tags:
rm -f TAGS ; find . -name '*.[ch]pp' | xargs etags -C -a

clean:
rm -fv src/*.[io] *.optrpt src/*.optrpt *.s $(GEN_HEADERS) $(MAKE_REPORT_FILE)
rm -fv src/*.[io] *.optrpt */*.optrpt *.s $(GEN_HEADERS) $(MAKE_REPORT_FILE)

realclean: clean
rm -fv stencil*.exe foldBuilder TAGS $(MAKE_REPORT_FILE) $(CXXFLAGS_FILE) $(LFLAGS_FILE) $(FB_STENCIL_LIST)
rm -fv bin/yask*.exe make-report*.txt cxx-flags*.txt ld-flags.*txt $(FB_EXEC) $(FB_STENCIL_LIST) TAGS
rm -fv stencil*.exe stencil-tuner-summary.csh stencil-tuner.pl gen-layouts.pl gen-loops.pl get-loop-stats.pl
find . -name '*~' | xargs -r rm -v

help:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions stencil-tuner-summary.csh → bin/yask-tuner-summary.csh
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@
# Purpose: find best result from each GA search csv file.

if ( "-$1" == "-" ) then
if ( `echo stencil-tuner*.csv | wc -l` > 0 ) then
$0 stencil-tuner*.csv
if ( `echo yask-tuner*.csv | wc -l` > 0 ) then
$0 yask-tuner*.csv
else
echo "usage: $0 <csv-file(s) from stencil-tuner>"
echo "usage: $0 <csv-file(s) from yask-tuner>"
endif
exit
endif

echo "Summary of stencil-tuner results:"
echo "Summary of yask-tuner results:"
head -n1 $1
foreach f ($*)
echo '=========='
Expand Down
28 changes: 14 additions & 14 deletions stencil-tuner.pl → bin/yask-tuner.pl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ sub usage {
" -sde Run binary on SDE (for testing only).\n".
" -makePrefix=<CMD> Prefix make command with <CMD>.\n".
" -makeArgs=<ARGS> Pass additional <ARGS> to make command.\n".
" -runArgs=<ARGS> Pass additional <ARGS> to stencil-run command.\n".
" -runArgs=<ARGS> Pass additional <ARGS> to bin/yask.sh command.\n".
" -ranks=<N> Number of ranks to use on host (x-dimension only).\n".
"\nstencil options:\n".
" -stencil=<NAME> Specify stencil: iso3dfd, 3axis, 9axis, 3plane, cube, ave, awp, ... (required).\n".
Expand All @@ -120,7 +120,7 @@ sub usage {
" -dw=<N> Set size of 'w' dim to <N> (only for 4D problems).\n".
" -mem=<N>-<M> Set allowable est. memory usage between <N> and <M> GiB (default is $minGB-$maxGB).\n".
" -maxVecsInCluster=<N> Maximum vectors allowed in cluster (default is $maxVecsInCluster).\n".
" -noPrefetch Disable any prefetching (shortcut for '-pfdl1=0 -pfdl2=0').\n".
" -noPrefetch Disable any prefetching (shortcut for '-pfd_l1=0 -pfd_l2=0').\n".
" -noFolding Allow only 1D vectorization (in any direction).\n".
" -zLoop Force inner loop in 'z' direction.\n".
" -zLayout Force inner memory layout in 'z' direction.\n".
Expand Down Expand Up @@ -209,8 +209,8 @@ sub usage {
$stencil = $1;
}
elsif ($opt eq '-noprefetch') {
$geneRanges{$autoKey.'pfdl1'} = [ 0 ];
$geneRanges{$autoKey.'pfdl2'} = [ 0 ];
$geneRanges{$autoKey.'pfd_l1'} = [ 0 ];
$geneRanges{$autoKey.'pfd_l2'} = [ 0 ];
}
elsif ($opt =~ '^-maxvecsincluster=(\d+)$') {
$maxVecsInCluster = $1;
Expand Down Expand Up @@ -304,7 +304,7 @@ sub usage {
my $hostStr = defined $host ? $host : hostname();
my $timeStamp=`date +%Y-%m-%d_%H-%M-%S`;
chomp $timeStamp;
my $outFile = "stencil-tuner$searchTypeStr.$stencil.$arch.$hostStr.$timeStamp.csv";
my $outFile = "yask-tuner$searchTypeStr.$stencil.$arch.$hostStr.$timeStamp.csv";
print "Output will be saved in '$outFile'.\n";
$outFile = '/dev/null' if $checking;

Expand Down Expand Up @@ -504,8 +504,8 @@ sub usage {

# prefetch distances for l1 and l2.
# all non-pos numbers => no prefetching, so ~50% chance of being enabled.
[ -$maxPfdl1, $maxPfdl1, 1, 'pfdl1' ],
[ -$maxPfdl2, $maxPfdl2, 1, 'pfdl2' ],
[ -$maxPfdl1, $maxPfdl1, 1, 'pfd_l1' ],
[ -$maxPfdl2, $maxPfdl2, 1, 'pfd_l2' ],

# other build options.
[ 0, 100, 1, 'exprSize' ], # expression-size threshold.
Expand Down Expand Up @@ -734,7 +734,7 @@ ($$)

my $makeCmd = "$makePrefix make clean; ".
"$makePrefix make -j all EXTRA_MACROS='$macros' ".
"arch=$arch real_bytes=$realBytes stencil=$stencil radius=$radius $margs $makeArgs";
"stencil=$stencil arch=$arch real_bytes=$realBytes radius=$radius $margs $makeArgs";
$makeCmd = "echo 'build disabled'" if !$doBuild;
return $makeCmd;
}
Expand All @@ -744,14 +744,14 @@ ()
my $exePrefix = 'time';
$exePrefix .= " sde -$arch --" if $sde;

my $runCmd = "./stencil-run.sh";
my $runCmd = "bin/yask.sh";
if (defined $mic) {
$runCmd .= " -mic $mic";
} else {
$exePrefix .= " numactl -p 1" if $arch eq 'knl' && !$sde; # TODO: fix for cache mode.
$runCmd .= " -host $host" if defined $host;
}
$runCmd .= " -exe_prefix '$exePrefix' -arch $arch $runArgs";
$runCmd .= " -exe_prefix '$exePrefix' -stencil $stencil -arch $arch $runArgs";
return $runCmd;
}

Expand Down Expand Up @@ -1208,8 +1208,8 @@ sub fitness {
my $thread_divisor_exp = readHash($h, 'thread_divisor_exp', 0);
my $bthreads_exp = readHash($h, 'bthreads_exp', 0);
my $layout = readHash($h, 'layout', 1);
my $pfdl1 = readHash($h, 'pfdl1', 1);
my $pfdl2 = readHash($h, 'pfdl2', 1);
my $pfdl1 = readHash($h, 'pfd_l1', 1);
my $pfdl2 = readHash($h, 'pfd_l2', 1);
my $ompRegionSchedule = readHash($h, 'ompRegionSchedule', 1);
my $ompBlockSchedule = readHash($h, 'ompBlockSchedule', 1);

Expand Down Expand Up @@ -1357,8 +1357,8 @@ sub fitness {
# make sure pfld2 > pfld1.
$pfdl2 = $pfdl1 + 1 if $pfdl1 >= $pfdl2;
}
$macros .= " PFDL1=$pfdl1" if $pfdl1 > 0;
$macros .= " PFDL2=$pfdl2" if $pfdl2 > 0;
$mvars .= " pfd_l1=$pfdl1" if $pfdl1 > 0;
$mvars .= " pfd_l2=$pfdl2" if $pfdl2 > 0;

# cluster & fold.
$mvars .= " cluster=x=$cvs[0],y=$cvs[1],z=$cvs[2]";
Expand Down
Loading

0 comments on commit 29bde53

Please sign in to comment.