From 5bfa8ff48cdfd710be4f15759b7cee3da71eec44 Mon Sep 17 00:00:00 2001 From: Anthony Agelastos Date: Tue, 19 Sep 2023 08:49:22 -0600 Subject: [PATCH] preserving SPARTA documentation in the Manzano era --- .../manzano_era/Makefile.manzano_kokkos | 104 ++++ doc/sphinx/08_sparta/manzano_era/ats2.csv | 8 + .../08_sparta/manzano_era/ats2.gp-noauto | 25 + .../08_sparta/manzano_era/build-manzano.sh | 30 ++ .../08_sparta/manzano_era/cts1-0.25.csv | 8 + .../08_sparta/manzano_era/cts1-0.25.gp-noauto | 24 + .../08_sparta/manzano_era/cts1-0.50.csv | 8 + .../08_sparta/manzano_era/cts1-0.50.gp-noauto | 24 + .../08_sparta/manzano_era/cts1-1.00.csv | 8 + .../08_sparta/manzano_era/cts1-1.00.gp-noauto | 24 + .../08_sparta/manzano_era/cts1-2.00.csv | 8 + .../08_sparta/manzano_era/cts1-2.00.gp-noauto | 24 + doc/sphinx/08_sparta/manzano_era/cts1.csv | 8 + .../08_sparta/manzano_era/cts1.gp-noauto | 24 + .../manzano_era/cts1mem-0.25.gp-noauto | 24 + .../manzano_era/cts1mem-0.50.gp-noauto | 24 + .../manzano_era/cts1mem-1.00.gp-noauto | 24 + .../manzano_era/cts1mem-2.00.gp-noauto | 24 + doc/sphinx/08_sparta/manzano_era/log.sparta | 465 ++++++++++++++++ doc/sphinx/08_sparta/manzano_era/sparta.rst | 499 ++++++++++++++++++ .../08_sparta/manzano_era/sparta_fom.py | 236 +++++++++ 21 files changed, 1623 insertions(+) create mode 100644 doc/sphinx/08_sparta/manzano_era/Makefile.manzano_kokkos create mode 100644 doc/sphinx/08_sparta/manzano_era/ats2.csv create mode 100644 doc/sphinx/08_sparta/manzano_era/ats2.gp-noauto create mode 100755 doc/sphinx/08_sparta/manzano_era/build-manzano.sh create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-0.25.csv create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-0.25.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-0.50.csv create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-0.50.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-1.00.csv create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-1.00.gp-noauto 
create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-2.00.csv create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1-2.00.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1.csv create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1mem-0.25.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1mem-0.50.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1mem-1.00.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/cts1mem-2.00.gp-noauto create mode 100644 doc/sphinx/08_sparta/manzano_era/log.sparta create mode 100644 doc/sphinx/08_sparta/manzano_era/sparta.rst create mode 100755 doc/sphinx/08_sparta/manzano_era/sparta_fom.py diff --git a/doc/sphinx/08_sparta/manzano_era/Makefile.manzano_kokkos b/doc/sphinx/08_sparta/manzano_era/Makefile.manzano_kokkos new file mode 100644 index 00000000..a857e028 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/Makefile.manzano_kokkos @@ -0,0 +1,104 @@ +# manzano_kokkos = KOKKOS package with Serial backend (no OpenMP support), MPI compiler, default MPI + +SHELL = /bin/sh + +# --------------------------------------------------------------------- +# compiler/linker settings +# specify flags and libraries needed for your compiler + +CC = mpicxx +CCFLAGS = -g -O3 -DSPARTA_BIGBIG +SHFLAGS = -fPIC +DEPFLAGS = -M + +LINK = mpicxx +LINKFLAGS = -g -O3 +LIB = +SIZE = size + +ARCHIVE = ar +ARFLAGS = -rc +SHLIBFLAGS = -shared +KOKKOS_DEVICES = Serial +KOKKOS_ARCH = SKX + +# --------------------------------------------------------------------- +# SPARTA-specific settings +# specify settings for SPARTA features you will use +# if you change any -D setting, do full re-compile after "make clean" + +# SPARTA ifdef settings, OPTIONAL +# see possible settings in doc/Section_start.html#2_2 (step 4) + +SPARTA_INC = -DSPARTA_GZIP + +# MPI library, REQUIRED +# see discussion in doc/Section_start.html#2_2 (step 5) +# can point 
to dummy MPI library in src/STUBS as in Makefile.serial +# INC = path for mpi.h, MPI compiler settings +# PATH = path for MPI library +# LIB = name of MPI library + +MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 +MPI_PATH = +MPI_LIB = + +# JPEG library, OPTIONAL +# see discussion in doc/Section_start.html#2_2 (step 7) +# only needed if -DSPARTA_JPEG listed with SPARTA_INC +# INC = path for jpeglib.h +# PATH = path for JPEG library +# LIB = name of JPEG library + +JPG_INC = +JPG_PATH = +JPG_LIB = + +# --------------------------------------------------------------------- +# build rules and dependencies +# no need to edit this section + +include Makefile.package.settings +include Makefile.package + +EXTRA_INC = $(SPARTA_INC) $(PKG_INC) $(MPI_INC) $(JPG_INC) $(PKG_SYSINC) +EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) + +# Path to src files + +vpath %.cpp .. +vpath %.h .. 
+ +# Link target + +$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) + $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) + $(SIZE) $(EXE) + +# Library targets + +lib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) + +shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ + $(OBJ) $(EXTRA_LIB) $(LIB) + +# Compilation rules + +%.o:%.cpp $(EXTRA_CPP_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +%.d:%.cpp $(EXTRA_CPP_DEPENDS) + $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ + +%.o:%.cu $(EXTRA_CPP_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +# Individual dependencies + +DEPENDS = $(OBJ:.o=.d) +sinclude $(DEPENDS) diff --git a/doc/sphinx/08_sparta/manzano_era/ats2.csv b/doc/sphinx/08_sparta/manzano_era/ats2.csv new file mode 100644 index 00000000..1a3d1503 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/ats2.csv @@ -0,0 +1,8 @@ +No. Particles,Actual +7142385,436.3179 +14287466,575.0493 +28571551,751.5044 +35712209,840.5981 +42855549,873.2238 +49999941,894.6309 +57143091,921.2964 diff --git a/doc/sphinx/08_sparta/manzano_era/ats2.gp-noauto b/doc/sphinx/08_sparta/manzano_era/ats2.gp-noauto new file mode 100644 index 00000000..c537de6a --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/ats2.gp-noauto @@ -0,0 +1,25 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "ats2.png" + +set title "SPARTA Throughput Performance on ATS-2/Vortex" font "serif,22" +set xlabel "No. 
Particles" +set ylabel "Figure of Merit (M-particle-steps/sec)" + +# set xrange [1:64] +set key left top + +# set logscale x 2 +# set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 +set style line 3 linetype 6 dashtype 1 linecolor rgb "#0000FF" linewidth 2 pointtype 6 pointsize 3 + +plot "ats2.csv" using 1:2 with linespoints linestyle 3 diff --git a/doc/sphinx/08_sparta/manzano_era/build-manzano.sh b/doc/sphinx/08_sparta/manzano_era/build-manzano.sh new file mode 100755 index 00000000..a3193378 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/build-manzano.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +umask 022 +set -e +set -x + +dir_root=`git rev-parse --show-toplevel` +dir_src="${dir_root}/sparta" + +module unload intel +module unload openmpi-intel +module use /apps/modules/modulefiles-apps/cde/v3/ +module load cde/v3/devpack/intel-ompi +module list + +pushd "${dir_src}" +git clean -fdx +git reset --hard +popd +cp -a Makefile.manzano_kokkos "${dir_src}/src/MAKE" + +pushd "${dir_src}/src" +make yes-kokkos +make -j 16 manzano_kokkos +echo "Resultant build info:" +ls -lh `pwd -P`/spa_manzano_kokkos +popd + + +exit 0 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-0.25.csv b/doc/sphinx/08_sparta/manzano_era/cts1-0.25.csv new file mode 100644 index 00000000..655dfdd6 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-0.25.csv @@ -0,0 +1,8 @@ +No. 
Cores,Actual,Ideal,Memory (GiB) +1,27.083,27.083,11.23 +2,48.984,54.166,11.23 +4,86.519,108.332,11.37 +8,147.291,216.664,11.47 +16,245.349,433.328,11.71 +32,347.010,866.656,12.34 +48,406.431,1299.984,12.76 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-0.25.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1-0.25.gp-noauto new file mode 100644 index 00000000..7f4f17ff --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-0.25.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1-0.25.png" + +set title "SPARTA Strong Scaling Performance on CTS-1/Manzano (0.25 GiB/PE)" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Figure of Merit (time steps/sec)" + +set xrange [1:64] +set key left top + +set logscale x 2 +set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-0.25.csv" using 1:2 with linespoints linestyle 1, "" using 1:3 with line linestyle 2 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-0.50.csv b/doc/sphinx/08_sparta/manzano_era/cts1-0.50.csv new file mode 100644 index 00000000..db5c3a9d --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-0.50.csv @@ -0,0 +1,8 @@ +No. 
Cores,Actual,Ideal,Memory (GiB) +1,29.078,29.078,18.42 +2,55.678,58.156,18.41 +4,100.193,116.312,18.55 +8,173.462,232.624,18.65 +16,287.659,465.248,18.89 +32,395.545,930.496,19.52 +48,454.229,1395.744,20.01 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-0.50.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1-0.50.gp-noauto new file mode 100644 index 00000000..3303df1f --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-0.50.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1-0.50.png" + +set title "SPARTA Strong Scaling Performance on CTS-1/Manzano (0.50 GiB/PE)" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Figure of Merit (time steps/sec)" + +set xrange [1:64] +set key left top + +set logscale x 2 +set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-0.50.csv" using 1:2 with linespoints linestyle 1, "" using 1:3 with line linestyle 2 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-1.00.csv b/doc/sphinx/08_sparta/manzano_era/cts1-1.00.csv new file mode 100644 index 00000000..7c64095f --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-1.00.csv @@ -0,0 +1,8 @@ +No. 
Cores,Actual,Ideal,Memory (GiB) +1,32.306,32.306,33.51 +2,60.208,64.612,33.50 +4,111.753,129.224,33.63 +8,200.736,258.448,33.73 +16,328.496,516.896,33.98 +32,440.688,1033.792,34.55 +48,504.080,1550.688,35.03 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-1.00.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1-1.00.gp-noauto new file mode 100644 index 00000000..8661226a --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-1.00.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1-1.00.png" + +set title "SPARTA Strong Scaling Performance on CTS-1/Manzano (1.00 GiB/PE)" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Figure of Merit (time steps/sec)" + +set xrange [1:64] +set key left top + +set logscale x 2 +set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-1.00.csv" using 1:2 with linespoints linestyle 1, "" using 1:3 with line linestyle 2 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-2.00.csv b/doc/sphinx/08_sparta/manzano_era/cts1-2.00.csv new file mode 100644 index 00000000..cdac037e --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-2.00.csv @@ -0,0 +1,8 @@ +No. 
Cores,Actual,Ideal,Memory (GiB) +1,33.259,33.259,93.86 +2,65.506,66.519,93.85 +4,119.683,133.038,93.98 +8,230.232,266.075,94.08 +16,363.815,532.151,94.29 +32,477.281,1064.302,94.81 +48,535.983,1596.453,95.23 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1-2.00.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1-2.00.gp-noauto new file mode 100644 index 00000000..a52fe09d --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1-2.00.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1-2.00.png" + +set title "SPARTA Strong Scaling Performance on CTS-1/Manzano (2.00 GiB/PE)" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Figure of Merit (time steps/sec)" + +set xrange [1:64] +set key left top + +set logscale x 2 +set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-2.00.csv" using 1:2 with linespoints linestyle 1, "" using 1:3 with line linestyle 2 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1.csv b/doc/sphinx/08_sparta/manzano_era/cts1.csv new file mode 100644 index 00000000..4975392f --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1.csv @@ -0,0 +1,8 @@ +No. 
Cores,Actual,Ideal +1,33.56245,33.56245 +2,61.10145,67.1249 +4,121.2445833,134.2498 +8,224.82665,268.4996 +16,362.8721333,536.9992 +32,501.6367667,1073.9984 +48,551.21695,1610.9976 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1.gp-noauto new file mode 100644 index 00000000..72c9df27 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1.png" + +set title "SPARTA Strong Scaling Performance on CTS-1/Manzano" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Figure of Merit (M-particle-steps/sec)" + +set xrange [1:64] +set key left top + +set logscale x 2 +set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1.csv" using 1:2 with linespoints linestyle 1, "" using 1:3 with line linestyle 2 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1mem-0.25.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1mem-0.25.gp-noauto new file mode 100644 index 00000000..fcd5743d --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1mem-0.25.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1mem-0.25.png" + +set title "SPARTA Strong Scaling High-water Memory on CTS-1/Manzano (0.25 GiB/PE)" font "serif,22" +set xlabel "No. 
Processing Elements" +set ylabel "Maximum Resident Set Size (GiB)" + +set xrange [1:64] +set key left top + +set logscale x 2 +# set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-0.25.csv" using 1:4 with linespoints linestyle 1 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1mem-0.50.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1mem-0.50.gp-noauto new file mode 100644 index 00000000..be19f28a --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1mem-0.50.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1mem-0.50.png" + +set title "SPARTA Strong Scaling High-water Memory on CTS-1/Manzano (0.50 GiB/PE)" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Maximum Resident Set Size (GiB)" + +set xrange [1:64] +set key left top + +set logscale x 2 +# set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-0.50.csv" using 1:4 with linespoints linestyle 1 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1mem-1.00.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1mem-1.00.gp-noauto new file mode 100644 index 00000000..979a8104 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1mem-1.00.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1mem-1.00.png" + +set title "SPARTA Strong Scaling High-water Memory on CTS-1/Manzano (1.00 GiB/PE)" font "serif,22" +set xlabel "No. 
Processing Elements" +set ylabel "Maximum Resident Set Size (GiB)" + +set xrange [1:64] +set key left top + +set logscale x 2 +# set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-1.00.csv" using 1:4 with linespoints linestyle 1 diff --git a/doc/sphinx/08_sparta/manzano_era/cts1mem-2.00.gp-noauto b/doc/sphinx/08_sparta/manzano_era/cts1mem-2.00.gp-noauto new file mode 100644 index 00000000..72b2d910 --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/cts1mem-2.00.gp-noauto @@ -0,0 +1,24 @@ +#!/usr/bin/gnuplot +set terminal pngcairo enhanced size 1024, 768 dashed font 'Helvetica,18' +set output "cts1mem-2.00.png" + +set title "SPARTA Strong Scaling High-water Memory on CTS-1/Manzano (2.00 GiB/PE)" font "serif,22" +set xlabel "No. Processing Elements" +set ylabel "Maximum Resident Set Size (GiB)" + +set xrange [1:64] +set key left top + +set logscale x 2 +# set logscale y 2 + +set grid +show grid + +set datafile separator comma +set key autotitle columnheader + +set style line 1 linetype 6 dashtype 1 linecolor rgb "#FF0000" linewidth 2 pointtype 6 pointsize 3 +set style line 2 linetype 1 dashtype 2 linecolor rgb "#FF0000" linewidth 2 + +plot "cts1-2.00.csv" using 1:4 with linespoints linestyle 1 diff --git a/doc/sphinx/08_sparta/manzano_era/log.sparta b/doc/sphinx/08_sparta/manzano_era/log.sparta new file mode 100644 index 00000000..a470927f --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/log.sparta @@ -0,0 +1,465 @@ +SPARTA +KOKKOS mode is enabled (../kokkos.cpp:40) + requested 1 GPU(s) per node + requested 1 thread(s) per MPI task +Running on 1 MPI task(s) +package kokkos +package kokkos reduction atomic +# advect particles on uniform Cartesian grid +# single-step moves that cross grid cell boundaries are detected +# 
particle effectively moves from cell to cell +# particles reflect off global box boundaries +# +################################### +# Constants +################################### +variable boltz equal 1.380658E-23 + +################################### +# Gas parameters (Ar) +################################### +variable mue equal 1.656E-5 +variable mass equal 46.5E-27 +variable visc equal 1.656E-5 +variable gamma equal 1.400 +variable To equal 293. +variable pi equal 3.14159 + +variable cbar equal sqrt(8.*${boltz}*${To}/${mass}/${pi}) +variable cbar equal sqrt(8.*1.380658e-23*${To}/${mass}/${pi}) +variable cbar equal sqrt(8.*1.380658e-23*293/${mass}/${pi}) +variable cbar equal sqrt(8.*1.380658e-23*293/4.65e-26/${pi}) +variable cbar equal sqrt(8.*1.380658e-23*293/4.65e-26/3.14159) +variable uspeed equal sqrt(${gamma}*${boltz}*${To}/${mass}) +variable uspeed equal sqrt(1.4*${boltz}*${To}/${mass}) +variable uspeed equal sqrt(1.4*1.380658e-23*${To}/${mass}) +variable uspeed equal sqrt(1.4*1.380658e-23*293/${mass}) +variable uspeed equal sqrt(1.4*1.380658e-23*293/4.65e-26) + +################################### +# Trajectory inputs +################################### +variable mach equal 1.71 +variable L equal 1. +variable Vo equal ${mach}*${uspeed} +variable Vo equal 1.71*${uspeed} +variable Vo equal 1.71*348.991145588143 +variable nden equal 1.E20 +variable Vo equal ${mach}*${uspeed} +variable Vo equal 1.71*${uspeed} +variable Vo equal 1.71*348.991145588143 + +variable surftemp equal 293. 
+variable temp equal 293.00 +variable beta equal 0.000 + +################################### +# Simulation initialization standards +################################### +variable ppc equal 8 +#variable nmfp equal 200 +variable cpmfp equal 4 + +################################### +# Parameter calculations +################################### +variable Vx equal ${Vo}*cos(${beta}*2*PI/360) +variable Vx equal 596.774858955725*cos(${beta}*2*PI/360) +variable Vx equal 596.774858955725*cos(0*2*PI/360) +variable Vy equal ${Vo}*sin(${beta}*2*PI/360) +variable Vy equal 596.774858955725*sin(${beta}*2*PI/360) +variable Vy equal 596.774858955725*sin(0*2*PI/360) + + +variable mfp equal 2*${mue}/(${nden}*${mass}*${cbar}) +variable mfp equal 2*1.656e-05/(${nden}*${mass}*${cbar}) +variable mfp equal 2*1.656e-05/(1e+20*${mass}*${cbar}) +variable mfp equal 2*1.656e-05/(1e+20*4.65e-26*${cbar}) +variable mfp equal 2*1.656e-05/(1e+20*4.65e-26*470.674457970473) + +variable xmin equal -5.0*${L} +variable xmin equal -5.0*1 +variable xmax equal 5.1*${L} +variable xmax equal 5.1*1 +variable ymin equal -5.1*${L} +variable ymin equal -5.1*1 +variable ymax equal 5.1*${L} +variable ymax equal 5.1*1 + +variable xncells equal (${xmax}-${xmin})/${mfp}*${cpmfp} +variable xncells equal (5.1-${xmin})/${mfp}*${cpmfp} +variable xncells equal (5.1--5)/${mfp}*${cpmfp} +variable xncells equal (5.1--5)/0.0151327112073885*${cpmfp} +variable xncells equal (5.1--5)/0.0151327112073885*4 +variable yncells equal (${ymax}-${ymin})/${mfp}*${cpmfp} +variable yncells equal (5.1-${ymin})/${mfp}*${cpmfp} +variable yncells equal (5.1--5.1)/${mfp}*${cpmfp} +variable yncells equal (5.1--5.1)/0.0151327112073885*${cpmfp} +variable yncells equal (5.1--5.1)/0.0151327112073885*4 + +variable Fnum equal ${nden}*(${xmax}-${xmin})*(${ymax}-${ymin})/${ppc}/${xncells}/${yncells} +variable Fnum equal 1e+20*(${xmax}-${xmin})*(${ymax}-${ymin})/${ppc}/${xncells}/${yncells} +variable Fnum equal 
1e+20*(5.1-${xmin})*(${ymax}-${ymin})/${ppc}/${xncells}/${yncells} +variable Fnum equal 1e+20*(5.1--5)*(${ymax}-${ymin})/${ppc}/${xncells}/${yncells} +variable Fnum equal 1e+20*(5.1--5)*(5.1-${ymin})/${ppc}/${xncells}/${yncells} +variable Fnum equal 1e+20*(5.1--5)*(5.1--5.1)/${ppc}/${xncells}/${yncells} +variable Fnum equal 1e+20*(5.1--5)*(5.1--5.1)/8/${xncells}/${yncells} +variable Fnum equal 1e+20*(5.1--5)*(5.1--5.1)/8/2669.71327519122/${yncells} +variable Fnum equal 1e+20*(5.1--5)*(5.1--5.1)/8/2669.71327519122/2696.14607989608 + +variable tstep equal (-${xmin}+${xmax})/${Vx}/${xncells}/10/4 +variable tstep equal (--5+${xmax})/${Vx}/${xncells}/10/4 +variable tstep equal (--5+5.1)/${Vx}/${xncells}/10/4 +variable tstep equal (--5+5.1)/596.774858955725/${xncells}/10/4 +variable tstep equal (--5+5.1)/596.774858955725/2669.71327519122/10/4 + +################################### +# Print variable values to log file +################################### +print " Velocity = ${Vo}" + Velocity = 596.774858955725 +print " Density = ${nden}" + Density = 1e+20 +print " X-Velocity = ${Vx}" + X-Velocity = 596.774858955725 +print " Y-Velocity = ${Vy}" + Y-Velocity = 0 +print " Temp = ${temp}" + Temp = 293 +print " cbar = ${cbar}" + cbar = 470.674457970473 +print " mean free path = ${mfp}" + mean free path = 0.0151327112073885 +print " cells per free stream mean free path = ${cpmfp}" + cells per free stream mean free path = 4 +print " sound speed = ${uspeed}" + sound speed = 348.991145588143 +#print " number of mean free paths = ${nmfp}" +print " x-min = ${xmin}" + x-min = -5 +print " x-max = ${xmax}" + x-max = 5.1 +print " y-min = ${ymin}" + y-min = -5.1 +print " y-max = ${ymax}" + y-max = 5.1 +print " x-cells = ${xncells}" + x-cells = 2669.71327519122 +print " y-cells = ${yncells}" + y-cells = 2696.14607989608 +print " Simulation Ratio = ${Fnum}" + Simulation Ratio = 178905428504860 +print " Timestep = ${tstep}" + Timestep = 1.584842987717e-07 + 
+################################### +# Simulation parameters +################################### +seed 847384 +dimension 2 +global nrho ${nden} +global nrho 1e+20 +global fnum ${Fnum} +global fnum 178905428504860 + +timestep ${tstep} +timestep 1.584842987717e-07 +global gridcut 1.E-1 +#global surfmax 10000 +#global surfpush yes +#global comm/sort yes +#global particle/reorder 10 + +################################### +# Grid generation +################################### +boundary o o p +create_box ${xmin} ${xmax} ${ymin} ${ymax} -0.5 0.5 +create_box -5 ${xmax} ${ymin} ${ymax} -0.5 0.5 +create_box -5 5.1 ${ymin} ${ymax} -0.5 0.5 +create_box -5 5.1 -5.1 ${ymax} -0.5 0.5 +create_box -5 5.1 -5.1 5.1 -0.5 0.5 +Created orthogonal box = (-5 -5.1 -0.5) to (5.1 5.1 0.5) +create_grid ${xncells} ${yncells} 1 block * * * +create_grid 2669.71327519122 ${yncells} 1 block * * * +create_grid 2669.71327519122 2696.14607989608 1 block * * * +Created 7195624 child grid cells + CPU time = 4.52432 secs + create/ghost percent = 66.8847 33.1153 +#read_restart restart.%.100000 + +#balance_grid rcb cell +#write_grid parent grid.out + +##################################### +# Gas/Collision Model Specification # +##################################### +species air.species N2 +mixture air vstream ${Vx} ${Vy} 0.0 temp ${temp} +mixture air vstream 596.774858955725 ${Vy} 0.0 temp ${temp} +mixture air vstream 596.774858955725 0 0.0 temp ${temp} +mixture air vstream 596.774858955725 0 0.0 temp 293 +mixture air N2 frac 1.0 + +mixture air vstream ${Vx} ${Vy} 0.0 temp ${temp} +mixture air vstream 596.774858955725 ${Vy} 0.0 temp ${temp} +mixture air vstream 596.774858955725 0 0.0 temp ${temp} +mixture air vstream 596.774858955725 0 0.0 temp 293 + +collide vss all air.vss relax variable +collide_modify vremax 10000 yes vibrate no rotate smooth nearcp yes 10 + +##################################################### +# Surface generation and collision specification 
+##################################################### +read_surf circle_R0.5_P10000.surf group 1 invert + 10000 points + 10000 lines + -0.5 0.5 xlo xhi + -0.5 0.5 ylo yhi + 0 0 zlo zhi + 0.000314159 min line length + 1058 0 = cells overlapping surfs, overlap cells with unmarked corner pts + 7140234 54332 1058 = cells outside/inside/overlapping surfs + 1058 = surf cells with 1,2,etc splits + 102.235 102.235 = cell-wise and global flow volume + CPU time = 4.70811 secs + read/check/sort/surf2grid/ghost/inout/particle percent = 0.319644 0.0735068 3.47566 65.2149 30.9163 25.9965 6.2233e-06 + surf2grid time = 3.07039 secs + map/comm1/comm2/comm3/comm4/split percent = 30.2458 0.0792146 44.2833 1.61381 5.0372 3.48686 +#surf_collide 1 specular noslip +surf_collide 1 diffuse ${surftemp} 1.0 +surf_collide 1 diffuse 293 1.0 +surf_modify 1 collide 1 +#surf_react 1 prob air.surf +#surf_modify 1 collide 1 +#surf_modify 1 collide 1 react 1 + +################################### +# Boundary conditions +################################### +fix in emit/face air xlo xhi ylo yhi + +# adapt the grid around the surface before running the simulation +adapt_grid all refine surf all 0.00001 iterate 5 +Adapting grid ... 
+WARNING: One or more fix inflow faces oppose streaming velocity (../fix_emit_face.cpp:195) + 7147718 61648 8458 = cells outside/inside/overlapping surfs + 8458 = surf cells with 1,2,etc splits + 102.235 102.235 = cell-wise and global flow volume + 7400 cells refined, 0 cells coarsened + adapted to 7217824 grid cells + CPU time = 11.9306 secs + adapt/redo percent = 81.9966 18.0034 + +################################### +# Initialize simulation +################################### +create_particles air n 0 +Created 57144494 particles + CPU time = 24.6205 secs +#fix check grid/check 1 error + +################################### +# Unsteady Output +################################### +stats_style step cpu np nattempt ncoll + +compute 2 grid all all nrho +compute 5 thermal/grid all all temp +compute 3 grid all all trot + +fix 5 ave/grid all 1 1000 10000 c_5[*] ave one +fix 2 ave/grid all 1 1000 10000 c_2[*] ave one +fix 3 ave/grid all 1 1000 10000 c_3[*] ave one + +dump dgrid1 grid all 10000 tmp_grid.* id f_2[*] f_5[*] f_3[*] + +compute 1b lambda/grid c_2[1] NULL N2 kall + +#fix 10 adapt 1000 all refine coarsen value c_1b[2] 0.5 2.0 # combine min thresh less more maxlevel 10 cells 2 2 1 file grid.* + +fix load balance 1000 1.1 rcb part + +stats_style step cpu np nattempt ncoll maxlevel + +stats 50 +run 5800 +WARNING: One or more fix inflow faces oppose streaming velocity (../fix_emit_face.cpp:195) +Memory usage per proc in Mbytes: + particles (ave,min,max) = 7224.3 7224.3 7224.3 + grid (ave,min,max) = 1323.11 1323.11 1323.11 + surf (ave,min,max) = 1.02997 1.02997 1.02997 + total (ave,min,max) = 9870.06 9870.06 9870.06 +Step CPU Np Natt Ncoll Maxlevel + 0 0 57144494 0 0 4 + 50 2.058492 57144353 202798 161581 4 + 100 3.8934437 57144165 194559 151949 4 + 150 5.9264821 57144277 198187 152510 4 + 200 7.8741561 57144501 201549 153420 4 + 250 10.032195 57144624 203458 152778 4 + 300 12.061168 57144456 205469 153049 4 + 350 14.190343 57144900 207345 153059 4 + 400 16.439252 
57144623 209558 153299 4 + 450 18.708537 57144477 211065 153490 4 + 500 21.039468 57144509 212701 153993 4 + 550 23.384597 57144361 214613 154199 4 + 600 25.728705 57143966 215891 154226 4 + 650 28.143147 57143817 216934 154032 4 + 700 30.525966 57143733 218282 154220 4 + 750 32.863796 57143665 218738 153527 4 + 800 35.31154 57143764 220506 154561 4 + 850 37.780522 57143900 220210 153766 4 + 900 40.252289 57143662 222260 154931 4 + 950 42.799034 57143331 222427 154383 4 + 1000 46.784784 57143434 222924 153828 4 + 1050 49.320878 57143942 224776 154388 4 + 1100 51.880107 57143933 225323 154800 4 + 1150 54.461474 57143730 225924 154748 4 + 1200 57.054725 57143876 226694 154798 4 + 1250 59.660279 57143976 226824 154500 4 + 1300 62.393098 57143087 227364 154503 4 + 1350 64.924131 57143314 227876 154537 4 + 1400 67.429157 57143344 229780 155655 4 + 1450 70.104509 57142918 228488 154507 4 + 1500 72.740532 57142183 229053 154551 4 + 1550 75.442812 57142280 230351 155093 4 + 1600 78.197287 57142138 231021 155301 4 + 1650 80.994013 57142211 230483 154476 4 + 1700 83.761101 57142448 231495 155165 4 + 1750 86.65143 57142188 231914 155327 4 + 1800 89.493893 57142476 232105 155152 4 + 1850 92.307896 57142532 232156 154803 4 + 1900 95.235481 57142581 232132 154246 4 + 1950 98.112455 57142456 233300 154753 4 + 2000 102.4587 57142235 234294 155191 4 + 2050 105.32818 57142290 234420 155515 4 + 2100 108.16064 57142479 234743 155216 4 + 2150 110.98096 57142326 234892 155137 4 + 2200 113.9036 57142344 234756 155028 4 + 2250 116.90427 57142152 234546 154570 4 + 2300 119.89731 57142410 235102 155120 4 + 2350 122.82959 57143159 235124 154849 4 + 2400 125.91148 57143258 235922 155123 4 + 2450 128.99203 57143532 236249 155125 4 + 2500 131.94321 57143525 236689 155499 4 + 2550 135.06612 57143769 236501 154799 4 + 2600 138.148 57143406 237184 155281 4 + 2650 141.24436 57143281 237065 155046 4 + 2700 144.33909 57143402 238181 155275 4 + 2750 147.43203 57143217 237535 155210 4 + 2800 150.57207 
57143066 238571 155327 4 + 2850 153.71315 57142836 239021 155527 4 + 2900 156.84212 57143081 238178 155200 4 + 2950 159.93988 57142986 239445 155779 4 + 3000 164.58071 57143191 238934 155384 4 + 3050 167.73452 57143336 239788 155558 4 + 3100 170.90471 57143217 238930 154740 4 + 3150 174.09899 57143569 239882 155315 4 + 3200 177.29528 57143549 239742 154925 4 + 3250 180.50476 57143787 239765 154987 4 + 3300 183.70915 57143638 240448 155209 4 + 3350 187.00156 57143808 240348 155198 4 + 3400 190.2722 57143992 241083 155702 4 + 3450 193.49754 57143869 242247 156229 4 + 3500 196.74938 57143981 241246 155561 4 + 3550 199.99076 57144250 241679 155689 4 + 3600 203.29537 57144267 241782 155402 4 + 3650 206.64881 57143820 241640 155925 4 + 3700 209.97137 57143663 242206 155207 4 + 3750 213.2846 57143992 240968 154943 4 + 3800 216.62289 57143834 242340 155880 4 + 3850 219.91016 57143645 242296 155488 4 + 3900 223.25863 57143393 242695 155694 4 + 3950 226.58709 57143785 243128 155667 4 + 4000 231.38424 57143428 242862 155683 4 + 4050 234.78426 57143347 244074 156069 4 + 4100 238.16172 57143567 242428 155394 4 + 4150 241.58793 57143465 243571 155595 4 + 4200 244.97722 57143641 244152 156100 4 + 4250 248.39276 57143404 243866 155456 4 + 4300 251.86078 57143622 244075 155953 4 + 4350 255.29979 57143672 244470 156122 4 + 4400 258.68111 57143584 244237 155649 4 + 4450 262.11313 57143677 243505 155114 4 + 4500 265.5987 57143619 245017 156070 4 + 4550 269.11518 57143354 244043 155607 4 + 4600 272.58861 57143266 244995 155585 4 + 4650 276.09737 57143487 244923 155679 4 + 4700 279.59686 57143374 245508 156481 4 + 4750 283.11732 57143220 245794 156174 4 + 4800 286.62826 57143543 245858 155985 4 + 4850 290.16258 57143537 246163 156154 4 + 4900 293.70587 57143158 246396 155852 4 + 4950 297.24365 57143156 245881 156296 4 + 5000 302.22578 57143134 246183 156006 4 + 5050 305.82038 57143224 245437 155265 4 + 5100 309.37403 57143263 246728 156438 4 + 5150 312.91463 57143336 246483 156143 4 + 
5200 316.49343 57143277 246507 155968 4 + 5250 320.04961 57143424 247266 156740 4 + 5300 323.6269 57143536 247152 156027 4 + 5350 327.22681 57143678 246797 156411 4 + 5400 330.8471 57143565 246420 155714 4 + 5450 334.43881 57143589 248494 156634 4 + 5500 338.05504 57143522 247892 156006 4 + 5550 341.61585 57143299 247864 156202 4 + 5600 345.22658 57143326 247396 155602 4 + 5650 348.86079 57143101 247539 156261 4 + 5700 352.46277 57143321 248061 156337 4 + 5750 356.1278 57143326 247768 156090 4 + 5800 359.74302 57143091 248584 156399 4 +Loop time of 359.743 on 1 procs for 5800 steps with 57143091 particles + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Move | 203.67 | 203.67 | 203.67 | 0.0 | 56.62 +Coll | 34.597 | 34.597 | 34.597 | 0.0 | 9.62 +Sort | 85.919 | 85.919 | 85.919 | 0.0 | 23.88 +Comm | 5.8387 | 5.8387 | 5.8387 | 0.0 | 1.62 +Modify | 29.706 | 29.706 | 29.706 | 0.0 | 8.26 +Output | 0.0028535 | 0.0028535 | 0.0028535 | 0.0 | 0.00 +Other | | 0.01026 | | | 0.00 + +Particle moves = 331436303918 (331B) +Cells touched = 343041342060 (343B) +Particle comms = 0 (0K) +Boundary collides = 0 (0K) +Boundary exits = 4373746 (4.37M) +SurfColl checks = 14635081 (14.6M) +SurfColl occurs = 351623 (0.352M) +Surf reactions = 0 (0K) +Collide attempts = 1355727883 (1.36B) +Collide occurs = 896228959 (896M) +Reactions = 0 (0K) +Particles stuck = 0 +Axisymm bad moves = 0 + +Particle-moves/CPUsec/proc: 9.21314e+08 +Particle-moves/step: 5.71442e+07 +Cell-touches/particle/step: 1.03501 +Particle comm iterations/step: 1 +Particle fraction communicated: 0 +Particle fraction colliding with boundary: 0 +Particle fraction exiting boundary: 1.31963e-05 +Surface-checks/particle/step: 4.41565e-05 +Surface-collisions/particle/step: 1.06091e-06 +Surf-reactions/particle/step: 0 +Collision-attempts/particle/step: 0.00409046 +Collisions/particle/step: 0.00270408 +Reactions/particle/step: 0 
+ +Particles: 5.71431e+07 ave 5.71431e+07 max 5.71431e+07 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Cells: 7.21782e+06 ave 7.21782e+06 max 7.21782e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +GhostCell: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +EmptyCell: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Surfs: 10000 ave 10000 max 10000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +GhostSurf: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + diff --git a/doc/sphinx/08_sparta/manzano_era/sparta.rst b/doc/sphinx/08_sparta/manzano_era/sparta.rst new file mode 100644 index 00000000..3639ec4a --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/sparta.rst @@ -0,0 +1,499 @@ +****** +SPARTA +****** + +This is the documentation for the ATS-5 Benchmark [SPARTA]_. The content herein +was created by the following authors (in alphabetical order). + +- `Anthony M. Agelastos `_ +- `Michael A. Gallis `_ +- `Stan Moore `_ +- `Douglas M. Pase `_ +- `Joel O. Stevenson `_ + +This material is based upon work supported by the Sandia National Laboratories +(SNL), a multimission laboratory managed and operated by National Technology and +Engineering Solutions of Sandia under the U.S. Department of Energy's National +Nuclear Security Administration under contract DE-NA0003525. Content herein +considered unclassified with unlimited distribution under SAND2023-01070O. + + +Purpose +======= + +Heavily pulled from their [site]_: + + SPARTA is an acronym for **S**\ tochastic **PA**\ rallel **R**\ arefied-gas + **T**\ ime-accurate **A**\ nalyzer. SPARTA is a parallel Direct Simulation + Monte Carlo (DSMC) code for performing simulations of low-density gases in + 2d or 3d. Particles advect through a hierarchical Cartesian grid that + overlays the simulation box. The grid is used to group particles by grid + cell for purposes of performing collisions and chemistry. Physical objects + with triangulated surfaces can be embedded in the grid, creating cut and + split grid cells. 
The grid is also used to efficiently find particle/surface + collisions. SPARTA runs on single processors or in parallel using + message-passing techniques and a spatial-decomposition of the simulation + domain. The code is designed to be easy to modify or extend with new + functionality. Running SPARTA and the input command syntax is very similar + to the LAMMPS molecular dynamics code (but SPARTA and LAMMPS use different + underlying algorithms). + + +Characteristics +=============== + + +Application Version +------------------- + +The target application version corresponds to the Git SHA that the SPARTA git +submodule at the root of this repository is set to, i.e., within ``sparta``. + + +Problem +------- + +This problem models 2D hypersonic flow of nitrogen over a circle with periodic +boundary conditions in the z dimension, which physically translates to 3D flow +over a cylinder of infinite length. Particles are continuously emitted from the +4 faces of the simulation box during the simulation, bounce off the circle, and +then exit. The hierarchical cartesian grid is statically adapted to 6 levels +around the circle. The memory array used to hold particles is reordered by grid +cell every 100 timesteps to improve data locality and cache access patterns. + +This problem is present within the upstream SPARTA repository. The components of +this problem are listed below (paths given are within SPARTA repository). Each +of these files will need to be copied into a run directory for the simulation. + +``examples/cylinder/in.cylinder`` + This is the primary input file that controls the simulation. Some parameters + within this file may need to be changed depending upon what is being run + (i.e., these parameters control how long this simulation runs for and how + much memory it uses). + +``examples/cylinder/circle_R0.5_P10000.surf`` + This is the mesh file and will remain unchanged. 
+ +``examples/cylinder/air.*`` + These three files (i.e., ``air.species``, ``air.tce``, and ``air.vss``) + contain the composition and reactions inherent with the air. These files, + like the mesh file, are not to be edited. + +An excerpt from this input file that has its key parameters is +provided below. + +.. code-block:: + :emphasize-lines: 5,11 + + + 37 ################################### + 38 # Simulation initialization standards + 39 ################################### + 40 variable ppc equal 34 + + 149 ################################### + 150 # Unsteady Output + 151 ################################### + + 174 run 1000 + +These parameters are described below. + +``ppc`` + This sets the **p**\ articles **p**\ er **c**\ ell variable. This variable + controls the size of the problem and, accordingly, the amount of memory it + uses. + +``run`` + This sets how many iterations it will run for, which also controls the wall + time required for termination. + + +Figure of Merit +--------------- + +Each SPARTA simulation writes out a file named "log.sparta". At the end of this +file is a block that resembles the following example (this is from the +ATS-2/Sierra case discussed below with 57,143,091 particles whose full output is +within :download:`log.sparta <log.sparta>`). + +.. 
code-block:: + :emphasize-lines: 8-14 + + Step CPU Np Natt Ncoll Maxlevel + 0 0 57144494 0 0 4 + 50 2.058492 57144353 202798 161581 4 + 100 3.8934437 57144165 194559 151949 4 + 150 5.9264821 57144277 198187 152510 4 + 200 7.8741561 57144501 201549 153420 4 + 250 10.032195 57144624 203458 152778 4 + 300 12.061168 57144456 205469 153049 4 + 350 14.190343 57144900 207345 153059 4 + 400 16.439252 57144623 209558 153299 4 + 450 18.708537 57144477 211065 153490 4 + 500 21.039468 57144509 212701 153993 4 + 550 23.384597 57144361 214613 154199 4 + 600 25.728705 57143966 215891 154226 4 + 650 28.143147 57143817 216934 154032 4 + 700 30.525966 57143733 218282 154220 4 + 750 32.863796 57143665 218738 153527 4 + 800 35.31154 57143764 220506 154561 4 + 850 37.780522 57143900 220210 153766 4 + 900 40.252289 57143662 222260 154931 4 + 950 42.799034 57143331 222427 154383 4 + 1000 46.784784 57143434 222924 153828 4 + ... + 5800 359.74302 57143091 248584 156399 4 + Loop time of 359.743 on 1 procs for 5800 steps with 57143091 particles + +The quantity of interest (QOI) is "mega particle steps per second," which can be +computed from the above table by multiplying the third column (no. of particles) by +the first (no. of steps), dividing the result by the second column (elapsed time +in seconds), and finally dividing by 1,000,000 (normalize). + +The number of steps must be large enough so the times mentioned in the second +column exceed 600 (i.e., so it runs for at least 10 minutes). The figure of +merit (FOM) is the harmonic mean of the QOI computed from the times between 300 +and 600 seconds. + +A Python script (:download:`sparta_fom.py <sparta_fom.py>`) is included within +the repository to aid in computing this quantity. Pass it the ``-h`` command +line argument to view its help page for additional information. + + +System Information +================== + +The platforms utilized for benchmarking activities are listed and described below. 
+ +* Commodity Technology System 1 (CTS-1) with Intel Cascade Lake processors, + known as Manzano at SNL (see :ref:`SystemCTS1`) +* Advanced Technology System 3 (ATS-3), also known as Crossroads (see + :ref:`SystemATS3`) +* Advanced Technology System 2 (ATS-2), also known as Sierra (see + :ref:`SystemATS2`) + + +.. _SystemCTS1: + +CTS-1/Manzano +------------- + +.. note:: + The CTS-1/Manzano system is used as a placeholder for when ATS-3/Crossroads + is available. + +The Manzano HPC cluster has 1,488 compute nodes connected together by a +high-bandwidth, low-latency Intel OmniPath network where each compute node uses +two Intel Xeon Platinum 8268 (Cascade Lake) processors. Each processor has 24 +cores, and each node has 48 physical cores and 96 virtual cores. Each core has a +base frequency of 2.9 GHz and a max frequency of 3.9 GHz. Cores support two +AVX512 SIMD units each, with peak floating-point performance (RPEAK) of 2.9 GHz +x 32 FLOP/clock x 48 cores = 4.45 TF/s. Measured DGEMM performance is just under +3.5 TF/s per node (78.5% efficiency). + +Compute nodes are a Non-Uniform Memory Access (NUMA) design, with each processor +representing a separate NUMA domain. Each processor (domain) supports six +channels of 2,933 MT/s DDR4 memory. Total memory capacity is 4 GB/core, or 192 +GB/node. Memory bandwidth for the node is 12 channels x 8 bytes / channel x +2.933 GT/s = 281.568 GB/s, and measured STREAM TRIAD throughput for local memory +access is approximately 215 GB/s (76% efficiency). Cache design uses three +levels of cache, with L1 using separate instruction and data caches, L2 unifying +instruction and data, and L3 being shared across all cores in the processor. The +cache size is 1.5 MB/core, 35.75 MB/processor, or 71.5 MB/node. + + +.. _SystemATS3: + +ATS-3/Crossroads +---------------- + +This system is not available yet but is slated to be the reference platform. + + +.. 
_SystemATS2: + +ATS-2/Sierra +------------ + +This system has a plethora of compute nodes that are made up of Power9 +processors with four NVIDIA V100 GPUs. Please refer to [Sierra-LLNL]_ for more +detailed information. + +A Sierra application and regression testbed system named Vortex, housed at SNL, +was used for benchmarking for convenience. Vortex has the same compute node +hardware as Sierra. + + +Building +======== + +Instructions are provided on how to build SPARTA for the following systems: + +* Generic (see :ref:`BuildGeneric`) +* Commodity Technology System 1 (CTS-1) with Intel Cascade Lake processors, + known as Manzano at SNL (see :ref:`BuildCTS1`) +* Advanced Technology System 2 (ATS-2), also known as Sierra (see + :ref:`BuildATS2`) + +If submodules were cloned within this repository, then the source code to build +SPARTA is already present at the top level within the "sparta" folder. + + +.. _BuildGeneric: + +Generic +------- + +Refer to SPARTA's [build]_ documentation for generic instructions. + + +.. _BuildCTS1: + +CTS-1/Manzano +------------- + +.. note:: + The CTS-1/Manzano system is used as a placeholder for when ATS-3/Crossroads + is available. + +Instructions for building on Manzano are provided below. These instructions +assume this repository has been cloned and that the current working directory is +at the top level of this repository. + +.. code-block:: bash + + cd doc/sphinx/08_sparta/manzano_era + ./build-manzano.sh + + +.. _BuildATS2: + +ATS-2/Vortex +------------ + +Instructions for building on Sierra are provided below. + +.. 
code-block:: bash + + module load cuda/11.2.0 + module load gcc/8.3.1 + git clone https://github.com/sparta/sparta.git sparta + pushd "sparta/src" + make yes-kokkos + make -j 64 vortex_kokkos + ls -lh `pwd -P`/spa_vortex_kokkos + popd + + +Running +======= + +Instructions are provided on how to run SPARTA for the following systems: + +* Commodity Technology System 1 (CTS-1) with Intel Cascade Lake processors, + known as Manzano at SNL (see :ref:`RunCTS1`) +* Advanced Technology System 2 (ATS-2), also known as Sierra (see + :ref:`RunATS2`) + + +.. _RunCTS1: + +CTS-1/Manzano +------------- + +.. note:: + The CTS-1/Manzano system is used as a placeholder for when ATS-3/Crossroads + is available. + +An example of how to run the test case on Manzano is provided below. + +.. code-block:: bash + + module unload intel + module unload openmpi-intel + module use /apps/modules/modulefiles-apps/cde/v3/ + module load cde/v3/devpack/intel-ompi + mpiexec \ + --np ${num_procs} \ + --bind-to socket \ + --map-by socket:span \ + "sparta/src/spa_manzano_kokkos" -in "in.cylinder" \ + >"sparta.out" 2>&1 + + +.. _RunATS2: + +ATS-2/Vortex +------------ + +An example of how to run the test case with a single GPU on Sierra is provided +below. + +.. code-block:: bash + + module load gcc/8.3.1 + module load cuda/11.2.0 + jsrun \ + -M "-gpu -disable_gdr" \ + -n 1 -a 1 -c 1 -g 1 -d packed \ + "sparta/src/spa_vortex_kokkos" -in "in.cylinder" \ + -k on g 1 -sf kk -pk kokkos reduction atomic \ + >"sparta.out" 2>&1 + + + +Verification of Results +======================= + +Results from SPARTA are provided on the following systems: + +* Commodity Technology System 1 (CTS-1) with Intel Cascade Lake processors, + known as Manzano at SNL (see :ref:`ResultsCTS1`) +* Advanced Technology System 2 (ATS-2), also known as Sierra (see + :ref:`ResultsATS2`) + + +.. _ResultsCTS1: + +CTS-1/Manzano +------------- + +.. 
note:: + The CTS-1/Manzano system is used as a placeholder for when ATS-3/Crossroads + is available. + +Strong scaling performance (i.e., fixed problem size being run on different MPI +rank counts) plots of SPARTA on CTS-1/Manzano are provided within the following +subsections. + +``ppc`` 11 (0.25 GiB/PE) +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. csv-table:: SPARTA Strong Scaling Performance and Memory on Manzano with ppc=11 (0.25 GiB/PE) + :file: cts1-0.25.csv + :align: center + :widths: 10, 10, 10, 10 + :header-rows: 1 + +.. figure:: cts1-0.25.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Performance on Manzano with ppc=11 (0.25 GiB/PE) + + SPARTA Strong Scaling Performance on Manzano with ppc=11 (0.25 GiB/PE) + +.. figure:: cts1mem-0.25.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Memory on Manzano with ppc=11 (0.25 GiB/PE) + + SPARTA Strong Scaling Memory on Manzano with ppc=11 elements (0.25 GiB/PE) + +``ppc`` 21 (0.50 GiB/PE) +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. csv-table:: SPARTA Strong Scaling Performance and Memory on Manzano with ppc=21 (0.50 GiB/PE) + :file: cts1-0.50.csv + :align: center + :widths: 10, 10, 10, 10 + :header-rows: 1 + +.. figure:: cts1-0.50.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Performance on Manzano with ppc=21 (0.50 GiB/PE) + + SPARTA Strong Scaling Performance on Manzano with ppc=21 (0.50 GiB/PE) + +.. figure:: cts1mem-0.50.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Memory on Manzano with ppc=21 (0.50 GiB/PE) + + SPARTA Strong Scaling Memory on Manzano with ppc=21 elements (0.50 GiB/PE) + +``ppc`` 42 (1.00 GiB/PE) +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. csv-table:: SPARTA Strong Scaling Performance and Memory on Manzano with ppc=42 (1.00 GiB/PE) + :file: cts1-1.00.csv + :align: center + :widths: 10, 10, 10, 10 + :header-rows: 1 + +.. 
figure:: cts1-1.00.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Performance on Manzano with ppc=42 (1.00 GiB/PE) + + SPARTA Strong Scaling Performance on Manzano with ppc=42 (1.00 GiB/PE) + +.. figure:: cts1mem-1.00.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Memory on Manzano with ppc=42 (1.00 GiB/PE) + + SPARTA Strong Scaling Memory on Manzano with ppc=42 elements (1.00 GiB/PE) + +``ppc`` 126 (2.00 GiB/PE) +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. csv-table:: SPARTA Strong Scaling Performance and Memory on Manzano with ppc=126 (2.00 GiB/PE) + :file: cts1-2.00.csv + :align: center + :widths: 10, 10, 10, 10 + :header-rows: 1 + +.. figure:: cts1-2.00.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Performance on Manzano with ppc=126 (2.00 GiB/PE) + + SPARTA Strong Scaling Performance on Manzano with ppc=126 (2.00 GiB/PE) + +.. figure:: cts1mem-2.00.png + :align: center + :scale: 50% + :alt: SPARTA Strong Scaling Memory on Manzano with ppc=126 (2.00 GiB/PE) + + SPARTA Strong Scaling Memory on Manzano with ppc=126 elements (2.00 GiB/PE) + + +.. _ResultsATS2: + +ATS-2/Vortex +------------ + +Throughput performance of SPARTA on ATS-2/Vortex is provided within the +following table and figure. + +.. csv-table:: SPARTA Throughput Performance on ATS-2/Vortex + :file: ats2.csv + :align: center + :widths: 10, 10 + :header-rows: 1 + +.. figure:: ats2.png + :align: center + :scale: 50% + :alt: SPARTA Throughput Performance on ATS-2/Vortex + + SPARTA Throughput Performance on ATS-2/Vortex + +Output from the largest case is within :download:`log.sparta <log.sparta>`. + +References +========== + +.. [SPARTA] S. J. Plimpton and S. G. Moore and A. Borner and A. K. Stagg + and T. P. Koehler and J. R. Torczynski and M. A. Gallis, 'Direct + Simulation Monte Carlo on petaflop supercomputers and beyond', + 2019, Physics of Fluids, 31, 086101. +.. [site] M. Gallis and S. Plimpton and S. Moore, 'SPARTA Direct Simulation + Monte Carlo Simulator', 2023. 
[Online]. Available: + https://sparta.github.io. [Accessed: 22- Feb- 2023] +.. [build] M. Gallis and S. Plimpton and S. Moore, 'SPARTA Documentation Getting + Started', 2023. [Online]. Available: + https://sparta.github.io/doc/Section_start.html#start_2. [Accessed: + 26- Mar- 2023] diff --git a/doc/sphinx/08_sparta/manzano_era/sparta_fom.py b/doc/sphinx/08_sparta/manzano_era/sparta_fom.py new file mode 100755 index 00000000..af1561af --- /dev/null +++ b/doc/sphinx/08_sparta/manzano_era/sparta_fom.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 + +""" +This is a self-contained script that extracts the SPARTA FOM for ATS-5. + +This self-contained script extracts the figure of merit (FOM) from SPARTA (ca. +early 2023) log.sparta output files. The FOM is the harmonic mean of the +computed mega-particle-steps-per-second from the Loop timer block between 5 and 10 +minutes of wall time. +Author: Anthony M. Agelastos +""" + + +# import Python functions +import sys +import argparse +import os +import logging + +assert sys.version_info >= (3, 5), "Please use Python version 3.5 or later." 
# define GLOBAL vars
VERSION = "2.71"
TIMEOUT = 30
IS_ALL = True
EXIT_CODES = {"success": 0, "no file": 1, "bad loop time block": 2}


# define global functions
def print_exit_codes():
    """Return the exit-code table as one human-readable string.

    Despite its name this function prints nothing; the returned string is used
    as the epilog of the command-line help.
    """
    pairs = ", ".join(
        '"{}": {}'.format(key, value) for key, value in EXIT_CODES.items()
    )
    return "exit codes = {" + pairs + "}"


def is_file(file_name):
    """Return True if ``file_name`` is an existing, readable regular file.

    ``os.access`` alone also succeeds for readable directories, which would
    only fail later inside ``open()``; hence the additional ``isfile`` check.
    """
    return os.path.isfile(file_name) and os.access(file_name, os.R_OK)


# define classes
class BuildDocHelp(object):
    """Define and parse the command-line interface of this script."""

    def __init__(self):
        """Create the argparse parser and parse ``sys.argv`` immediately."""
        my_epilog = print_exit_codes()
        self.parser = argparse.ArgumentParser(
            description="This Python program will extract the figure of merit (FOM) for SPARTA.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            epilog=my_epilog,
        )

        # NOTE: with action="store_true" and default=IS_ALL (True) this flag
        # cannot be switched off from the command line; kept for compatibility.
        self.parser.add_argument(
            "-a",
            "--all",
            action="store_true",
            default=IS_ALL,
            help="Generate ALL FOM information",
        )

        self.parser.add_argument(
            "-f",
            "--file",
            type=str,
            default="log.sparta",
            help="file name to read",
        )

        self.parser.add_argument(
            "-l",
            "--logLevel",
            type=str,
            default="info",
            choices=("info", "debug", "warning"),
            help="logging level",
        )

        self.parser.add_argument(
            "-v", "--version", action="version", version="%(prog)s {}".format(VERSION)
        )

        self.args = self.parser.parse_args()

    def get_args(self):
        """Return the argparse namespace holding the parsed arguments."""
        return self.args


class SpartaFom(object):
    """Extract the SPARTA figure of merit (FOM) from a log.sparta file.

    Required keyword arguments:
        logger: a ``logging.Logger`` used for all diagnostics.
        file_name: path of the SPARTA log file to read.
        is_all: flag stored on the object (coerced to ``bool`` if needed).
    """

    def __init__(self, **kwargs):
        """Store the keyword arguments as attributes and validate them.

        Raises:
            AssertionError: a required attribute is missing or the logger is
                not a ``logging.Logger``.
            SystemExit: ``file_name`` cannot be read (exit code "no file").
        """
        # set parameters from object instantiation
        for key, value in kwargs.items():
            setattr(self, key, value)

        # check for required attributes; raised explicitly (instead of via an
        # ``assert`` statement) so the check also survives ``python -O``
        required_attr = ["logger", "file_name", "is_all"]
        needed_attr = [item for item in required_attr if not hasattr(self, item)]
        if needed_attr:
            raise AssertionError(
                "Please ensure object {} has the following required "
                "attributes: {}!".format(self.__class__.__name__, needed_attr)
            )

        # check attributes
        self._check_attr()

    def _check_attr(self):
        """Validate the logger, coerce ``is_all`` and check file readability."""
        # any Logger is acceptable; the root logger is merely one subclass
        if not isinstance(self.logger, logging.Logger):
            raise AssertionError(
                "Pass appropriate logging object to {}!".format(
                    self.__class__.__name__
                )
            )
        if not isinstance(self.is_all, bool):
            tmp = bool(self.is_all)
            self.logger.critical(
                "Type issue with is_all within {} (should be bool, is {}); converted to bool and is now {}.".format(
                    self.__class__.__name__, type(self.is_all), tmp
                )
            )
            self.is_all = tmp

        if not is_file(self.file_name):
            self.logger.critical('Cannot read "{}"'.format(self.file_name))
            sys.exit(EXIT_CODES["no file"])

    def _check_start(self, line):
        """Return True if ``line`` is the header of the Loop time block."""
        return "Step CPU Np Natt Ncoll Maxlevel" in line

    def _check_end(self, line):
        """Return True if ``line`` is the summary that ends the Loop time block."""
        return "Loop time of " in line and "steps with" in line

    def _extract_line(self, line):
        """Parse one row of the Loop time block.

        Returns:
            ``[step, cpu_seconds, np, natt, ncoll, maxlevel]`` where the CPU
            time is a float and every other field is an int.

        Raises:
            SystemExit: the row does not have six fields or a field is not
                numeric (exit code "bad loop time block").
        """
        tokens = line.split()
        if len(tokens) != 6:
            self.logger.critical("Loop time block not sized appropriately!")
            sys.exit(EXIT_CODES["bad loop time block"])
        try:
            return [
                int(tokens[0]),
                float(tokens[1]),
                int(tokens[2]),
                int(tokens[3]),
                int(tokens[4]),
                int(tokens[5]),
            ]
        except ValueError:
            # e.g. a warning message that happens to contain six words
            self.logger.critical(
                'Cannot parse Loop time block row "{}"!'.format(line.strip())
            )
            sys.exit(EXIT_CODES["bad loop time block"])

    def _compute_fom(self, block, start=300.0, finish=600.0):
        """Return the harmonic mean of the per-row rate inside a time window.

        The rate of a row is ``particles * steps / elapsed / 1e6``
        (mega-particle-steps per second). Only rows with
        ``start < elapsed < finish`` contribute, and scanning stops at the
        first row whose elapsed time reaches ``finish``.

        Args:
            block: rows as produced by ``_extract_line``.
            start: exclusive lower bound of the window in seconds.
            finish: exclusive upper bound of the window in seconds.

        Returns:
            The harmonic mean, or 0 when no row falls inside the window or a
            contributing rate is zero.
        """
        inverse_sum = 0
        num_vals = 0
        for row in block:
            steps, elapsed, particles = row[0], row[1], row[2]
            if elapsed >= finish:
                break
            # rows at or before the window start (or without elapsed time)
            # carry no usable rate
            if elapsed <= start or elapsed <= 0:
                continue
            rate = particles * steps / elapsed / 1000000
            if rate == 0:
                # the harmonic mean of a set containing zero is zero
                return 0.0
            inverse_sum = inverse_sum + 1 / rate
            num_vals += 1

        if num_vals == 0:
            return 0
        return num_vals / inverse_sum

    def run(self):
        """Read the log file, log the FOM at INFO level and return it.

        Rows are collected from the line after the block header up to (not
        including) the "Loop time of" summary. A log that ends before the
        summary is evaluated with the rows read so far.
        """
        self.logger.debug("Extracting the FOM...")

        loop_info = []
        is_extract = False
        found_end = False
        with open(self.file_name) as fp:
            for cnt, line in enumerate(fp, start=1):
                if self._check_end(line):
                    self.logger.debug("Found end at line {}.".format(cnt))
                    found_end = True
                    break
                if is_extract:
                    loop_info.append(self._extract_line(line))
                elif self._check_start(line):
                    self.logger.debug("Found start at line {}.".format(cnt))
                    is_extract = True

        if not is_extract:
            self.logger.warning("No Loop time block found.")
        elif not found_end:
            self.logger.warning(
                "Loop time block is not terminated; using the rows read so far."
            )

        fom = self._compute_fom(loop_info)
        self.logger.info("FOM = {}".format(fom))
        return fom


def main():
    """Parse the command line, extract the FOM and return the exit code."""
    # manage command line arguments
    build_doc_help = BuildDocHelp()
    cl_args = build_doc_help.get_args()

    # manage logging
    int_logging_level = getattr(logging, cl_args.logLevel.upper(), None)
    if not isinstance(int_logging_level, int):
        raise ValueError("Invalid log level: {}!".format(cl_args.logLevel))
    logging.basicConfig(
        format="%(levelname)s - %(asctime)s - %(message)s", level=int_logging_level
    )
    logging.debug("Set logging level to {}.".format(cl_args.logLevel))
    logger = logging.getLogger()

    # manage worker object
    sparta_fom = SpartaFom(
        logger=logger,
        file_name=cl_args.file,
        is_all=cl_args.all,
    )

    # do work
    sparta_fom.run()

    return EXIT_CODES["success"]


# do work
if __name__ == "__main__":
    # exit gracefully
    sys.exit(main())