Skip to content

Commit

Permalink
Merge pull request #285 from intel/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
chuckyount authored Nov 27, 2023
2 parents 389f325 + 8fdf4fd commit 34b28fc
Show file tree
Hide file tree
Showing 8 changed files with 298 additions and 186 deletions.
Binary file added docs/YASK-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/YASK-tutorial.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion src/common/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace yask {
// https://semver.org/.

// Format: "major.minor.patch[-alpha|-beta]".
const string version = "4.04.07";
const string version = "4.04.08";

string yask_get_version_string() {
return version;
Expand Down
9 changes: 6 additions & 3 deletions src/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ YK_SCRIPT_FLAGS += "-stencil" $(YK_STENCIL) "-arch" $(YK_ARCH)
ifeq ($(mpi),0)
YK_SCRIPT_FLAGS += -ranks 1
endif
ifeq ($(offload),1)
YK_SCRIPT_FLAGS += -offload
endif

# Additional flags for building kernel lib.
# These are not used for building apps that use only the APIs.
Expand Down Expand Up @@ -1128,12 +1131,12 @@ single-stencil-tests:
$(MAKE) clean; $(STENCIL_TEST) stencil=iso3dfd radius=3 $(call FOLD,x=2 y=2) domain_dims=z,x,y
$(MAKE) clean; $(STENCIL_TEST) stencil=iso3dfd_sponge radius=6 $(call FOLD,x=2 z=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=ssg $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=ssg_merged $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=ssg2 $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=awp $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=awp_abc $(call FOLD,x=2 z=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=tti $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=fsg_merged $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=fsg_merged_abc $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=fsg2 $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=fsg2_abc $(call FOLD,x=2 y=2)

# 3D tests w/seismic stencils that may generate too many kernel parameters for offload.
3d-tests4:
Expand Down
4 changes: 4 additions & 0 deletions src/kernel/lib/soln_apis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,9 @@ namespace yask {
" throughput (num-writes/sec): " << make_num_str(p->writes_ps) << endl <<
" throughput (est-FLOPS): " << make_num_str(p->flops) << endl <<
" throughput (num-points/sec): " << make_num_str(p->pts_ps));

#if 0
// TODO: Fix this: the reported stage rates are often much too high, esp. for offload.
if (psteps != steps_done) {
DEBUG_MSG(" Rate breakdown by stage(s):");
for (auto& sp : st_stages) {
Expand All @@ -543,6 +546,7 @@ namespace yask {

}
}
#endif
}

// Clear counters.
Expand Down
170 changes: 121 additions & 49 deletions src/kernel/yask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,35 @@ else
def_arch=intel64
fi
arch=$def_arch
arch_is_def_cpu=1 # arch has been set for CPU default only.

# Default nodes.
nnodes=1
if [[ ! -z ${SLURM_JOB_NUM_NODES:+x} ]]; then
nnodes=$SLURM_JOB_NUM_NODES
if [[ ! -z ${SLURM_NNODES:+x} ]]; then
nnodes=$SLURM_NNODES
fi

# Default type and number of GPUs.
ngpus=0
arch_offload="offload"
if command -v xpu-smi >/dev/null; then
arch_offload="$def_arch.offload-spir64"
ngpus=`xpu-smi topology -m | grep -c '^GPU'`
elif command -v nvidia-smi >/dev/null; then
arch_offload="$def_arch.offload-nv"
ngpus=`nvidia-smi topo -m | grep -c '^GPU'`
fi
is_offload=0

# Default MPI ranks.
# Try Slurm var, then numactl, then lscpu.
# For latter two, the goal is to count only NUMA nodes with CPUs.
# (Systems with HBM may have NUMA nodes without CPUs.)
nranks=1
nranks=$nnodes
nranks_is_def_cpu=1 # nranks has been set for CPU default only.
if [[ ! -z ${SLURM_NTASKS:+x} && $SLURM_NTASKS > $nnodes ]]; then
nranks=$SLURM_NTASKS
nranks_is_def_cpu=0
elif command -v numactl >/dev/null; then
ncpubinds=`numactl -s | awk '/^cpubind:/ { print NF-1 }'`
if [[ -n "$ncpubinds" ]]; then
Expand All @@ -79,10 +94,15 @@ elif command -v lscpu >/dev/null; then
nranks=$(( $nnumas * $nnodes ))
fi
fi
nranks_offload=$nnodes
if [[ $ngpus > 0 ]]; then
nranks_offload=$(( $ngpus * $nnodes ))
fi
force_mpi=0

# Other defaults.
pre_cmd=":"
post_cmd=""
pre_cmd=":" # "colon" command is a no-op.
post_cmd=":"
helping=0
opts=""
bindir=`dirname $0`
Expand Down Expand Up @@ -127,13 +147,17 @@ while true; do
echo " This will run the YASK executable with the '-help' option."
echo " -arch <name>"
echo " Specify the architecture-name part of the YASK executable."
echo " Overrides the default architecture determined from /proc/cpuinfo flags."
echo " The default arch for this host is '$def_arch'."
echo " Overrides the default architecture determined from /proc/cpuinfo flags"
echo " or the GPU system manangement software for offload kernels."
echo " Should correspond to arch=<name> used during compilation"
echo " with '.offload-<offload_arch>' appended when built with 'offload=1',"
echo " with '.offload-<offload_arch>' appended if built with 'offload=1',"
echo " or YK_ARCH=<name> if that was used to override the default."
echo " In any case, the '-stencil' and '-arch' args required to launch"
echo " any executable are printed at the end of a successful compilation."
echo " The default for this host is '$def_arch' for CPU kernels"
echo " and '$arch_offload' for offload kernels."
echo " -offload"
echo " Use an offloaded YASK kernel executable, built with 'offload=1'."
echo " -host <hostname>"
echo " Specify host to run YASK executable on."
echo " Run sub-shell under 'ssh <hostname>'."
Expand All @@ -149,24 +173,27 @@ while true; do
echo " Run YASK executable as an argument to <command>, e.g., 'numactl -N 0'."
echo " -mpi_cmd <command>"
echo " Run YASK executable as an argument to <command>, e.g., 'mpiexec.hydra -n 4'."
echo " If -mpi_cmd is used, the -ranks option is used only for computing the"
echo " default number of OpenMP threads to use."
echo " If -mpi_cmd and -exe_prefix are both specified, this one is used first."
echo " -ranks <N>"
echo " Run the YASK executable on <N> MPI ranks."
echo " Shortcut for the following option if <N> > 1:"
echo " -mpi_cmd 'mpirun -np <N>'"
echo " If a different MPI command is needed, use -mpi_cmd <command> explicitly."
echo " If the env var SLURM_NTASKS is set AND if it greater than the number of nodes,"
echo " the default is its value."
echo " Otherwise, the default is based on the number of NUMA nodes on the current host."
echo " The current default is $nranks."
echo " The default command is based on the number of nodes and ranks (see below)."
echo " -force_mpi"
echo " Generate a default 'mpirun' prefix even if there is only 1 rank to run."
echo " -nodes <N>"
echo " Set the number of nodes."
echo " This is used to compute the default number of OpenMP threads to use per rank."
echo " If the env var SLURM_JOB_NUM_NODES is set, the default is its value."
echo " If the env var SLURM_NNODES is set, the default is its value."
echo " Otherwise, the default is one (1)."
echo " The current default is $nnodes."
echo " -ranks <R>"
echo " Run the YASK executable on <R> MPI ranks."
echo " This value, along with the number of nodes, <N>, is used to set these defaults:"
echo " - Number of MPI ranks per node to <R>/<N>."
echo " - Number of OpenMP threads per rank based on core count (for CPU kernels only)."
echo " - Default MPI command to 'mpirun -np <R> -ppn <R>/<N>'."
echo " If a different MPI command is needed, use -mpi_cmd <command> explicitly."
echo " If the env var SLURM_NTASKS is set AND if it greater than the number of nodes,"
echo " the default is its value."
echo " Otherwise, the default is based on the number of NUMA nodes on the current host"
echo " for CPU kernels or the number of GPUs on the current host for offload kernels."
echo " The current default is $nranks for CPU kernels and $nranks_offload for offload kernels."
echo " -pre_cmd <command(s)>"
echo " One or more commands to run before YASK executable."
echo " -post_cmd <command(s)>"
Expand Down Expand Up @@ -200,6 +227,7 @@ while true; do
elif [[ "$1" == "-help" ]]; then
helping=1
nranks=1
nranks_is_def_cpu=0
logfile='/dev/null'

# Pass option to executable.
Expand All @@ -217,7 +245,15 @@ while true; do

elif [[ "$1" == "-arch" && -n ${2+set} ]]; then
arch=$2
arch_is_def_cpu=0
if [[ $arch =~ "offload" ]]; then
is_offload=1
fi
shift
shift

elif [[ "$1" == "-offload" ]]; then
is_offload=1
shift

elif [[ "$1" == "-host" && -n ${2+set} ]]; then
Expand Down Expand Up @@ -271,9 +307,14 @@ while true; do

elif [[ "$1" == "-ranks" && -n ${2+set} ]]; then
nranks=$2
nranks_is_def_cpu=0
shift
shift

elif [[ "$1" == "-force_mpi" ]]; then
force_mpi=1
shift

elif [[ "$1" == "-nodes" && -n ${2+set} ]]; then
nnodes=$2
shift
Expand Down Expand Up @@ -332,9 +373,24 @@ if [[ -z ${stencil:+x} ]]; then
show_stencils
fi

# Offload settings.
if [[ $is_offload == 1 ]]; then

# Heuristics for MPI ranks for offload.
if [[ $nranks_is_def_cpu == 1 ]]; then
nranks=$nranks_offload
fi

# Heuristics for offload arch.
if [[ $arch_is_def_cpu == 1 ]]; then
arch=$arch_offload
fi
fi

# Set MPI command default.
if [[ $nranks > 1 ]]; then
: ${mpi_cmd="mpirun -np $nranks"}
ppn=$(( $nranks / $nnodes ))
if [[ $nranks > 1 || $force_mpi == 1 ]]; then
: ${mpi_cmd="mpirun -np $nranks -ppn $ppn"}

# Add default Intel MPI settings.
envs+=" I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5"
Expand Down Expand Up @@ -375,12 +431,6 @@ tag=$stencil.$arch
make_report="$bindir/../build/yask_kernel.$tag.make-report.txt"
yc_report="$bindir/../build/yask_kernel.$tag.yask_compiler-report.txt"

# Heuristic to determine if this is an offload kernel.
is_offload=0
if [[ $arch =~ "offload" ]]; then
is_offload=1
fi

# Double-check that exe exists.
if [[ ! -x $exe ]]; then
echo "error: '$exe' not found or not executable." | tee -a $logfile
Expand Down Expand Up @@ -420,20 +470,14 @@ fi
# Output some vars.
echo "Num nodes:" $nnodes | tee -a $logfile
echo "Num MPI ranks:" $nranks | tee -a $logfile
echo "Num MPI ranks per node:" $(( $nranks / $nnodes )) | tee -a $logfile
echo "Num MPI ranks per node:" $ppn | tee -a $logfile
echo "sh_prefix='$sh_prefix'" | tee -a $logfile
echo "mpi_cmd='$mpi_cmd'" | tee -a $logfile
echo "exe_prefix='$exe_prefix'" | tee -a $logfile
echo "exe='$exe'" | tee -a $logfile
echo "pre_cmd='$pre_cmd'" | tee -a $logfile
echo "post_cmd='$post_cmd'" | tee -a $logfile

# Output the SLURM env.
if [[ `env | grep -c SLURM` > 0 ]]; then
echo "Slurm vars:" | tee -a $logfile
env | grep -E 'SBATCH|SLURM'
fi

# Dump most recent reports.
if [[ -e $make_report ]]; then
$dump $make_report >> $logfile
Expand All @@ -448,34 +492,62 @@ if [[ $doval == 1 ]]; then
fi

# Commands to capture some important system status and config info for benchmark documentation.
config_cmds="sleep 1; uptime; lscpu; cpuinfo -A; sed '/^$/q' /proc/cpuinfo; cpupower frequency-info; uname -a; $dump /etc/system-release; $dump /proc/cmdline; $dump /proc/meminfo; free -gt; numactl -H; ulimit -a; ipcs -l; module list; env | awk '/YASK/ { print \"env:\", \$1 }'"
config_cmds="sleep 1"
if command -v module >/dev/null; then
config_cmds+="; module list"
fi
config_cmds+="; echo 'Selected env vars:'; env | sort | awk '/YASK|SLURM|SBATCH|MPI|OMP/ { print \"env:\", \$1 }'"

config_cmds+="; set -x" # Start echoing commands before running them.
config_cmds+="; uptime; uname -a; ulimit -a; $dump /etc/system-release; $dump /proc/cmdline; $dump /proc/meminfo; free -gt"
if [[ -r /etc/system-release ]]; then
config_cmds+="; $dump /etc/system-release"
fi
if command -v lscpu >/dev/null; then
config_cmds+="; lscpu"
fi
if command -v cpuinfo >/dev/null; then
config_cmds+="; cpuinfo -A"
fi
if command -v cpupower >/dev/null; then
config_cmds+="; cpupower frequency-info"
fi
config_cmds+="; sed '/^$/q' /proc/cpuinfo"
if command -v numactl >/dev/null; then
config_cmds+="; numactl -H"
fi
if command -v ipcx >/dev/null; then
config_cmds+="; ipcs -l"
fi

# Add settings for offload kernel.
if [[ $is_offload == 1 ]]; then
config_cmds+="; clinfo -l";
if [[ $nranks > 1 ]]; then
envs+=" I_MPI_OFFLOAD_TOPOLIB=level_zero I_MPI_OFFLOAD=2"
else
envs+=" EnableImplicitScaling=1"
if command -v clinfo >/dev/null; then
config_cmds+="; clinfo -l";
fi
if command -v xpu-smi >/dev/null; then
config_cmds+="; xpu-smi discovery; xpu-smi topology -m";
fi
if command -v nvidia-smi >/dev/null; then
config_cmds+="; nvidia-smi";
fi
if [[ ! -z "$mpi_cmd" ]]; then
envs+=" I_MPI_OFFLOAD=2"
fi
fi

# Command sequence to be run in a shell.
exe_str="$mpi_cmd $exe_prefix $exe $opts"
cmds="cd $dir; ulimit -s unlimited; $config_cmds; ldd $exe; date; $pre_cmd; env $envs $envs2 $exe_str"
if [[ -n "$post_cmd" ]]; then
cmds+="; $post_cmd"
fi
cmds+="; date"
cmds="cd $dir; ulimit -s unlimited; $config_cmds; ldd $exe; date; $pre_cmd; env $envs $envs2 $exe_str; $post_cmd; date"

# Finally, invoke the binary in a shell.
if [[ $dodry == 0 ]]; then
echo "===================" | tee -a $logfile
if [[ -z "$sh_prefix" ]]; then
sh -c -x "$cmds" 2>&1 | tee -a $logfile
sh -c "$cmds" 2>&1 | tee -a $logfile
else
echo "Running shell under '$sh_prefix'..."
$sh_prefix "sh -c -x '$cmds'" 2>&1 | tee -a $logfile
$sh_prefix "sh -c '$cmds'" 2>&1 | tee -a $logfile
fi
echo "===================" | tee -a $logfile
fi
Expand Down
Loading

0 comments on commit 34b28fc

Please sign in to comment.