Skip to content
Cameron Smith edited this page Jun 23, 2015 · 10 revisions

Table of Contents

The following instructions are for execution on the Babbage Phi/MIC only; the host processors are not used.

See the Babbage page for system details https://www.nersc.gov/users/computational-systems/testbeds/babbage/

Build

Set up the environment

# Replace the loaded Intel MPI and Intel compiler modules with the 2016 beta
# versions, then make cmake available.
module swap impi/5.0.update1 impi/2016.beta_5.1.0
module swap intel/15.0.update1 intel/2016.beta 
module load cmake
# Select the Intel C, C++ and Fortran compilers as the compilers used
# underneath the Intel MPI compiler wrappers.
export I_MPI_CC=icc
export I_MPI_CXX=icpc
export I_MPI_FC=ifort

Create a toolchain file

Create 'BabbagePhi.cmake' with the following contents

# CMake toolchain file for building binaries that run on the Babbage
# Phi/MIC cards with the Intel compilers.
# Use it with: cmake -DCMAKE_TOOLCHAIN_FILE=<path>/BabbagePhi.cmake ...

# Explicitly setting CMAKE_SYSTEM_NAME puts CMake into cross-compiling mode.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_C_COMPILER icc)
set(CMAKE_CXX_COMPILER icpc)
set(CMAKE_Fortran_COMPILER ifort)
# FORCE overwrites any value already in the cache so this archiver is used.
set(CMAKE_AR /usr/bin/ar CACHE STRING "" FORCE)

# Root of the Intel compiler/library installation; site specific, update it
# if the installation on the system moves.
set(d "/opt/intel/parallel_studio_xe_2015_update2/compilers_and_libraries_2016.0.042/linux")
# MIC builds of the compiler runtime and MKL libraries.
set(compilermic "${d}/compiler/lib/mic")
# NOTE(review): mklmic is not referenced below in this file; it is kept in
# case the project's CMake code reads it - confirm before removing.
set(mklmic "${d}/mkl/lib/mic")
# -mmic selects the MIC target; -rpath-link lets the linker resolve the MIC
# compiler runtime libraries at link time.
set(cxxflags "-mmic -Wl,-rpath-link=${compilermic} -Wl,--as-needed ")
# Intel compiler optimization options applied to Release builds only.
set(opt "-opt-assume-safe-padding -opt-streaming-stores always -opt-streaming-cache-evict=0")

# Flags used for every build type.
set(CMAKE_C_FLAGS -mmic CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "${cxxflags}" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS -mmic CACHE STRING "" FORCE)

# Flags used when CMAKE_BUILD_TYPE=Release. These replace the cached Release
# flags rather than appending to them.
# NOTE(review): no explicit -O level or -DNDEBUG appears here - confirm that
# relying on the compiler's default optimization level is intended.
set(CMAKE_C_FLAGS_RELEASE "-mmic ${opt} " CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-mmic ${opt} " CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "-mmic ${opt} -align array64byte " CACHE STRING "" FORCE)

# Locations searched by find_library()/find_path() for target (MIC) files.
set(CMAKE_FIND_ROOT_PATH
    /usr/linux-k1om-4.7/linux-k1om/usr/lib64/
    /usr/linux-k1om-4.7/linux-k1om/usr/
    ${d}/mpi/mic/lib/release_mt
    ${d}/mpi/mic/lib/
    ${d}/mpi/mic/)
# Programs run during the build execute on the host, so never take them from
# the target root paths; libraries and headers must come only from them.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

Create a build directory

# Out-of-source build directory; the CMake step below is run from inside it
# and refers to the source tree and the toolchain file as '..'.
mkdir buildPhi
cd buildPhi

Run CMake

# Configure a Release build with the Phi toolchain file, enabling
# PHASTA_COMPRESSIBLE and disabling PHASTA_INCOMPRESSIBLE.
# Run from buildPhi: the source tree is '..' and BabbagePhi.cmake is expected
# in that same parent directory.
cmake \
  -DCMAKE_TOOLCHAIN_FILE=../BabbagePhi.cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DPHASTA_INCOMPRESSIBLE=OFF \
  -DPHASTA_COMPRESSIBLE=ON \
  ..

Build

# VERBOSE=1 makes the generated Makefiles print the full compile and link
# command lines, which helps confirm the MIC flags are being applied.
make VERBOSE=1

Run

Set up the environment

See the environment setup steps in the Build section above.

Create a single run job script

Create 'runPhi.sh' with the following contents

#!/bin/bash
# PBS job script that launches a single MPI run on the Phi/MIC cards.
# Replace the <...> placeholders on the mpirun.mic line before submitting;
# left as-is, bash would parse '<' and '>' as redirections.

# Intel MPI fabrics: shared memory within a node, DAPL between nodes.
export I_MPI_FABRICS=shm:dapl
# Intel MPI debug output level.
export I_MPI_DEBUG=5
#Force use of the rendezvous protocol to reduce memory usage.
export I_MPI_EAGER_THRESHOLD=0
export I_MPI_INTRANODE_EAGER_THRESHOLD=0
#connectionless protocol - see the google doc for performance implications
export I_MPI_DAPL_UD=enable
#pin processes to cores
export I_MPI_PIN_DOMAIN=core

# Run from the directory the job was submitted from; stop if it is not
# reachable so nothing is launched or written in the wrong place.
cd "$PBS_O_WORKDIR" || exit 1

# Site-provided helper; presumably writes the micfile.$PBS_JOBID host list
# used below - confirm against the Babbage documentation.
get_micfile
mpirun.mic -n <totalProcesses> -ppn <processesPerMIC> -hostfile "micfile.$PBS_JOBID" <executable>

Create a multiple run job script

Alternatively, to perform multiple runs within a single job/allocation, place the following into 'runPhi.sh':

#!/bin/bash  -x
# PBS job script that performs several MPI runs on the Phi/MIC cards within
# one allocation: each executable (dbg, opt) is run with I_MPI_DAPL_UD
# disabled and then enabled, and each run logs to its own file.
# The shebang's -x makes bash echo every command as it executes.

# Placeholders - replace with real values before submitting.
p=totalNumberOfProcesses
ppn=processesPerPhi
numstart=startingTimeStep

# Run from the directory the job was submitted from; stop if it is not
# reachable so logs and numstart.dat are not written in the wrong place.
cd "$PBS_O_WORKDIR" || exit 1

module swap impi/5.0.update1 impi/2016.beta_5.1.0
module swap intel/15.0.update1 intel/2016.beta

# Intel MPI fabrics: shared memory within a node, DAPL between nodes.
export I_MPI_FABRICS=shm:dapl

# Intel MPI debug output level.
export I_MPI_DEBUG=5
#Force use of the rendezvous protocol to reduce memory usage.
export I_MPI_EAGER_THRESHOLD=0
export I_MPI_INTRANODE_EAGER_THRESHOLD=0

#pin processes to cores
export I_MPI_PIN_DOMAIN=core

# Paths to the two executables; the loop below selects them by variable name.
dbg=/path/to/debug/phastaC.exe
opt=/path/to/optimized/phastaC.exe

# Site-provided helper; presumably writes the micfile.$PBS_JOBID host list
# used below - confirm against the Babbage documentation.
get_micfile
for exe in dbg opt; do
  for ud in 'disable' 'enable'; do
    #connectionless protocol
    export I_MPI_DAPL_UD=${ud}

    # Rewrite the starting time step before every run so each run begins
    # from the same step.
    echo "$numstart" > "${p}-procs_case/numstart.dat"
    # ${!exe} is indirect expansion: the value of the variable named by $exe.
    # stdout and stderr both go to a log named after the run's settings.
    mpirun.mic -n "$p" -hostfile "micfile.$PBS_JOBID" -ppn "$ppn" "${!exe}" \
    &> "r${p}.ppn${ppn}.ud${ud}.${exe}.${PBS_JOBID}.log"
  done
done

Submit the job

Note: '-n' specifies the total number of host processes. Since we are not running any processes on the host, set '-n' equal to the number of nodes; a value of zero is not valid.

# Submit from the case directory: runPhi.sh changes into $PBS_O_WORKDIR,
# which PBS sets to the directory qsub was invoked from.
# numNodes and HH:MM:SS are placeholders for the node count and wall time.
cd path/to/case/directory # should contain a N-procs_case sub-directory
qsub -l nodes=numNodes -l walltime=HH:MM:SS ./runPhi.sh