Skip to content

Commit

Permalink
Merge branch 'master' of github.com:studouglas/GEANT4-GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
studouglas committed Apr 14, 2016
2 parents a57384c + f1851e8 commit 00f5d4d
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 37 deletions.
Binary file modified Documentation/FinalPresentation/Presentation.pdf
Binary file not shown.
200 changes: 177 additions & 23 deletions Documentation/FinalPresentation/Presentation.tex
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,11 @@ \subsection{Brief Project Overview}
\begin{frame}
\frametitle{Brief Project Overview}
Take an existing particle simulation toolkit---Geant4---and run some of its functions on a GPU to improve performance.
\begin{block}{Geant}
Geant4
\end{block}
\end{frame}

\subsection{Explanation of Terms}
%Victor
\begin{frame}
\frametitle{What is Geant4}
\frametitle{What is Geant4?}
\begin{itemize}
\item Geant4 is a toolkit that is meant to simulate the passage of particles through matter.
\item It has been developed over the years through the collaborative effort of many different institutions and individuals.
Expand All @@ -66,14 +62,15 @@ \subsection{Explanation of Terms}
\frametitle{Demonstration}
\begin{center}
\emph{Demonstration -- Running Geant4 on the CPU}
%

\emph{Hadr04 With Visualization}
\end{center}
\end{frame}

\begin{frame}
\begin{itemize}
\frametitle{What is GP-GPU}
\item General-purpose graphic processing unit computing is a re-purposing of graphics hardware
\frametitle{What is GP-GPU Computing?}
\item General-purpose graphics-processing-unit computing is a re-purposing of graphics hardware
\item Allows GPUs to perform computations that would typically be computed on the CPU
\item If a particular problem is well suited to parallelization, GP-GPU computing can greatly increase performance
\end{itemize}
Expand Down Expand Up @@ -103,7 +100,6 @@ \subsection{Purpose}
\frametitle{Purpose}
\end{frame}


% =================== Section ===================
\section{Features}
\begin{frame}
Expand All @@ -126,14 +122,21 @@ \subsection{Easily Enable/Disable GPU Acceleration}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Demonstration}
\begin{center}
\emph{Demonstration -- Enabling CUDA Acceleration}
\end{center}
\end{frame}

\begin{frame}[fragile]
\frametitle{Easily Enable/Disable GPU Acceleration}
Method calls to \texttt{G4ParticleHPVector} forwarded to GPU-based implementation
\begin{itemize}
\item This decision is made at compile time based on \texttt{cmake} flag
\end{itemize}

\begin{block}{Example of Forwarding}
\begin{block}{Example of Forwarding Method Calls}
\begin{lstlisting}
inline G4double GetY(G4double x)
{
Expand All @@ -149,17 +152,11 @@ \subsection{Easily Enable/Disable GPU Acceleration}

\begin{frame}
\frametitle{Accelerating Module on GPU}
<<<<<<< HEAD
Existing module \texttt{G4ParticleHPVector} ported to GPU using CUDA\\~\\

\begin{block}{Definition: CUDA}
CUDA is a GP-GPU programming model developed by NVIDIA, for use with NVIDIA graphics cards
\end{block}
=======
\begin{itemize}
\item
\end{itemize}
>>>>>>> d58a86955ef97fecb9c25abda474d33038b8f10f
\end{frame}

\begin{frame}
Expand Down Expand Up @@ -214,6 +211,13 @@ \subsection{Impl. 1: Existing Module in GPU Memory}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Demonstration}
\begin{center}
\emph{Hadr04 with Visualization}
\end{center}
\end{frame}

\subsubsection{Implementation of Select Methods on GPU}
\begin{frame}
\frametitle{Impl. 1 -- \texttt{Times}}
Expand Down Expand Up @@ -250,18 +254,87 @@ \subsubsection{Impl. 1: Performance}

\begin{frame}
\frametitle{Impl. 1: Performance Results -- \texttt{GetXSec}}
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{images/getxsec_e_line.png}
\caption{Runtime vs. Number of Data Points -- \texttt{GetXSec}}
\end{figure}
\end{frame}

\begin{frame}
\frametitle{Impl. 1: Performance Results -- \texttt{SampleLin}}
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{images/samplelin_line.png}
\caption{Runtime vs. Number of Data Points -- \texttt{SampleLin}}
\end{figure}
\end{frame}

\begin{frame}
\frametitle{Impl. 1: Performance Results -- System Tests}
\textbf{System Test \#1:}\\
\begin{table}
\caption{Performance - Water, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\textbf{System Test \#2:}\\
\begin{table}
\caption{Performance - Uranium, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\end{frame}

\begin{frame}
\frametitle{Impl. 1: Performance Results -- System Tests (Cont.)}
\textbf{System Test \#3:}\\
\begin{table}
\caption{Performance - Water, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\textbf{System Test \#4:}\\
\begin{table}
\caption{Performance - Uranium, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\end{frame}

\begin{frame}
\frametitle{Impl. 1: Performance Results -- System Tests (Cont.)}
\textbf{System Test \#5:}\\
\begin{table}
\caption{Performance - Water, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\textbf{System Test \#6:}\\
\begin{table}
\caption{Performance - Uranium, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\end{frame}

\begin{frame}
\frametitle{Impl. 1: Performance Discussion}
\begin{itemize}
\item Simple ``getters'' and ``setters'' now
\end{itemize}
\end{frame}


Expand All @@ -271,30 +344,47 @@ \subsection{Impl. 2: Add New GPU-Accelerated Methods to Interface}
Add new methods to \texttt{G4ParticleHPVector} interface that are well-suited to parallelism\\~\\

\textbf{Pros:}
%\begin{block}{Pros:}
\begin{itemize}
\pro Only methods that run faster on the GPU are implemented
\pro Not forced to run methods that run slowly on GPU
\end{itemize}
%\end{block}

\textbf{Cons:}
%\begin{block}{Cons:}
\begin{itemize}
\con Will have to maintain two copies of the vector
\con More copying the vector to and from the GPU
\end{itemize}
%\end{block}
\end{frame}

\begin{frame}
\frametitle{Impl. 2: \texttt{GetXSecList}}
\begin{itemize}
\item Fill an array of energies we want to get xSec values for
\item Fill an array of energies for which we want cross-section values
\item Send the array to the GPU to work on
\item Each thread works on its own query (or queries)
\end{itemize}
\end{frame}

% Slide: kernel excerpt for GetXSecList. Each thread reads one query energy and
% looks for the first entry of d_theData whose energy is >= that query: a coarse
% pass in strides of sqrt(nEntries), then a linear pass over the last stride.
% The rewind is clamped at 0 so that a hit on the very first entry does not
% start the second loop at a negative index, and the result is stored under the
% same threadID that was used to read the query.
\begin{frame}[fragile]
\frametitle{Implementation -- \texttt{GetXSecList}}
\begin{lstlisting}[language=C++,basicstyle=\ttfamily,keywordstyle=\color{red}]
int stepSize = sqrt(nEntries);
int i = 0;
double e = queryList[threadID];

for (i = 0; i < nEntries; i += stepSize)
    if (d_theData[i].energy >= e)
        break;

i = max(i - (stepSize - 1), 0);

for (; i < nEntries; i++)
    if (d_theData[i].energy >= e)
        break;

d_minIndices[threadID] = i;
\end{lstlisting}
\end{frame}

\subsubsection{Impl. 2: Performance}
\begin{frame}
Expand All @@ -305,8 +395,65 @@ \subsubsection{Impl. 2: Performance}
\frametitle{Impl. 2: Performance Results -- \texttt{GetXSecList}}
\end{frame}


\begin{frame}
\frametitle{Impl. 2: Performance Results -- System Tests}
\textbf{System Test \#1:}\\
\begin{table}
\caption{Performance - Water, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\textbf{System Test \#2:}\\
\begin{table}
\caption{Performance - Uranium, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\end{frame}

\begin{frame}
\frametitle{Impl. 2: Performance Results -- System Tests (Cont.)}
\textbf{System Test \#3:}\\
\begin{table}
\caption{Performance - Water, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\textbf{System Test \#4:}\\
\begin{table}
\caption{Performance - Uranium, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\end{frame}

\begin{frame}
\frametitle{Impl. 2: Performance Results -- System Tests (Cont.)}
\textbf{System Test \#5:}\\
\begin{table}
\caption{Performance - Water, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\textbf{System Test \#6:}\\
\begin{table}
\caption{Performance - Uranium, 2000 events}
\begin{tabular}{lll}
\bf CPU Time&\bf GPU Time&\bf Speedup of GPU\\
54.55s&72.08s&-1.32$\times$\\
\end{tabular}
\end{table}
\end{frame}

\begin{frame}
Expand Down Expand Up @@ -351,7 +498,7 @@ \subsection{Accuracy / Testing}
\item Testing framework based on two phases, one program for each phase
\begin{enumerate}
\item \texttt{GenerateTestResults:} Run unit tests and save results to file
\item \texttt{AnalyzeTestResults:} Compare results files from CPU and GPU runs
\item \texttt{AnalyzeTestResults:} Compare results from CPU and GPU
\end{enumerate}
\item Run \texttt{GenerateTestResults} once with GPU acceleration enabled, once with it disabled
\end{itemize}
Expand All @@ -377,6 +524,13 @@ \subsection{Accuracy / Testing}
\end{block}
\end{frame}

\begin{frame}
\frametitle{Demonstration}
\begin{center}
\emph{Demonstration -- Generating Test Results}
\end{center}
\end{frame}

\begin{frame}
\frametitle{\texttt{AnalyzeTestResults} Details}
Two main functions:
Expand All @@ -397,7 +551,7 @@ \subsection{Accuracy / Testing}
\begin{frame}
\frametitle{Demonstration}
\begin{center}
\emph{Demonstration of Generating and Analyzing Test Results}
\emph{Demonstration -- Analyzing Test Results}
\end{center}
\end{frame}

Expand Down
6 changes: 6 additions & 0 deletions geant4.10.02/source/externals/cuda/CUDAGPU.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Template header: the build passes this file through CMake's configure_file,
// which fills in the ENABLE_CUDA and PROJECT_SOURCE_DIR placeholders below.
// If the ENABLE_CUDA placeholder expands to a non-zero value, the CUDA-backed
// G4ParticleHPVector header is included and GEANT4_ENABLE_CUDA is defined as 1;
// otherwise nothing is included and GEANT4_ENABLE_CUDA is defined as 0.
#if ${ENABLE_CUDA}
// NOTE(review): this include path is rooted at the configuring machine's source
// tree, so the generated header is presumably not relocatable -- confirm.
#include "${PROJECT_SOURCE_DIR}/source/externals/cuda/include/G4ParticleHPVector_CUDA.hh"
#define GEANT4_ENABLE_CUDA 1
#else
#define GEANT4_ENABLE_CUDA 0
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ if(GEANT4_BUILD_GRANULAR_LIBS)
GEANT4_GRANULAR_LIBRARY_TARGET(COMPONENT sources.cmake)
endif()

set(ENABLE_CUDA 0)
if(GEANT4_ENABLE_CUDA)
set(ENABLE_CUDA 1)
message(STATUS "CUDA Acceleration ENABLED")
set(ENABLE_CUDA 1)
message(STATUS "CUDA Acceleration ENABLED")
add_definitions(-DGEANT4_ENABLE_CUDA=ON)
else()
set(ENABLE_CUDA 0)
message(STATUS "CUDA Acceleration DISABLED")
endif()
configure_file("${PROJECT_SOURCE_DIR}/source/externals/cuda/include/G4ParticleHPVector.hh"
"${PROJECT_SOURCE_DIR}/source/processes/hadronic/models/particle_hp/include/G4ParticleHPVector.hh")
message("GEANT4_ENABLE_CUDA: " ${GEANT4_ENABLE_CUDA})
configure_file("${PROJECT_SOURCE_DIR}/source/externals/cuda/CUDAGPU.hh"
"${PROJECT_SOURCE_DIR}/source/processes/hadronic/models/particle_hp/include/CUDAGPU.hh")
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,7 @@
#include "G4ParticleHPHash.hh"
#include <cmath>
#include <vector>

#if 1
#include "/Users/stuart/Documents/4th_Year/CS_4ZP6/GEANT4-GPU/geant4.10.02/source/externals/cuda/include/G4ParticleHPVector_CUDA.hh"
#define GEANT4_ENABLE_CUDA 1
#endif
#include "CUDAGPU.hh"

#if defined WIN32-VC
#include <float.h>
Expand Down
Loading

0 comments on commit 00f5d4d

Please sign in to comment.