Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Conflicts:
	Documentation/FinalPresentation/Presentation.pdf
  • Loading branch information
fythal committed Apr 14, 2016
2 parents 237d830 + 94f9915 commit 31f81da
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 397 deletions.
33 changes: 32 additions & 1 deletion Documentation/FinalPresentation/Presentation.tex
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ \subsection{Explanation of Terms}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Demonstration}
\begin{center}
\emph{Demonstration -- Running Geant4 on the CPU}
%
\end{center}
\end{frame}

\begin{frame}
\begin{itemize}
\frametitle{What is GP-GPU}
Expand Down Expand Up @@ -295,6 +303,29 @@ \subsubsection{Performance}
\subsection{Accuracy / Testing}
\begin{frame}
\frametitle{Accuracy}
\begin{itemize}
\item All modified functions except SampleLin and Sample
yield results that precisely match the original
\begin{itemize}
\item Some functions fell extremely close in accuracy to
the original, and were considered to `pass'
\item The average of 1000 SampleLin tests deviated from the
average of 1000 tests of the original with an error of 0.01
\end{itemize}
\item The system test results differ when nEntries is
greater than 500; otherwise, the results of the system
test conform.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Accuracy Discussion}
\begin{itemize}
\item The deviations in SampleLin and Sample can be
attributed to the functions' use of random numbers
\item The negligible deviations in other ported functions
are likely attributable to differences in CPU and GPU
arithmetic, leading to different round-off errors
\end{itemize}
\end{frame}

\begin{frame}
Expand All @@ -312,4 +343,4 @@ \subsection{Recommendations}
\frametitle{Recommendations}
\end{frame}

\end{document}
\end{document}
66 changes: 36 additions & 30 deletions geant4.10.02/source/externals/cuda/src/G4ParticleHPVector_CUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,38 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include "G4ParticleHPVector_CUDA.hh"
#include <thrust/device_vector.h>
#include <stdio.h>
// #include "G4NeutronHPDataPoint.cu"
#include <iostream>
#include <math.h>

/***********************************************
* Device Methods
***********************************************/
__global__ void GetMinIndices_CUDA(G4ParticleHPDataPoint *d_theData, int nEntries,
double *d_energiesIn_xSecsOut, int numQueries, int *d_minIndices) {
const int idx = blockDim.x * blockIdx.x + threadIdx.x;
const int stepSize = (int)sqrt((float)nEntries);
/*
 * Fill kernel: writes setValue into resArray[0 .. querySize-1].
 *
 * Each thread handles at most one element, chosen by its flat 1D global
 * index (block offset plus thread offset). Threads whose index falls at or
 * beyond querySize exit without touching memory, so the launch may be
 * rounded up to a whole number of blocks.
 *
 *   resArray  - array to fill; must hold at least querySize ints
 *   querySize - number of leading elements to overwrite
 *   setValue  - value stored into every covered element
 */
__global__ void SetArrayTo(int *resArray, int querySize, int setValue)
{
    const int slot = threadIdx.x + blockIdx.x * blockDim.x; // flat global thread index

    // Guard clause: surplus threads in the final block have nothing to write.
    if (slot >= querySize) {
        return;
    }

    resArray[slot] = setValue;
}

if (idx < numQueries) {
int i = 0;
double e = d_energiesIn_xSecsOut[idx];

for (i = 0; i < nEntries; i += stepSize) {
if (d_theData[i].energy >= e) {
break;
// For every query energy, lowers resArray_d[i] (via atomicMin) to the smallest
// index j visited whose theData_d[j].energy is strictly greater than the query.
//
//   theData_d    - data points to search; indices 0 .. nEntries-1 are read
//   queryArray_d - query energies, querySize of them
//   resArray_d   - one result slot per query; atomicMin only ever lowers a
//                  slot, so each slot must already hold an upper bound before
//                  this kernel runs (the host launch fills it with nEntries-1)
//   numThreads   - stride between the indices a single thread inspects
//   querySize    - number of queries
//   nEntries     - number of data points in theData_d
//
// Every thread loops over all queries and, for each one, inspects indices
// idx, idx + numThreads, idx + 2*numThreads, ... of theData_d.
__global__ void findMinArray2(G4ParticleHPDataPoint *theData_d, G4double *queryArray_d, int *resArray_d, int numThreads, int querySize, int nEntries)
{
int idx = blockDim.x*blockIdx.x + threadIdx.x; // flat global thread index
for (int i = 0; i < querySize; i++){
G4double queryEnergy = queryArray_d[i];
// Valid indices are 0 .. nEntries-1, so the bound must be exclusive;
// the previous "j <= nEntries" read one element past the end of theData_d.
for(int j = idx; j < nEntries; j+= numThreads){// check this thread's share of the data
if(theData_d[j].energy > queryEnergy){
atomicMin(&resArray_d[i], j); // keep the lowest qualifying index across all threads
}
}

i = (i - (stepSize - 1) >= 0) ? i - (stepSize - 1) : 0;
for (; i < nEntries; i++) {
if (d_theData[i].energy >= e) {
break;
}
}

d_minIndices[idx] = i;
}
}



/***********************************************
* Host Methods
***********************************************/
// Assigns the supplied interpolation manager to theManager.
//   aManager - interpolation manager to assign from
void G4ParticleHPVector_CUDA::SetInterpolationManager(G4InterpolationManager & aManager) {
theManager = aManager;
}
Expand Down Expand Up @@ -69,13 +71,17 @@ void G4ParticleHPVector_CUDA::GetXsecList(G4double* energiesIn_xSecsOut, G4int n
cudaMemcpy(d_theData, theData, sizeof(G4ParticleHPDataPoint) * nEntries, cudaMemcpyHostToDevice);
cudaMemcpy(d_energiesIn_xSecsOut, energiesIn_xSecsOut, sizeof(G4double) * numQueries, cudaMemcpyHostToDevice);

// need to add 1 block if doesn't divide evenly (e.g 32 T_P_B, 36 numQueries we need 1+1=2 blocks to get those last 4 queries)
int numBlocksSingleElement = numQueries/THREADS_PER_BLOCK + ((numQueries % THREADS_PER_BLOCK == 0) ? 0 : 1);

GetMinIndices_CUDA <<<numBlocksSingleElement, THREADS_PER_BLOCK>>>
(d_theData, nEntries, d_energiesIn_xSecsOut, numQueries, d_minIndices);

cudaMemcpy(minIndices, d_minIndices, sizeof(G4int) * numQueries, cudaMemcpyDeviceToHost);
printf("About to launch kernel...\n");

int queryBlocks = numQueries/THREADS_PER_BLOCK + (numQueries % THREADS_PER_BLOCK == 0 ? 0:1);
G4double resultVal = 0;
int dataChunk = 32;
int threadNum = nEntries/dataChunk;
int arrayBlocks = threadNum/THREADS_PER_BLOCK + (threadNum % THREADS_PER_BLOCK == 0 ? 0:1);

SetArrayTo <<< queryBlocks, THREADS_PER_BLOCK >>>(d_minIndices, numQueries, nEntries-1);
findMinArray2 <<< arrayBlocks, THREADS_PER_BLOCK >>> (d_theData, d_energiesIn_xSecsOut, d_minIndices, threadNum, numQueries, nEntries);
cudaMemcpy(minIndices, d_minIndices, numQueries * sizeof(G4int), cudaMemcpyDeviceToHost);

for (int i = 0; i < numQueries; i++) {
int minIndex = minIndices[i];
Expand Down
10 changes: 0 additions & 10 deletions geant4.10.02/source/externals/cuda/tests/GenerateTestResults.cc
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,7 @@ void testGetXSec(int caseNum) {
writeOutDoubleInput("min", minVals[j]);
try {
double t1 = getWallTime();
std::cout << "pre write double\n";
writeOutDouble(vectors[caseNum]->GetXsec(testVals[i], minVals[j]));
std::cout << "post write double\n";
double t2 = getWallTime();
writeOutTime(t2-t1);
} catch (G4HadronicException e) {
Expand Down Expand Up @@ -467,7 +465,6 @@ void testAssignment(int caseNum) {
}
void testBuffer(int caseNum){
writeOutTestName("void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList, G4int length)",caseNum);
std::cout << "Starting testBuffer case " << caseNum << "...\n";
double t1 = getWallTime();
int queryListSizes[NUM_QUERY_LISTS] = {10,50,100,10000,100000};
for (int i = 0; i < NUM_QUERY_LISTS; i++) {
Expand All @@ -480,16 +477,9 @@ void testBuffer(int caseNum){
vectors[caseNum]->GetXsecList(list, queryListSizes[i]);
writeOutTime(getWallTime() - t1);

// resultsFile << "getXSecList results ";
writeOutArray(list, queryListSizes[i]);
resultsFile << "\n";
// resultsFile << "getXSecList results (array): [";
// for (int j = 0; j < queryListSizes[i]; j++) {
// resultsFile << list[j] << ",";
// }
// resultsFile << "]\n";
}
std::cout << "\nTOTAL testBuffer TIME FOR CASE " << caseNum << ": " << getWallTime() - t1 << "\n\n";
}

/***********************************************
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ hash: 17710924904126496405
@numQueries=10000
hash: 7506977939755255107
@numQueries=100000
hash: 1638135657286475825
hash: 10300846530834065403
#void Init(std::istream & aDataFile, G4int total, G4double ux=1., G4double uy=1.)_5
theData xVals hash: 7171123492023199933
theData yVals hash: 9268461341921925588
Expand Down
82 changes: 41 additions & 41 deletions geant4.10.02/source/externals/cuda/tests/UnitTest_Times.csv
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
Method Signature,Case Number,nEntries,Input,CPU Time,GPU Time
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=10,1.90735e-06,1.09673e-05
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=50,0,0
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=100,9.53674e-07,0
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=10000,5.10216e-05,3.09944e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=100000,0.000406027,5.00679e-05
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),1,,,0.000183821,0.000191927
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=10,5.96046e-06,0.166332
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=50,2.86102e-06,0.000472069
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=100,1.81198e-05,0.000467062
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=10000,0.000124931,0.00070405
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=100000,0.00128007,0.00235915
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),2,,,0.00429916,0.00298691
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=10,9.53674e-07,0.000522137
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=50,2.86102e-06,0.000503063
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=100,5.00679e-06,0.00048995
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=10000,0.000433922,0.000810862
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=100000,0.00232601,0.00353718
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),3,,,0.025912,0.0231729
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=10,1.90735e-06,0.000764132
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=50,3.09944e-06,0.000505924
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=100,5.00679e-06,0.000524998
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=10000,0.000761986,0.00175095
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=100000,0.00212288,0.00564194
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),4,,,0.101337,0.101069
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=10,9.53674e-07,0.0106211
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=50,1.90735e-06,0.002105
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=100,5.00679e-06,0.00250816
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=10000,0.000799894,0.00284505
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=100000,0.00537419,0.0215211
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),5,,,0.160514,0.16487
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=10,9.53674e-07,0.0013051
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=50,1.90735e-06,0.00119305
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=100,5.00679e-06,0.00112081
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=10000,0.000838995,0.00385904
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=100000,0.0118561,0.0405099
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),6,,,0.516324,0.528599
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=10,1.90735e-06,0.00328398
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=50,2.14577e-06,0.00317383
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=100,5.96046e-06,0.00319195
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=10000,0.000874996,0.0109839
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=100000,0.0120931,0.113973
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=10,2.86102e-06,1.90735e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=50,0,9.53674e-07
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=100,0,9.53674e-07
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=10000,5.00679e-05,5.00679e-05
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),0,0,numQueries=100000,0.000399113,0.000417948
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),1,,,0.000194073,0.000181913
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=10,8.82149e-06,5.96046e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=50,3.09944e-06,1.90735e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=100,5.96046e-06,4.05312e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=10000,0.000112057,0.000111103
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),1,80,numQueries=100000,0.00107813,0.00125694
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),2,,,0.00434494,0.00327396
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=10,2.14577e-06,1.90735e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=50,3.09944e-06,3.09944e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=100,5.00679e-06,5.00679e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=10000,0.000267029,0.000335932
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),2,1509,numQueries=100000,0.00190997,0.00187206
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),3,,,0.0300848,0.0294681
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=10,3.09944e-06,3.09944e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=50,1.90735e-06,2.14577e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=100,5.00679e-06,5.96046e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=10000,0.00069499,0.000791073
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),3,8045,numQueries=100000,0.0020442,0.00250506
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),4,,,0.111284,0.109397
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=10,1.90735e-06,1.90735e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=50,3.09944e-06,2.86102e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=100,5.00679e-06,5.00679e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=10000,0.000789881,0.000813961
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),4,41854,numQueries=100000,0.010371,0.00589609
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),5,,,0.177023,0.174309
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=10,6.91414e-06,1.28746e-05
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=50,2.86102e-06,2.14577e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=100,5.96046e-06,5.96046e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=10000,0.000967979,0.000797987
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),5,98995,numQueries=100000,0.0116479,0.015547
void Init(std::istream & aDataFile. G4int total. G4double ux=1.. G4double uy=1.),6,,,0.524723,0.510009
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=10,1.90735e-06,2.14577e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=50,1.90735e-06,1.90735e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=100,5.00679e-06,5.00679e-06
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=10000,0.000844955,0.000828981
void G4ParticleHPVector_CUDA::GetXsecBuffer(G4double * queryList. G4int length),6,242594,numQueries=100000,0.01231,0.012162
Loading

0 comments on commit 31f81da

Please sign in to comment.