diff --git a/README.md b/README.md
index 359ea27f..1b6308f3 100644
--- a/README.md
+++ b/README.md
@@ -61,8 +61,7 @@ downloaded marshalgen from another source, you need to fix this)
2. `mkdir /path/to/GEANT4-GPU/geant4.10.01.p02-build /path/to/GEANT4-GPU/
geant4.10.01.p02-install`
3. `cd /path/to/GEANT4-GPU/geant4.10.01.p02-build`
-4. `cmake -DGEANT4_INSTALL_DATA=ON -DCMAKE_INSTALL_PREFIX=/path/to/GEANT4-GPU/
-geant4.10.01.p02-install /path/to/GEANT4-GPU/geant4.10.01.p02`
+4. `cmake -DGEANT4_INSTALL_DATA=ON -DCMAKE_INSTALL_PREFIX=/path/to/GEANT4-GPU/geant4.10.01.p02-install /path/to/GEANT4-GPU/geant4.10.01.p02`
5. `make -jN` where `N` is the number of processors on your computer
6. `make install`
@@ -79,6 +78,17 @@ geant4.10.01.p02-install /path/to/GEANT4-GPU/geant4.10.01.p02`
8. Open `/path/to/GEANT4-GPU/Build/addFilesG4STORK` and modify the top few
variables with the correct paths for your install.
+**Installing Geant4 on McMaster's Server (no root privileges)**
+1. SSH into one of McMaster's servers (e.g. `ssh yourMacId@gpu1.mcmaster.ca`). Your account lives on a drive shared across all department servers, so once you have installed everything you can use it from any of them.
+2. Set up your `.gitconfig` file and clone the repo into your home folder (`/u50/yourMacId/`).
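+3. You'll need to install expat and cmake. To do this, download the source archives of both onto your regular desktop (the commands below assume cmake 3.4.0 and expat 2.0.1; adjust the file names if your versions differ).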
+4. Copy both tar files to McMaster's server with `scp`: `scp cmake-3.4.0.tar yourMacId@gpu1.mcmaster.ca:/u50/yourMacId/` and `scp expat-2.0.1.tar yourMacId@gpu1.mcmaster.ca:/u50/yourMacId/`
+5. Return to your SSH terminal and untar both files (this may take a while): `tar -xvf cmake-3.4.0.tar; tar -xvf expat-2.0.1.tar`
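+6. Build and install cmake: `cd cmake-3.4.0;./bootstrap;make;make install` (note: without root privileges the final `make install` will likely fail with a permissions error; this should be harmless, since step 8 puts the freshly built `cmake-3.4.0/bin` folder on your `PATH`)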
+7. Return to your home folder (`cd ~`), then build and install expat: `mkdir bin;cd expat-2.0.1;./configure --prefix=/u50/yourMacId/bin;make;make install`
+8. Add cmake's bin folder to your `PATH`. Open `/u50/yourMacId/.bash_profile` and add the following line right before `export PATH`: `PATH=$PATH:$HOME/cmake-3.4.0/bin`
+9. Follow the instructions above to "Install GEANT-4" (starting from step 2).
+
**Setting Environment Variables**
It is recommended to add a line to your bash_profile that loads the Geant4
environment variables, like so:
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt
index 3a43ba4f..51dc3549 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt
@@ -11,7 +11,7 @@
#
#------------------------------------------------------------------------------
-#add_subdirectory(cuda)
+add_subdirectory(cuda)
if(GEANT4_BUILD_GRANULAR_LIBS)
include(Geant4MacroLibraryTargets)
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt
index cc29c12c..f87ead62 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt
@@ -1,11 +1,14 @@
cmake_minimum_required (VERSION 2.8)
-find_package (CUDA REQUIRED)
-include_directories(/usr/local/cuda/include)
+option (GEANT4_ENABLE_CUDA "Use CUDA to run simulations in parallel on the GPU" ON)
+#add_subdirectory(Hadr04)
+
+if (GEANT4_ENABLE_CUDA)
+ find_package (CUDA REQUIRED)
-cuda_add_library (CUDA_G4NeutronHPVector SHARED CUDA_G4NeutronHPVector.h CUDA_G4NeutronHPVector.cu)
+ include_directories(/usr/local/cuda/include)
+ cuda_add_library (CUDA_G4NeutronHPVector SHARED CUDA_G4NeutronHPVector.h CUDA_G4NeutronHPVector.cu)
-LIST(APPEND CUDA_NVCC_FLAGS --compiler-options -fno-strict-aliasing -lineinfo -use_fast_math -Xptxas -dlcm=cg)
-LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30)
-
-#target_link_libraries(Geant4 CUDA_G4NeutronHPVector /usr/local/cuda/lib/libcudart_static.a)
+ LIST(APPEND CUDA_NVCC_FLAGS --compiler-options -fno-strict-aliasing -lineinfo -use_fast_math -Xptxas -dlcm=cg)
+ LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30)
+endif()
\ No newline at end of file
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu
index cfbf7839..4f516ffe 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu
@@ -1,41 +1,32 @@
#include
#include
+#include
#include "CUDA_G4NeutronHPVector.h"
-// Kernel that executes on the CUDA device
+// CUDA kernel
__global__
-void square_array(float *a, int N)
+void sumArrays(int* arr1, int* arr2, int* res, int n)
{
- int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx>> (a_d, N);
-
- // Retrieve result from device and store it in host array
- cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
+void CUDA_sumArrays(int* arr1, int* arr2, int* res, int n) {
+ int *gpu_arr1, *gpu_arr2, *gpu_res;
- // Cleanup
- free(a_h);
- cudaFree(a_d);
+ cudaMalloc((void**)&gpu_arr1, n*sizeof(int));
+ cudaMalloc((void**)&gpu_arr2, n*sizeof(int));
+ cudaMalloc((void**)&gpu_res, n*sizeof(int));
+
+ cudaMemcpy(gpu_arr1, arr1, n*sizeof(int), cudaMemcpyHostToDevice);
+ cudaMemcpy(gpu_arr2, arr2, n*sizeof(int), cudaMemcpyHostToDevice);
- return a_h[N-1];
+ sumArrays<<>>(gpu_arr1, gpu_arr2, gpu_res, n);
+
+ cudaMemcpy(res, gpu_res, n*sizeof(int), cudaMemcpyDeviceToHost);
+
+ cudaFree(gpu_arr1);
+ cudaFree(gpu_arr2);
+ cudaFree(gpu_res);
}
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h
index a6abce61..3e63212d 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h
@@ -1 +1 @@
-float squareArray(int N);
+void CUDA_sumArrays(int* arr1, int* arr2, int* res, int n);
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake
index 59cc2ab4..96c9f409 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake
@@ -15,6 +15,9 @@
#
#-----------------------------------------------------------------------
+# Include CUDA
+include_directories(cuda)
+
# List external includes needed.
include_directories(${CLHEP_INCLUDE_DIRS})
include_directories(${ZLIB_INCLUDE_DIRS})
@@ -50,6 +53,7 @@ include_directories(${CMAKE_SOURCE_DIR}/source/track/include)
include(Geant4MacroDefineModule)
GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
HEADERS
+ ../cuda/CUDA_G4NeutronHPVector.h
G4NeutronHPList.hh
G4InterpolationIterator.hh
G4NeutronHPIsoData.hh
@@ -187,6 +191,7 @@ GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
G4WattFissionSpectrumValues.hh
### FissionFragment Generator - end
SOURCES
+ ../cuda/CUDA_G4NeutronHPVector.cu
G4NeutronHPIsoData.cc
G4InterpolationManager.cc
G4NeutronHPLevel.cc
@@ -306,6 +311,7 @@ GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
G4WendtFissionFragmentGenerator.cc
### Fission Fragment Generator - end
GRANULAR_DEPENDENCIES
+ CUDA_G4NeutronHPVector
G4baryons
G4bosons
G4geometrymng
@@ -335,6 +341,7 @@ GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
G4particles
G4track
LINK_LIBRARIES
+ CUDA_G4NeutronHPVector
${ZLIB_LIBRARIES}
)
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc
index c0eade0d..e611c8ac 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc
@@ -33,10 +33,10 @@
#include "G4NeutronHPVector.hh"
#include "G4SystemOfUnits.hh"
-//#define CUDA_ENABLED 1
-//#if CUDA_ENABLED
-//#include "CUDA_G4NeutronHPVector.h"
-//#endif
+
+#if GEANT4_CUDA_ENABLED
+ #include "CUDA_G4NeutronHPVector.h"
+#endif
// if the ranges do not match, constant extrapolation is used.
G4NeutronHPVector & operator + (G4NeutronHPVector & left, G4NeutronHPVector & right)
@@ -84,6 +84,7 @@
G4NeutronHPVector::G4NeutronHPVector()
{
+ G4cout << "G4NeutronHPVector Constructed (no params)" << G4endl;
theData = new G4NeutronHPDataPoint[20];
nPoints=20;
nEntries=0;
@@ -100,6 +101,7 @@
G4NeutronHPVector::G4NeutronHPVector(G4int n)
{
+ G4cout << "G4NeutronHPVector Constructed (n: " << n << ")" << G4endl;
nPoints=std::max(n, 20);
theData = new G4NeutronHPDataPoint[nPoints];
nEntries=0;
@@ -150,6 +152,7 @@
G4double G4NeutronHPVector::GetXsec(G4double e)
{
+ G4cout << "G4NeutronHPVector::GetXSec Called, GPU on: " << GEANT4_CUDA_ENABLED << G4endl;
if(nEntries == 0) return 0;
if(!theHash.Prepared()) Hash();
G4int min = theHash.GetMinIndex(e);