diff --git a/README.md b/README.md
index 359ea27f..1b6308f3 100644
--- a/README.md
+++ b/README.md
@@ -61,8 +61,7 @@ downloaded marshalgen from another source, you need to fix this)
 2. `mkdir /path/to/GEANT4-GPU/geant4.10.01.p02-build /path/to/GEANT4-GPU/
 geant4.10.01.p02-install`<br>
 3. `cd /path/to/GEANT4-GPU/geant4.10.01.p02-build`<br>
-4. `cmake -DGEANT4_INSTALL_DATA=ON -DCMAKE_INSTALL_PREFIX=/path/to/GEANT4-GPU/
-geant4.10.01.p02-install /path/to/GEANT4-GPU/geant4.10.01.p02`<br>
+4. `cmake -DGEANT4_INSTALL_DATA=ON -DCMAKE_INSTALL_PREFIX=/path/to/GEANT4-GPU/geant4.10.01.p02-install /path/to/GEANT4-GPU/geant4.10.01.p02`<br>
 5. `make -jN` where `N` is the number of processors on your computer<br>
 6. `make install`
 
@@ -79,6 +78,17 @@ geant4.10.01.p02-install /path/to/GEANT4-GPU/geant4.10.01.p02`<br>
 8. Open `/path/to/GEANT4-GPU/Build/addFilesG4STORK` and modify the top few 
 variables with the correct paths for your install.
 
+**Installing Geant4 on McMaster's Server (no root privileges)**<br>
+1. SSH into one of McMaster's servers (e.g. `ssh yourMacId@gpu1.mcmaster.ca`). Your account lives on a drive shared across all department servers, so once you have installed Geant4 on one server you can use it from any of them.
+2. Set up your .gitconfig file and clone the repo in your home folder (path is `/u50/yourMacId/`)
+3. You'll need to install expat and cmake. To do this, download the latest versions of both onto your regular desktop.
+4. Copy both tarred files to McMaster's server via SSH: `scp cmake-3.4.0.tar yourMacId@gpu1.mcmaster.ca:/u50/yourMacId/` and `scp expat-2.0.1.tar yourMacId@gpu1.mcmaster.ca:/u50/yourMacId/`
+5. Return to your SSH terminal and untar both files (this may take a while): `tar -xvf cmake-3.4.0.tar; tar -xvf expat-2.0.1.tar`
+6. Build and install cmake: `cd cmake-3.4.0;./bootstrap;make;make install`
+7. Build and install expat: `mkdir bin;cd expat-2.0.1;./configure --prefix=/u50/yourMacId/bin;make;make install`
+8. Add cmake's bin folder to your path. Open `/u50/yourMacId/.bash_profile` and add the following line right before `export PATH`: `PATH=$PATH:$HOME/cmake-3.4.0/bin`
+9. Follow the "Install GEANT-4" instructions above, starting from step 2.
+
 **Setting Environment Variables**<br>
 It is recommended to add a line to your bash_profile that loads the Geant4
 environment variables, like so:
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt
index 3a43ba4f..51dc3549 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/CMakeLists.txt
@@ -11,7 +11,7 @@
 #
 #------------------------------------------------------------------------------
 
-#add_subdirectory(cuda)
+add_subdirectory(cuda)
 
 if(GEANT4_BUILD_GRANULAR_LIBS)
     include(Geant4MacroLibraryTargets)
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt
index cc29c12c..f87ead62 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CMakeLists.txt
@@ -1,11 +1,17 @@
 cmake_minimum_required (VERSION 2.8)
-find_package (CUDA REQUIRED)
 
-include_directories(/usr/local/cuda/include)
+option (GEANT4_ENABLE_CUDA "Use CUDA to run simulations in parallel on the GPU" ON)
+#add_subdirectory(Hadr04)
+
+if (GEANT4_ENABLE_CUDA)
+	find_package (CUDA REQUIRED)
 
-cuda_add_library (CUDA_G4NeutronHPVector SHARED CUDA_G4NeutronHPVector.h CUDA_G4NeutronHPVector.cu)
+	# FindCUDA reads CUDA_NVCC_FLAGS when cuda_add_library() is called, so the
+	# flags must be appended before the target is declared to take effect.
+	LIST(APPEND CUDA_NVCC_FLAGS --compiler-options -fno-strict-aliasing -lineinfo -use_fast_math -Xptxas -dlcm=cg)
+	LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30)
 
-LIST(APPEND CUDA_NVCC_FLAGS --compiler-options -fno-strict-aliasing -lineinfo -use_fast_math -Xptxas -dlcm=cg)
-LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30)
-
-#target_link_libraries(Geant4 CUDA_G4NeutronHPVector /usr/local/cuda/lib/libcudart_static.a)
+	# Use the include path located by find_package instead of a hard-coded prefix.
+	include_directories(${CUDA_INCLUDE_DIRS})
+	cuda_add_library (CUDA_G4NeutronHPVector SHARED CUDA_G4NeutronHPVector.h CUDA_G4NeutronHPVector.cu)
+endif()
\ No newline at end of file
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu
index cfbf7839..4f516ffe 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.cu
@@ -1,41 +1,63 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
+#include <stdio.h>
 #include "CUDA_G4NeutronHPVector.h"
 
-// Kernel that executes on the CUDA device
+// Threads per block for the sumArrays launch (a multiple of the 32-thread warp).
+static const int kSumArraysBlockSize = 256;
+
+// Reports a failed CUDA runtime call on stderr.
+// Returns true when `status` is cudaSuccess, false otherwise.
+static bool cudaCallOk(cudaError_t status, const char* what)
+{
+  if (status == cudaSuccess)
+      return true;
+  fprintf(stderr, "CUDA_sumArrays: %s failed: %s\n", what, cudaGetErrorString(status));
+  return false;
+}
+
+// Kernel: res[i] = arr1[i] + arr2[i] for every i in [0, n).
+// Expects a 1D grid of 1D blocks; each thread handles at most one element and
+// threads whose global index falls past n do nothing.
 __global__
-void square_array(float *a, int N)
+void sumArrays(const int* arr1, const int* arr2, int* res, int n)
 {
-  int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if (idx<N) 
-      a[idx] = a[idx] * a[idx];
+  // 64-bit index so blockIdx.x * blockDim.x cannot overflow for large n.
+  long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
+  if (i < n)
+      res[i] = arr1[i] + arr2[i];
 }
 
-float squareArray(int N) {
-    float *a_h, *a_d;  // Pointer to host & device arrays
-  	size_t size = N * sizeof(float);
-  	
-  	a_h = (float *)malloc(size);        // Allocate array on host
-  	cudaMalloc((void **) &a_d, size);   // Allocate array on device
-  
-  	// Initialize host array and copy it to CUDA device
-  	for (int i=0; i<N; i++) {
-  		a_h[i] = (float)i;
-  	}
-  	cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
-  
-  	// Do calculation on device:
-  	int block_size = 4;
-  	int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
-  
-  	square_array <<< n_blocks, block_size >>> (a_d, N);
-  
-  	// Retrieve result from device and store it in host array
-  	cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
+// Host wrapper: copies the n-element host arrays arr1 and arr2 to the device,
+// adds them element-wise on the GPU and copies the sums back into res.
+// arr1, arr2 and res are host pointers to at least n ints. A CUDA failure is
+// reported on stderr and leaves res unspecified; n <= 0 is a no-op.
+void CUDA_sumArrays(int* arr1, int* arr2, int* res, int n) {
+    int *gpu_arr1 = NULL, *gpu_arr2 = NULL, *gpu_res = NULL;
 
-  	// Cleanup
-  	free(a_h);
-  	cudaFree(a_d);
+    if (n <= 0)
+        return;  // nothing to add; a zero-block launch would be rejected
+    const size_t bytes = (size_t)n * sizeof(int);
+    // Ceil-divide without overflowing int when n is close to INT_MAX.
+    const int blocks = (n - 1) / kSumArraysBlockSize + 1;
 
-    return a_h[N-1];
+    // && short-circuits, so the first failing call stops the sequence.
+    bool ready = cudaCallOk(cudaMalloc((void**)&gpu_arr1, bytes), "cudaMalloc(arr1)")
+        && cudaCallOk(cudaMalloc((void**)&gpu_arr2, bytes), "cudaMalloc(arr2)")
+        && cudaCallOk(cudaMalloc((void**)&gpu_res, bytes), "cudaMalloc(res)")
+        && cudaCallOk(cudaMemcpy(gpu_arr1, arr1, bytes, cudaMemcpyHostToDevice), "copy of arr1")
+        && cudaCallOk(cudaMemcpy(gpu_arr2, arr2, bytes, cudaMemcpyHostToDevice), "copy of arr2");
+
+    if (ready) {
+        sumArrays<<<blocks, kSumArraysBlockSize>>>(gpu_arr1, gpu_arr2, gpu_res, n);
+        // A launch never returns a status itself; fetch it explicitly. The blocking
+        // copy below then waits for the kernel and reports any execution error.
+        if (cudaCallOk(cudaGetLastError(), "sumArrays launch"))
+            cudaCallOk(cudaMemcpy(res, gpu_res, bytes, cudaMemcpyDeviceToHost), "copy of result");
+    }
+
+    // cudaFree accepts NULL, so this is safe even after a partial failure.
+    cudaFree(gpu_arr1);
+    cudaFree(gpu_arr2);
+    cudaFree(gpu_res);
 }
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h
index a6abce61..3e63212d 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/cuda/CUDA_G4NeutronHPVector.h
@@ -1 +1,9 @@
-float squareArray(int N);
+#ifndef CUDA_G4NEUTRONHPVECTOR_H
+#define CUDA_G4NEUTRONHPVECTOR_H
+
+// Element-wise integer addition on the GPU: res[i] = arr1[i] + arr2[i] for
+// every i in [0, n). All three pointers refer to host arrays of at least n
+// ints; the copies to and from the device are done inside the call.
+void CUDA_sumArrays(int* arr1, int* arr2, int* res, int n);
+
+#endif // CUDA_G4NEUTRONHPVECTOR_H
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake
index 59cc2ab4..96c9f409 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/sources.cmake
@@ -15,6 +15,9 @@
 #
 #-----------------------------------------------------------------------
 
+# Include CUDA
+include_directories(cuda)
+
 # List external includes needed.
 include_directories(${CLHEP_INCLUDE_DIRS})
 include_directories(${ZLIB_INCLUDE_DIRS})
@@ -50,6 +53,7 @@ include_directories(${CMAKE_SOURCE_DIR}/source/track/include)
 include(Geant4MacroDefineModule)
 GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
   HEADERS
+    ../cuda/CUDA_G4NeutronHPVector.h
     G4NeutronHPList.hh
     G4InterpolationIterator.hh
     G4NeutronHPIsoData.hh
@@ -187,6 +191,7 @@ GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
 	G4WattFissionSpectrumValues.hh
 ### FissionFragment Generator - end
   SOURCES
+    ../cuda/CUDA_G4NeutronHPVector.cu
     G4NeutronHPIsoData.cc
     G4InterpolationManager.cc
     G4NeutronHPLevel.cc
@@ -306,6 +311,7 @@ GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
 	G4WendtFissionFragmentGenerator.cc
 ### Fission Fragment Generator - end
   GRANULAR_DEPENDENCIES
+    CUDA_G4NeutronHPVector
     G4baryons
     G4bosons
     G4geometrymng
@@ -335,6 +341,7 @@ GEANT4_DEFINE_MODULE(NAME G4had_neu_hp
     G4particles
     G4track
   LINK_LIBRARIES
+    CUDA_G4NeutronHPVector
     ${ZLIB_LIBRARIES}
   )
 
diff --git a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc
index c0eade0d..e611c8ac 100644
--- a/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc
+++ b/geant4.10.00.p02/source/processes/hadronic/models/neutron_hp/src/G4NeutronHPVector.cc
@@ -33,10 +33,16 @@
 #include "G4NeutronHPVector.hh"
 #include "G4SystemOfUnits.hh"
 
-//#define CUDA_ENABLED 1
-//#if CUDA_ENABLED
-//#include "CUDA_G4NeutronHPVector.h"
-//#endif
+
+// Default to the CPU-only path when the build does not define the switch, so
+// that GEANT4_CUDA_ENABLED is always a valid expression in this file.
+#ifndef GEANT4_CUDA_ENABLED
+	#define GEANT4_CUDA_ENABLED 0
+#endif
+
+#if GEANT4_CUDA_ENABLED
+	#include "CUDA_G4NeutronHPVector.h"
+#endif
 
   // if the ranges do not match, constant extrapolation is used.
   G4NeutronHPVector & operator + (G4NeutronHPVector & left, G4NeutronHPVector & right)
@@ -84,6 +84,7 @@
 
   G4NeutronHPVector::G4NeutronHPVector()
   {
+  	G4cout << "G4NeutronHPVector Constructed (no params)" << G4endl;
     theData = new G4NeutronHPDataPoint[20]; 
     nPoints=20;
     nEntries=0;
@@ -100,6 +101,7 @@
   
   G4NeutronHPVector::G4NeutronHPVector(G4int n)
   {
+  	G4cout << "G4NeutronHPVector Constructed (n: " << n << ")" << G4endl;
     nPoints=std::max(n, 20);
     theData = new G4NeutronHPDataPoint[nPoints]; 
     nEntries=0;
@@ -150,6 +152,7 @@
   
   G4double G4NeutronHPVector::GetXsec(G4double e) 
   {
+    G4cout << "G4NeutronHPVector::GetXSec Called, GPU on: " << GEANT4_CUDA_ENABLED << G4endl;
     if(nEntries == 0) return 0;
     if(!theHash.Prepared()) Hash();
     G4int min = theHash.GetMinIndex(e);