From 5d62a385f836cb0d951f9cf1c9bc6be5e49f588f Mon Sep 17 00:00:00 2001
From: bfontana <bruno.alves@cern.ch>
Date: Wed, 1 Apr 2020 11:50:37 +0200
Subject: [PATCH] improve memory sizes function

---
 ...HeterogeneousHGCalProducerMemoryWrapper.cc | 24 ++++++++++++-------
 .../HeterogeneousHGCalProducerMemoryWrapper.h |  2 +-
 2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc
index 6b8d28bd41af7..3d83115d1d029 100644
--- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc
+++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc
@@ -7,18 +7,17 @@ namespace memory {
-      //returns total number of bytes, number of 'double' elements and number of 'float' elements
+      //returns a tuple: total number of bytes, then the summed element counts of the 'double', 'float' and 'int' groups of fixed_sizes
       std::tuple<int, int, int, int> get_memory_sizes_(const std::vector<int>& fixed_sizes, const int& ndoubles, const int& nfloats, const int& nints)
       {
-	const int size1 = sizeof(double);
-	const int size2 = sizeof(float);
-	const int size3 = sizeof(int);
-	int nelements1_tot = std::accumulate( fixed_sizes.begin(), fixed_sizes.begin() + ndoubles, 0);
-	int nelements2_tot = std::accumulate( fixed_sizes.begin() + ndoubles, fixed_sizes.begin() + ndoubles + nfloats, 0);
-	int nelements3_tot = std::accumulate( fixed_sizes.begin() + ndoubles + nfloats, fixed_sizes.end(), 0);
 	assert( fixed_sizes.begin() + ndoubles + nfloats + nints == fixed_sizes.end() );
-	int size_tot = nelements1_tot*size1+nelements2_tot*size2+nelements3_tot*size3;
-	return std::make_tuple(size_tot, nelements1_tot, nelements2_tot, nelements3_tot);
+	const std::vector<int> sizes = {sizeof(double), sizeof(float), sizeof(int)};
+	const std::vector<int> nelements = { std::accumulate( fixed_sizes.begin(), fixed_sizes.begin() + ndoubles, 0),
+					     std::accumulate( fixed_sizes.begin() + ndoubles, fixed_sizes.begin() + ndoubles + nfloats, 0),
+					     std::accumulate( fixed_sizes.begin() + ndoubles + nfloats, fixed_sizes.end(), 0) };
+	int size_tot = std::inner_product(sizes.begin(), sizes.end(), nelements.begin(), 0);
+	return std::make_tuple(size_tot, nelements[0], nelements[1], nelements[2]);
       }
     }
 
     }
 
+    //EE: allocates memory for constants on the device
     void device(KernelConstantData<HGCeeUncalibratedRecHitConstantData> *kcdata, cms::cuda::device::unique_ptr<double[]>& mem) {
       const std::vector<int> nelements = {kcdata->data.s_hgcEE_fCPerMIP_, kcdata->data.s_hgcEE_cce_, kcdata->data.s_hgcEE_noise_fC_, kcdata->data.s_rcorr_, kcdata->data.s_weights_, kcdata->data.s_waferTypeL_};
       auto memsizes = get_memory_sizes_(nelements, 5, 0, 1);
@@ -38,6 +37,7 @@ namespace memory {
       kcdata->data.nbelem = 1;
     }
 
+    //HEF: allocates memory for constants on the device
     void device(KernelConstantData<HGChefUncalibratedRecHitConstantData> *kcdata, cms::cuda::device::unique_ptr<double[]>& mem) {
       const std::vector<int> nelements = {kcdata->data.s_hgcHEF_fCPerMIP_, kcdata->data.s_hgcHEF_cce_, kcdata->data.s_hgcHEF_noise_fC_, kcdata->data.s_rcorr_, kcdata->data.s_weights_, kcdata->data.s_waferTypeL_};
       auto memsizes = get_memory_sizes_(nelements, 5, 0, 1);
@@ -57,10 +57,10 @@ namespace memory {
       kcdata->data.nbelem = 1;
     }
 
+    //HEB: allocates memory for constants on the device
     void device(KernelConstantData<HGChebUncalibratedRecHitConstantData> *kcdata, cms::cuda::device::unique_ptr<double[]>& mem) {
       const std::vector<int> nelements = {kcdata->data.s_weights_};
       auto memsizes = get_memory_sizes_(nelements, 1, 0, 0);
-
       mem = cms::cuda::make_device_unique<double[]>(std::get<0>(memsizes), 0);
 
       kcdata->data.weights_  = mem.get();
@@ -72,6 +72,7 @@ namespace memory {
       kcdata->data.nbelem = 1;
     }
 
+    //allocates memory for UncalibratedRecHits SoAs and RecHits SoAs on the device
     void device(const int& nhits, HGCUncalibratedRecHitSoA* soa1, HGCUncalibratedRecHitSoA* soa2, HGCRecHitSoA* soa3, cms::cuda::device::unique_ptr<float[]>& mem)
     {
       std::vector<int> sizes = {6*sizeof(float), 3*sizeof(uint32_t),                     //soa1
@@ -112,6 +113,7 @@ namespace memory {
       soa3->nbytes = std::accumulate(sizes.begin()+4, sizes.end(), 0);
     }
 
+    //EE: allocates memory for constants on the host
     void host(KernelConstantData<HGCeeUncalibratedRecHitConstantData>* kcdata, cms::cuda::host::noncached::unique_ptr<double[]>& mem)
     {
       const std::vector<int> nelements = {kcdata->data.s_hgcEE_fCPerMIP_, kcdata->data.s_hgcEE_cce_, kcdata->data.s_hgcEE_noise_fC_, kcdata->data.s_rcorr_, kcdata->data.s_weights_, kcdata->data.s_waferTypeL_};
@@ -132,6 +134,7 @@ namespace memory {
       kcdata->data.nbelem = 1;
     }
 
+    //HEF: allocates memory for constants on the host
     void host(KernelConstantData<HGChefUncalibratedRecHitConstantData>* kcdata, cms::cuda::host::noncached::unique_ptr<double[]>& mem)
     {
       const std::vector<int> nelements = {kcdata->data.s_hgcHEF_fCPerMIP_, kcdata->data.s_hgcHEF_cce_, kcdata->data.s_hgcHEF_noise_fC_, kcdata->data.s_rcorr_, kcdata->data.s_weights_, kcdata->data.s_waferTypeL_};
@@ -152,6 +155,7 @@ namespace memory {
       kcdata->data.nbelem = 1;
     }
 
+    //HEB: allocates memory for constants on the host
     void host(KernelConstantData<HGChebUncalibratedRecHitConstantData>* kcdata, cms::cuda::host::noncached::unique_ptr<double[]>& mem)
     {
       const std::vector<int> nelements = {kcdata->data.s_weights_};
@@ -167,6 +171,7 @@ namespace memory {
       kcdata->data.nbelem = 1;
     }
 
+    //allocates pinned (non-cached) memory for UncalibratedRecHits SoAs on the host
     void host(const int& nhits, HGCUncalibratedRecHitSoA* soa, cms::cuda::host::noncached::unique_ptr<float[]>& mem)
     {
       std::vector<int> sizes = { 6*sizeof(float), 3*sizeof(uint32_t) };
@@ -185,6 +190,7 @@ namespace memory {
       soa->nbytes = size_tot;
     }
 
+    //allocates memory for RecHits SoAs on the host
     void host(const int& nhits, HGCRecHitSoA* soa, cms::cuda::host::unique_ptr<float[]>& mem)
     {
       std::vector<int> sizes = { 3*sizeof(float), 2*sizeof(uint32_t), sizeof(uint8_t) };
diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h
index 16bf5c220e1f0..2a4d1879e1822 100644
--- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h
+++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h
@@ -28,7 +28,7 @@
 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
 
 #include "KernelManager.h"
-//#include "Utils.h"
+#include "Utils.h"
 
 namespace memory {
   namespace allocation {