From 6f5507d769563df3e683f8e10ac61175caa93a20 Mon Sep 17 00:00:00 2001 From: bfontana Date: Fri, 29 May 2020 12:20:25 +0200 Subject: [PATCH] replace kernel shared memory by passing by value (huge simplification) --- ...HGCUncalibratedRecHitsToRecHitsConstants.h | 90 +++++----- UserCode/CodeGPU/plugins/BuildFile.xml | 2 +- .../CodeGPU/plugins/HGCalRecHitKernelImpl.cu | 170 +++++------------- .../HeterogeneousHGCalEERecHitProducer.cc | 57 +++--- .../HeterogeneousHGCalHEBRecHitProducer.cc | 31 ++-- .../HeterogeneousHGCalHEFRecHitProducer.cc | 62 ++++--- .../HeterogeneousHGCalHEFRecHitProducer.h | 3 +- ...HeterogeneousHGCalProducerMemoryWrapper.cc | 140 --------------- .../HeterogeneousHGCalProducerMemoryWrapper.h | 6 - .../plugins/KernelManagerHGCalRecHit.cu | 51 +----- .../plugins/KernelManagerHGCalRecHit.h | 10 +- 11 files changed, 189 insertions(+), 433 deletions(-) diff --git a/CUDADataFormats/HGCal/interface/HGCUncalibratedRecHitsToRecHitsConstants.h b/CUDADataFormats/HGCal/interface/HGCUncalibratedRecHitsToRecHitsConstants.h index 64bc3bb903b3d..97680395e055a 100644 --- a/CUDADataFormats/HGCal/interface/HGCUncalibratedRecHitsToRecHitsConstants.h +++ b/CUDADataFormats/HGCal/interface/HGCUncalibratedRecHitsToRecHitsConstants.h @@ -3,6 +3,24 @@ #include +//maximum sizes for SoA's arrays holding configuration data ("constants") +namespace maxsizes_constants { + //EE + constexpr size_t ee_fCPerMIP = 6; //number of elements pointed by hgcEE_fCPerMIP_ + constexpr size_t ee_cce = 6; //number of elements posize_ted by hgcEE_cce_ + constexpr size_t ee_noise_fC = 6; //number of elements posize_ted by hgcEE_noise_fC_ + constexpr size_t ee_rcorr = 6; //number of elements posize_ted by rcorr_ + constexpr size_t ee_weights = 53; //number of elements posize_ted by weights_ + //HEF + constexpr size_t hef_fCPerMIP = 6; //number of elements pointed by hgcEE_fCPerMIP_ + constexpr size_t hef_cce = 6; //number of elements posize_ted by hgcEE_cce_ + constexpr size_t hef_noise_fC = 6; //number of elements posize_ted by hgcEE_noise_fC_ + constexpr size_t hef_rcorr = 6; //number of elements posize_ted by rcorr_ + constexpr size_t hef_weights = 53; //number of elements posize_ted by weights_ + //HEB + constexpr size_t heb_weights = 53; //number of elements posize_ted by weights_ +} + class HGCConstantVectorData { public: std::vector fCPerMIP_; @@ -14,65 +32,43 @@ class HGCConstantVectorData { class HGCeeUncalibratedRecHitConstantData { public: - double hgcEE_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC) + double fCPerMIP_[maxsizes_constants::ee_fCPerMIP]; //femto coloumb to MIP conversion; one value per sensor thickness + double cce_[maxsizes_constants::ee_cce]; //charge collection efficiency, one value per sensor thickness + double noise_fC_[maxsizes_constants::ee_noise_fC]; //noise, one value per sensor thickness + double rcorr_[maxsizes_constants::ee_rcorr]; //thickness correction + double weights_[maxsizes_constants::ee_weights]; //energy weights to recover rechit energy deposited in the absorber + + double hgcEE_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC) double hgceeUncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcEE_keV2DIGI_ - double *hgcEE_fCPerMIP_; //femto coloumb to MIP conversion; one value per sensor thickness - double *hgcEE_cce_; //charge collection efficiency, one value per sensor thickness - double *hgcEE_noise_fC_; //noise, one value per sensor thickness - double *rcorr_; //thickness correction - double *weights_; //energy weights to recover rechit energy deposited in the absorber - float xmin_; //used for computing the time resolution error + float xmin_; //used for computing the time resolution error float xmax_; //used for computing the time resolution error float aterm_; //used for computing the time resolution error float cterm_; //used for computing the time resolution error - int nbytes_; //number of bytes allocated by this class - int ndelem_; //number of doubles pointed by this class - int nfelem_; //number of floats pointed by this class - int nielem_; //number of ints pointed by this class - int s_hgcEE_fCPerMIP_; //number of elements pointed by hgcEE_fCPerMIP_ - int s_hgcEE_cce_; //number of elements pointed by hgcEE_cce_ - int s_hgcEE_noise_fC_; //number of elements pointed by hgcEE_noise_fC_ - int s_rcorr_; //number of elements pointed by rcorr_ - int s_weights_; //number of elements pointed by weights_ }; class HGChefUncalibratedRecHitConstantData { public: - double hgcHEF_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC) - double hgchefUncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEF_keV2DIGI_ - double *hgcHEF_fCPerMIP_; //femto coloumb to MIP conversion; one value per sensor thickness - double *hgcHEF_cce_; //charge collection efficiency, one value per sensor thickness - double *hgcHEF_noise_fC_; //noise, one value per sensor thickness - double *rcorr_; //thickness correction - double *weights_; //energy weights to recover rechit energy deposited in the absorber - float xmin_; //used for computing the time resolution error - float xmax_; //used for computing the time resolution error - float aterm_; //used for computing the time resolution error - float cterm_; //used for computing the time resolution error - int nbytes_; //number of bytes allocated by this class - int ndelem_; //number of doubles allocated by this class - int nfelem_; //number of floats allocated by this class - int nuelem_; //number of unsigned ints allocated by this class - int nielem_; //number of ints allocated by this class - int s_hgcHEF_fCPerMIP_; //number of elements pointed by hgcEE_fCPerMIP_ - int s_hgcHEF_cce_; //number of elements pointed by hgcEE_cce_ - int s_hgcHEF_noise_fC_; //number of elements pointed by hgcEE_noise_fC_ - int s_rcorr_; //number of elements pointed by rcorr_ - int s_weights_; //number of elements pointed by weights_ + double fCPerMIP_[maxsizes_constants::hef_fCPerMIP]; //femto coloumb to MIP conversion; one value per sensor thickness + double cce_[maxsizes_constants::hef_cce]; //charge collection efficiency, one value per sensor thickness + double noise_fC_[maxsizes_constants::hef_noise_fC]; //noise, one value per sensor thickness + double rcorr_[maxsizes_constants::hef_rcorr]; //thickness correction + double weights_[maxsizes_constants::hef_weights]; //energy weights to recover rechit energy deposited in the absorber + + double keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC) + double uncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEF_keV2DIGI_ + float xmin_; //used for computing the time resolution error + float xmax_; //used for computing the time resolution error + float aterm_; //used for computing the time resolution error + float cterm_; //used for computing the time resolution error }; class HGChebUncalibratedRecHitConstantData { public: - double hgcHEB_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC) - double hgchebUncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEB_keV2DIGI_ - double hgcHEB_noise_MIP_; //noise - double *weights_; //energy weights to recover rechit energy deposited in the absorber - int nbytes_; //number of bytes allocated by this class - int ndelem_; //number of doubles allocated by this class - int nfelem_; //number of floats allocated by this class - int nuelem_; //number of unsigned ints allocated by this class - int nielem_; //number of ints allocated by this class - int s_weights_; //number of elements pointed by weights_ + double weights_[maxsizes_constants::heb_weights]; //energy weights to recover rechit energy deposited in the absorber + + double keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC) + double uncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEB_keV2DIGI_ + double noise_MIP_; //noise }; #endif diff --git a/UserCode/CodeGPU/plugins/BuildFile.xml b/UserCode/CodeGPU/plugins/BuildFile.xml index ea17769698a10..4f17ea8ae64d3 100644 --- a/UserCode/CodeGPU/plugins/BuildFile.xml +++ b/UserCode/CodeGPU/plugins/BuildFile.xml @@ -1,4 +1,4 @@ - + diff --git a/UserCode/CodeGPU/plugins/HGCalRecHitKernelImpl.cu b/UserCode/CodeGPU/plugins/HGCalRecHitKernelImpl.cu index be3241bb0cce5..5af1bc96aaccd 100644 --- a/UserCode/CodeGPU/plugins/HGCalRecHitKernelImpl.cu +++ b/UserCode/CodeGPU/plugins/HGCalRecHitKernelImpl.cu @@ -6,14 +6,15 @@ #include "HGCalRecHitKernelImpl.cuh" __device__ -double get_weight_from_layer(const int& padding, const int& layer, double*& sd) +double get_weight_from_layer(const int& layer, const double (&weights)[maxsizes_constants::hef_weights]) { - return sd[padding + layer]; + return weights[layer]; } __device__ void make_rechit(unsigned int tid, HGCRecHitSoA& dst_soa, HGCUncalibratedRecHitSoA& src_soa, const bool &heb_flag, - const double &weight, const double &rcorr, const double &cce_correction, const double &sigmaNoiseGeV, float *& sf) + const double& weight, const double& rcorr, const double& cce_correction, const double &sigmaNoiseGeV, + const float& xmin, const float& xmax, const float& aterm, const float& cterm) { dst_soa.id_[tid] = src_soa.id_[tid]; dst_soa.energy_[tid] = src_soa.amplitude_[tid] * weight * 0.001f; @@ -29,101 +30,43 @@ void make_rechit(unsigned int tid, HGCRecHitSoA& dst_soa, HGCUncalibratedRecHitS if(heb_flag==0) { //get time resolution - float max = fmaxf(son, sf[0]); //this max trick avoids if...elseif...else condition - float aterm = sf[2]; - float cterm = sf[3]; - dst_soa.timeError_[tid] = sqrt( __fdividef(aterm,max)*__fdividef(aterm,max) + cterm*cterm ); + //https://github.com/cms-sw/cmssw/blob/master/RecoLocalCalo/HGCalRecProducers/src/ComputeClusterTime.cc#L50 + /*Maxmin trick to avoid conditions within the kernel (having xmin < xmax) + 3 possibilities: 1) xval -> xmin -> xmax + 2) xmin -> xval -> xmax + 3) xmin -> xmax -> xval + The time error is calculated with the number in the middle. + */ + float max = fminf( fmaxf(son, xmin), xmax); + float div_ = __fdividef(aterm, max); + dst_soa.timeError_[tid] = sqrt( div_*div_ + cterm*cterm ); } else dst_soa.timeError_[tid] = -1; } __device__ -double get_thickness_correction(const int& padding, const int& type, double *& sd) +double get_thickness_correction(const int& type, const double (&rcorr)[maxsizes_constants::hef_rcorr]) { - return sd[padding + type]; + return rcorr[type]; } __device__ -double get_noise(const int& padding, const int& type, double *& sd) +double get_noise(const int& type, const double (&noise_fC)[maxsizes_constants::hef_noise_fC]) { - return sd[padding + type - 1]; + return noise_fC[type - 1]; } __device__ -double get_cce_correction(const int& padding, const int& type, double *& sd) +double get_cce_correction(const int& type, const double (&cce)[maxsizes_constants::hef_cce]) { - return sd[padding + type - 1]; + return cce[type - 1]; } __device__ -double get_fCPerMIP(const int& padding, const int& type, double *& sd) +double get_fCPerMIP(const int& type, const double (&fCPerMIP)[maxsizes_constants::hef_fCPerMIP]) { - return sd[padding + type - 1]; -} - -__device__ -void set_shared_memory(const int& tid, double*& sd, float*& sf, int*& si, const HGCeeUncalibratedRecHitConstantData& cdata, const int& size1, const int& size2, const int& size3, const int& size4, const int& size5) -{ - const int initial_pad = 2; - if(tid == 0) - { - sd[0] = cdata.hgcEE_keV2DIGI_; - sd[1] = cdata.hgceeUncalib2GeV_; - for(unsigned int i=initial_pad; i 0) ? cdata.xmin_ : 0.1; - sf[1] = cdata.xmax_; - sf[2] = cdata.aterm_; - sf[3] = cdata.cterm_; - } -} - -__device__ -void set_shared_memory(const int& tid, double*& sd, float*& sf, int*& si, const HGChefUncalibratedRecHitConstantData& cdata, const int& size1, const int& size2, const int& size3, const int& size4, const int& size5) -{ - const int initial_pad = 2; - if(tid == 0) - { - sd[0] = cdata.hgcHEF_keV2DIGI_; - sd[1] = cdata.hgchefUncalib2GeV_; - for(unsigned int i=initial_pad; i 0) ? cdata.xmin_ : 0.1; - sf[1] = cdata.xmax_; - sf[2] = cdata.aterm_; - sf[3] = cdata.cterm_; - } -} - -__device__ -void set_shared_memory(const int& tid, double*& sd, float*& sf, const HGChebUncalibratedRecHitConstantData& cdata, const int& size1) -{ - const int initial_pad = 3; - if(tid == 0) - { - sd[0] = cdata.hgcHEB_keV2DIGI_; - sd[1] = cdata.hgchebUncalib2GeV_; - sd[2] = cdata.hgcHEB_noise_MIP_; - for(unsigned int i=initial_pad; iparams.waferTypeL_[0], conds->params.cellCoarseY_[12], detid.cellX()); - int size1 = cdata.s_hgcHEF_fCPerMIP_ + 2; - int size2 = cdata.s_hgcHEF_cce_ + size1; - int size3 = cdata.s_hgcHEF_noise_fC_ + size2; - int size4 = cdata.s_rcorr_ + size3; - int size5 = cdata.s_weights_ + size4; - - extern __shared__ double s[]; - double *sd = s; - float *sf = (float*) (sd + cdata.ndelem_); - int *si = (int*) (sf + cdata.nuelem_); - set_shared_memory(threadIdx.x, sd, sf, si, cdata, size1, size2, size3, size4, size5); - __syncthreads(); - for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x) { - double weight = get_weight_from_layer(size4, detid.layer(), sd); - double rcorr = get_thickness_correction(size3, detid.type(), sd); - double noise = get_noise(size2, detid.type(), sd); - double cce_correction = get_cce_correction(size1, detid.type(), sd); - double fCPerMIP = get_fCPerMIP(2, detid.type(), sd); - double sigmaNoiseGeV = 1e-3 * weight * rcorr * __fdividef( noise, fCPerMIP ); - make_rechit(i, dst_soa, src_soa, false, weight, rcorr, cce_correction, sigmaNoiseGeV, sf); + double weight = get_weight_from_layer(detid.layer(), cdata.weights_); + double rcorr = get_thickness_correction(detid.type(), cdata.rcorr_); + double noise = get_noise(detid.type(), cdata.noise_fC_); + double cce_correction = get_cce_correction(detid.type(), cdata.cce_); + double fCPerMIP = get_fCPerMIP(detid.type(), cdata.fCPerMIP_); + double sigmaNoiseGeV = 1e-3 * weight * rcorr * __fdividef( noise, fCPerMIP ); + make_rechit(i, dst_soa, src_soa, false, weight, rcorr, cce_correction, sigmaNoiseGeV, + cdata.xmin_, cdata.xmax_, cdata.aterm_, cdata.cterm_); } } @@ -208,19 +128,13 @@ void heb_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const { unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; HeterogeneousHGCScintillatorDetId detid(src_soa.id_[tid]); - int size1 = cdata.s_weights_ + 3; - - extern __shared__ double s[]; - double *sd = s; - float *sf = (float*) (sd + cdata.ndelem_); - set_shared_memory(threadIdx.x, sd, sf, cdata, size1); - __syncthreads(); for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x) { - double weight = get_weight_from_layer(3, detid.layer(), sd); - double noise = sd[2]; + double weight = get_weight_from_layer(detid.layer(), cdata.weights_); + double noise = cdata.noise_MIP_; double sigmaNoiseGeV = 1e-3 * noise * weight; - make_rechit(i, dst_soa, src_soa, true, weight, 0., 0., sigmaNoiseGeV, sf); + make_rechit(i, dst_soa, src_soa, true, weight, 0., 0., sigmaNoiseGeV, + 0, 0, 0, 0); } } diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalEERecHitProducer.cc b/UserCode/CodeGPU/plugins/HeterogeneousHGCalEERecHitProducer.cc index 7decbd73d551a..6d7001f7dffc6 100644 --- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalEERecHitProducer.cc +++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalEERecHitProducer.cc @@ -3,7 +3,7 @@ HeterogeneousHGCalEERecHitProducer::HeterogeneousHGCalEERecHitProducer(const edm::ParameterSet& ps): token_(consumes(ps.getParameter("HGCEEUncalibRecHitsTok"))) { - cdata_.hgcEE_keV2DIGI_ = ps.getParameter("HGCEE_keV2DIGI"); + cdata_.keV2DIGI_ = ps.getParameter("HGCEE_keV2DIGI"); cdata_.xmin_ = ps.getParameter("minValSiPar"); //float cdata_.xmax_ = ps.getParameter("maxValSiPar"); //float cdata_.aterm_ = ps.getParameter("constSiPar"); //float @@ -13,12 +13,9 @@ HeterogeneousHGCalEERecHitProducer::HeterogeneousHGCalEERecHitProducer(const edm vdata_.noise_fC_ = ps.getParameter("HGCEE_noise_fC").getParameter >("values"); vdata_.rcorr_ = ps.getParameter< std::vector >("rcorr"); vdata_.weights_ = ps.getParameter< std::vector >("weights"); - cdata_.s_hgcEE_fCPerMIP_ = vdata_.fCPerMIP_.size(); - cdata_.s_hgcEE_cce_ = vdata_.cce_.size(); - cdata_.s_hgcEE_noise_fC_ = vdata_.noise_fC_.size(); - cdata_.s_rcorr_ = vdata_.rcorr_.size(); - cdata_.s_weights_ = vdata_.weights_.size(); - cdata_.hgceeUncalib2GeV_ = 1e-6 / cdata_.hgcEE_keV2DIGI_; + cdata_.uncalib2GeV_ = 1e-6 / cdata_.keV2DIGI_; + + assert_sizes_constants_(vdata_); tools_.reset(new hgcal::RecHitTools()); @@ -29,7 +26,6 @@ HeterogeneousHGCalEERecHitProducer::~HeterogeneousHGCalEERecHitProducer() { delete kmdata_; delete kcdata_; - delete d_kcdata_; delete uncalibSoA_; delete d_uncalibSoA_; delete d_intermediateSoA_; @@ -37,6 +33,28 @@ HeterogeneousHGCalEERecHitProducer::~HeterogeneousHGCalEERecHitProducer() delete calibSoA_; } +std::string HeterogeneousHGCalEERecHitProducer::assert_error_message_(std::string var, const size_t& s) +{ + std::string str1 = "The '"; + std::string str2 = "' array must be at least of size "; + std::string str3 = " to hold the configuration data."; + return str1 + var + str2 + std::to_string(s) + str3; +} + +void HeterogeneousHGCalEERecHitProducer::assert_sizes_constants_(const HGCConstantVectorData& vd) +{ + if( vdata_.fCPerMIP_.size() > maxsizes_constants::hef_fCPerMIP ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("fCPerMIP", vdata_.fCPerMIP_.size()); + else if( vdata_.cce_.size() > maxsizes_constants::hef_cce ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("cce", vdata_.cce_.size()); + else if( vdata_.noise_fC_.size() > maxsizes_constants::hef_noise_fC ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("noise_fC", vdata_.noise_fC_.size()); + else if( vdata_.rcorr_.size() > maxsizes_constants::hef_rcorr ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("rcorr", vdata_.rcorr_.size()); + else if( vdata_.weights_.size() > maxsizes_constants::hef_weights ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("weights", vdata_.weights_.size()); +} + void HeterogeneousHGCalEERecHitProducer::acquire(edm::Event const& event, edm::EventSetup const& setup, edm::WaitingTaskWithArenaHolder w) { const cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(w), ctxState_}; set_geometry_(setup); @@ -54,7 +72,7 @@ void HeterogeneousHGCalEERecHitProducer::acquire(edm::Event const& event, edm::E kmdata_ = new KernelModifiableData(nhits, stride_, uncalibSoA_, d_uncalibSoA_, d_intermediateSoA_, d_calibSoA_, calibSoA_); KernelManagerHGCalRecHit kernel_manager(kmdata_); - kernel_manager.run_kernels(kcdata_, d_kcdata_); + kernel_manager.run_kernels(kcdata_); rechits_ = std::make_unique(); convert_soa_data_to_collection_(*rechits_, calibSoA_, nhits); } @@ -73,12 +91,7 @@ void HeterogeneousHGCalEERecHitProducer::allocate_memory_() d_calibSoA_ = new HGCRecHitSoA(); calibSoA_ = new HGCRecHitSoA(); kcdata_ = new KernelConstantData(cdata_, vdata_); - d_kcdata_ = new KernelConstantData(cdata_, vdata_); - //_allocate pinned memory for constants on the host - memory::allocation::host(kcdata_, mem_const_); - //_allocate pinned memory for constants on the device - memory::allocation::device(d_kcdata_, d_mem_const_); //_allocate memory for hits on the host memory::allocation::host(stride_, uncalibSoA_, mem_in_); //_allocate memory for hits on the device @@ -99,15 +112,15 @@ void HeterogeneousHGCalEERecHitProducer::set_geometry_(const edm::EventSetup& se void HeterogeneousHGCalEERecHitProducer::convert_constant_data_(KernelConstantData *kcdata) { - for(int i=0; idata_.s_hgcEE_fCPerMIP_; ++i) - kcdata->data_.hgcEE_fCPerMIP_[i] = kcdata->vdata_.fCPerMIP_[i]; - for(int i=0; idata_.s_hgcEE_cce_; ++i) - kcdata->data_.hgcEE_cce_[i] = kcdata->vdata_.cce_[i]; - for(int i=0; idata_.s_hgcEE_noise_fC_; ++i) - kcdata->data_.hgcEE_noise_fC_[i] = kcdata->vdata_.noise_fC_[i]; - for(int i=0; idata_.s_rcorr_; ++i) + for(size_t i=0; ivdata_.fCPerMIP_.size(); ++i) + kcdata->data_.fCPerMIP_[i] = kcdata->vdata_.fCPerMIP_[i]; + for(size_t i=0; ivdata_.cce_.size(); ++i) + kcdata->data_.cce_[i] = kcdata->vdata_.cce_[i]; + for(size_t i=0; ivdata_.noise_fC_.size(); ++i) + kcdata->data_.noise_fC_[i] = kcdata->vdata_.noise_fC_[i]; + for(size_t i=0; ivdata_.rcorr_.size(); ++i) kcdata->data_.rcorr_[i] = kcdata->vdata_.rcorr_[i]; - for(int i=0; idata_.s_weights_; ++i) + for(size_t i=0; ivdata_.weights_.size(); ++i) kcdata->data_.weights_[i] = kcdata->vdata_.weights_[i]; } diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEBRecHitProducer.cc b/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEBRecHitProducer.cc index b952de5588c93..bc81b1eef5485 100644 --- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEBRecHitProducer.cc +++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEBRecHitProducer.cc @@ -3,11 +3,12 @@ HeterogeneousHGCalHEBRecHitProducer::HeterogeneousHGCalHEBRecHitProducer(const edm::ParameterSet& ps): token_(consumes(ps.getParameter("HGCHEBUncalibRecHitsTok"))) { - cdata_.hgcHEB_keV2DIGI_ = ps.getParameter("HGCHEB_keV2DIGI"); - cdata_.hgcHEB_noise_MIP_ = ps.getParameter("HGCHEB_noise_MIP").getParameter("noise_MIP"); + cdata_.keV2DIGI_ = ps.getParameter("HGCHEB_keV2DIGI"); + cdata_.noise_MIP_ = ps.getParameter("HGCHEB_noise_MIP").getParameter("noise_MIP"); vdata_.weights_ = ps.getParameter< std::vector >("weights"); - cdata_.s_weights_ = vdata_.weights_.size(); - cdata_.hgchebUncalib2GeV_ = 1e-6 / cdata_.hgcHEB_keV2DIGI_; + cdata_.uncalib2GeV_ = 1e-6 / cdata_.keV2DIGI_; + + assert_sizes_constants_(vdata_); tools_.reset(new hgcal::RecHitTools()); @@ -18,7 +19,6 @@ HeterogeneousHGCalHEBRecHitProducer::~HeterogeneousHGCalHEBRecHitProducer() { delete kmdata_; delete kcdata_; - delete d_kcdata_; delete uncalibSoA_; delete d_uncalibSoA_; delete d_intermediateSoA_; @@ -26,6 +26,20 @@ HeterogeneousHGCalHEBRecHitProducer::~HeterogeneousHGCalHEBRecHitProducer() delete calibSoA_; } +std::string HeterogeneousHGCalHEFRecHitProducer::assert_error_message_(std::string var, const size_t& s) +{ + std::string str1 = "The '"; + std::string str2 = "' array must be at least of size "; + std::string str3 = " to hold the configuration data."; + return str1 + var + str2 + std::to_string(s) + str3; +} + +void HeterogeneousHGCalHEFRecHitProducer::assert_sizes_constants_(const HGCConstantVectorData& vd) +{ + if( vdata_.weights_.size() > maxsizes_constants::heb_weights ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("weights", vdata_.fCPerMIP_.size()); +} + void HeterogeneousHGCalHEBRecHitProducer::acquire(edm::Event const& event, edm::EventSetup const& setup, edm::WaitingTaskWithArenaHolder w) { const cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(w), ctxState_}; set_geometry_(setup); @@ -43,7 +57,7 @@ void HeterogeneousHGCalHEBRecHitProducer::acquire(edm::Event const& event, edm:: kmdata_ = new KernelModifiableData(nhits, stride_, uncalibSoA_, d_uncalibSoA_, d_intermediateSoA_, d_calibSoA_, calibSoA_); KernelManagerHGCalRecHit kernel_manager(kmdata_); - kernel_manager.run_kernels(kcdata_, d_kcdata_); + kernel_manager.run_kernels(kcdata_); rechits_ = std::make_unique(); convert_soa_data_to_collection_(*rechits_, calibSoA_, nhits); @@ -63,12 +77,7 @@ void HeterogeneousHGCalHEBRecHitProducer::allocate_memory_() d_calibSoA_ = new HGCRecHitSoA(); calibSoA_ = new HGCRecHitSoA(); kcdata_ = new KernelConstantData(cdata_, vdata_); - d_kcdata_ = new KernelConstantData(cdata_, vdata_); - //_allocate pinned memory for constants on the host - memory::allocation::host(kcdata_, mem_const_); - //_allocate pinned memory for constants on the device - memory::allocation::device(d_kcdata_, d_mem_const_); //_allocate memory for hits on the host memory::allocation::host(stride_, uncalibSoA_, mem_in_); //_allocate memory for hits on the device diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.cc b/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.cc index 6407b726b321c..9219c25184288 100644 --- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.cc +++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.cc @@ -3,7 +3,7 @@ HeterogeneousHGCalHEFRecHitProducer::HeterogeneousHGCalHEFRecHitProducer(const edm::ParameterSet& ps): token_(consumes(ps.getParameter("HGCHEFUncalibRecHitsTok"))) { - cdata_.hgcHEF_keV2DIGI_ = ps.getParameter("HGCHEF_keV2DIGI"); + cdata_.keV2DIGI_ = ps.getParameter("HGCHEF_keV2DIGI"); cdata_.xmin_ = ps.getParameter("minValSiPar"); //float cdata_.xmax_ = ps.getParameter("maxValSiPar"); //float cdata_.aterm_ = ps.getParameter("constSiPar"); //float @@ -13,12 +13,9 @@ HeterogeneousHGCalHEFRecHitProducer::HeterogeneousHGCalHEFRecHitProducer(const e vdata_.noise_fC_ = ps.getParameter("HGCHEF_noise_fC").getParameter >("values"); vdata_.rcorr_ = ps.getParameter< std::vector >("rcorr"); vdata_.weights_ = ps.getParameter< std::vector >("weights"); - cdata_.s_hgcHEF_fCPerMIP_ = vdata_.fCPerMIP_.size(); - cdata_.s_hgcHEF_cce_ = vdata_.cce_.size(); - cdata_.s_hgcHEF_noise_fC_ = vdata_.noise_fC_.size(); - cdata_.s_rcorr_ = vdata_.rcorr_.size(); - cdata_.s_weights_ = vdata_.weights_.size(); - cdata_.hgchefUncalib2GeV_ = 1e-6 / cdata_.hgcHEF_keV2DIGI_; + cdata_.uncalib2GeV_ = 1e-6 / cdata_.keV2DIGI_; + + assert_sizes_constants_(vdata_); tools_.reset(new hgcal::RecHitTools()); @@ -29,7 +26,6 @@ HeterogeneousHGCalHEFRecHitProducer::~HeterogeneousHGCalHEFRecHitProducer() { delete kmdata_; delete kcdata_; - delete d_kcdata_; delete uncalibSoA_; delete d_uncalibSoA_; delete d_intermediateSoA_; @@ -37,6 +33,28 @@ HeterogeneousHGCalHEFRecHitProducer::~HeterogeneousHGCalHEFRecHitProducer() delete calibSoA_; } +std::string HeterogeneousHGCalHEFRecHitProducer::assert_error_message_(std::string var, const size_t& s) +{ + std::string str1 = "The '"; + std::string str2 = "' array must be at least of size "; + std::string str3 = " to hold the configuration data."; + return str1 + var + str2 + std::to_string(s) + str3; +} + +void HeterogeneousHGCalHEFRecHitProducer::assert_sizes_constants_(const HGCConstantVectorData& vd) +{ + if( vdata_.fCPerMIP_.size() > maxsizes_constants::hef_fCPerMIP ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("fCPerMIP", vdata_.fCPerMIP_.size()); + else if( vdata_.cce_.size() > maxsizes_constants::hef_cce ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("cce", vdata_.cce_.size()); + else if( vdata_.noise_fC_.size() > maxsizes_constants::hef_noise_fC ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("noise_fC", vdata_.noise_fC_.size()); + else if( vdata_.rcorr_.size() > maxsizes_constants::hef_rcorr ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("rcorr", vdata_.rcorr_.size()); + else if( vdata_.weights_.size() > maxsizes_constants::hef_weights ) + cms::cuda::LogError("MaxSizeExceeded") << this->assert_error_message_("weights", vdata_.weights_.size()); +} + void HeterogeneousHGCalHEFRecHitProducer::acquire(edm::Event const& event, edm::EventSetup const& setup, edm::WaitingTaskWithArenaHolder w) { const cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(w), ctxState_}; @@ -51,14 +69,12 @@ void HeterogeneousHGCalHEFRecHitProducer::acquire(edm::Event const& event, edm:: HeterogeneousConditionsESProductWrapper esproduct(params_); d_conds = esproduct.getHeterogeneousConditionsESProductAsync(ctx.stream()); - std::cout << "check3" << std::endl; + kcdata_ = new KernelConstantData(cdata_, vdata_); convert_constant_data_(kcdata_); - std::cout << "check4" << std::endl; convert_collection_data_to_soa_(hits_hef, uncalibSoA_, nhits); - std::cout << "check5" << std::endl; kmdata_ = new KernelModifiableData(nhits, stride_, uncalibSoA_, d_uncalibSoA_, d_intermediateSoA_, d_calibSoA_, calibSoA_); KernelManagerHGCalRecHit kernel_manager(kmdata_); - kernel_manager.run_kernels(kcdata_, kcdata_, d_conds); + kernel_manager.run_kernels(kcdata_, d_conds); rechits_ = std::make_unique(); convert_soa_data_to_collection_(*rechits_, calibSoA_, nhits); @@ -87,13 +103,7 @@ void HeterogeneousHGCalHEFRecHitProducer::allocate_memory_() d_intermediateSoA_ = new HGCUncalibratedRecHitSoA(); d_calibSoA_ = new HGCRecHitSoA(); calibSoA_ = new HGCRecHitSoA(); - kcdata_ = new KernelConstantData(cdata_, vdata_); - d_kcdata_ = new KernelConstantData(cdata_, vdata_); - //_allocate pinned memory for constants on the host - memory::allocation::host(kcdata_, mem_const_); - //_allocate pinned memory for constants on the device - memory::allocation::device(d_kcdata_, d_mem_const_); //_allocate memory for hits on the host memory::allocation::host(stride_, uncalibSoA_, mem_in_); //_allocate memory for hits on the device @@ -104,15 +114,15 @@ void HeterogeneousHGCalHEFRecHitProducer::allocate_memory_() void HeterogeneousHGCalHEFRecHitProducer::convert_constant_data_(KernelConstantData *kcdata) { - for(int i=0; idata_.s_hgcHEF_fCPerMIP_; ++i) - kcdata->data_.hgcHEF_fCPerMIP_[i] = kcdata->vdata_.fCPerMIP_[i]; - for(int i=0; idata_.s_hgcHEF_cce_; ++i) - kcdata->data_.hgcHEF_cce_[i] = kcdata->vdata_.cce_[i]; - for(int i=0; idata_.s_hgcHEF_noise_fC_; ++i) - kcdata->data_.hgcHEF_noise_fC_[i] = kcdata->vdata_.noise_fC_[i]; - for(int i=0; idata_.s_rcorr_; ++i) + for(size_t i=0; ivdata_.fCPerMIP_.size(); ++i) + kcdata->data_.fCPerMIP_[i] = kcdata->vdata_.fCPerMIP_[i]; + for(size_t i=0; ivdata_.cce_.size(); ++i) + kcdata->data_.cce_[i] = kcdata->vdata_.cce_[i]; + for(size_t i=0; ivdata_.noise_fC_.size(); ++i) + kcdata->data_.noise_fC_[i] = kcdata->vdata_.noise_fC_[i]; + for(size_t i=0; ivdata_.rcorr_.size(); ++i) kcdata->data_.rcorr_[i] = kcdata->vdata_.rcorr_[i]; - for(int i=0; idata_.s_weights_; ++i) + for(size_t i=0; ivdata_.weights_.size(); ++i) kcdata->data_.weights_[i] = kcdata->vdata_.weights_[i]; } diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.h b/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.h index e2f585101c751..dee906211a62f 100644 --- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.h +++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalHEFRecHitProducer.h @@ -55,6 +55,8 @@ class HeterogeneousHGCalHEFRecHitProducer: public edm::stream::EDProducer mem_const_; cms::cuda::device::unique_ptr d_mem_const_; @@ -78,7 +80,6 @@ class HeterogeneousHGCalHEFRecHitProducer: public edm::stream::EDProducer *kmdata_; KernelConstantData *kcdata_; - KernelConstantData *d_kcdata_; }; #endif //HeterogeneousHGCalHEFRecHitProducer_h diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc index 25e21ad930abb..ee47f1e178cbb 100644 --- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc +++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.cc @@ -1,22 +1,6 @@ #include "HeterogeneousHGCalProducerMemoryWrapper.h" namespace memory { - //variables in the soas for the EE, HEF and HEB configuration data ("constants") - //these numbers excluse SoA members used for size book-keeping purposes - namespace nvars { - constexpr unsigned int double_hgceeconstants_soa = 2; //number of doubles in the EE constants SoA - constexpr unsigned int float_hgceeconstants_soa = 4; //number of floats in the EE constants SoA - constexpr unsigned int int_hgceeconstants_soa = 0; //number of ints in the EE constants SoA - constexpr unsigned int double_hgchefconstants_soa = 2; //number of doubles in the HEF constants SoA - constexpr unsigned int float_hgchefconstants_soa = 4; //number of floats in the HEF constants SoA - constexpr unsigned int int_hgchefconstants_soa = 0; //number of ints in the HEF constants SoA - constexpr unsigned int uint32_hgchefconstants_soa = 0; //number of 32-bit ints in the HEF constants SoA - constexpr unsigned int double_hgchebconstants_soa = 3; //number of doubles in the HEB constants SoA - constexpr unsigned int float_hgchebconstants_soa = 0; //number of floats in the HEB constants SoA - constexpr unsigned int int_hgchebconstants_soa = 0; //number of ints in the HEB constants SoA - constexpr unsigned int uint32_hgchebconstants_soa = 0; //number of 32-bit ints in the HEB constants SoA - } - namespace npointers { //pointers in the soas for the uncalibrated and calibrated hits constexpr unsigned int float_hgcuncalibrechits_soa = 6; //number of float pointers in the uncalibrated rechits SoA @@ -26,81 +10,9 @@ namespace memory { constexpr unsigned int uint32_hgcrechits_soa = 2; //number of uint32_t pointers in the rechits SoA constexpr unsigned int uint8_hgcrechits_soa = 1; //number of uint8_t pointers in the rechits SoA constexpr unsigned int ntypes_hgcrechits_soa = 3; //number of different pointer types in the rechits SoA - //pointers in the soas for the EE, HEF and HEB configuration data ("constants") - constexpr unsigned int double_hgceeconstants_soa = 5; - constexpr unsigned int float_hgceeconstants_soa = 0; - constexpr unsigned int int_hgceeconstants_soa = 0; - constexpr unsigned int double_hgchefconstants_soa = 5; - constexpr unsigned int float_hgchefconstants_soa = 0; - constexpr unsigned int int_hgchefconstants_soa = 0; - constexpr unsigned int double_hgchebconstants_soa = 1; - constexpr unsigned int float_hgchebconstants_soa = 0; - constexpr unsigned int int_hgchebconstants_soa = 0; } namespace allocation { - namespace { - //returns total number of bytes, number of 'double' elements and number of 'float' elements - std::tuple get_memory_sizes_(const std::vector& fixed_sizes, const int& ndoubles, const int& nfloats, const int& nints) - { - assert( fixed_sizes.begin() + ndoubles + nfloats + nints == fixed_sizes.end() ); - const std::vector sizes = {sizeof(double), sizeof(float), sizeof(int)}; - const std::vector nelements = { std::accumulate( fixed_sizes.begin(), fixed_sizes.begin() + ndoubles, 0), - std::accumulate( fixed_sizes.begin() + ndoubles, fixed_sizes.begin() + ndoubles + nfloats, 0), - std::accumulate( fixed_sizes.begin() + ndoubles + nfloats, fixed_sizes.end(), 0) }; - int size_tot = std::inner_product(sizes.begin(), sizes.end(), nelements.begin(), 0); - return std::make_tuple(size_tot, nelements[0], nelements[1], nelements[2]); - } - } - - //EE: allocates memory for constants on the device - void device(KernelConstantData *kcdata, cms::cuda::device::unique_ptr& mem) { - const std::vector nelements = {kcdata->data_.s_hgcEE_fCPerMIP_, kcdata->data_.s_hgcEE_cce_, kcdata->data_.s_hgcEE_noise_fC_, kcdata->data_.s_rcorr_, kcdata->data_.s_weights_}; - auto memsizes = get_memory_sizes_(nelements, npointers::double_hgceeconstants_soa, npointers::float_hgceeconstants_soa, npointers::int_hgceeconstants_soa); - mem = cms::cuda::make_device_unique(std::get<0>(memsizes), 0); - - kcdata->data_.hgcEE_fCPerMIP_ = reinterpret_cast(mem.get()); - kcdata->data_.hgcEE_cce_ = kcdata->data_.hgcEE_fCPerMIP_ + nelements[0]; - kcdata->data_.hgcEE_noise_fC_ = kcdata->data_.hgcEE_cce_ + nelements[1]; - kcdata->data_.rcorr_ = kcdata->data_.hgcEE_noise_fC_ + nelements[2]; - kcdata->data_.weights_ = kcdata->data_.rcorr_ + nelements[3]; - kcdata->data_.nbytes_ = std::get<0>(memsizes); - kcdata->data_.ndelem_ = std::get<1>(memsizes) + 2; - kcdata->data_.nfelem_ = std::get<2>(memsizes) + 4; - kcdata->data_.nielem_ = std::get<3>(memsizes) + 0; - } - - //HEF: allocates memory for constants on the device - void device(KernelConstantData *kcdata, cms::cuda::device::unique_ptr& mem) { - const std::vector nelements = {kcdata->data_.s_hgcHEF_fCPerMIP_, kcdata->data_.s_hgcHEF_cce_, kcdata->data_.s_hgcHEF_noise_fC_, kcdata->data_.s_rcorr_, kcdata->data_.s_weights_}; - auto memsizes = get_memory_sizes_(nelements, npointers::double_hgchefconstants_soa, npointers::float_hgchefconstants_soa, npointers::int_hgchefconstants_soa); - mem = cms::cuda::make_device_unique(std::get<0>(memsizes), 0); - - kcdata->data_.hgcHEF_fCPerMIP_ = reinterpret_cast(mem.get()); - kcdata->data_.hgcHEF_cce_ = kcdata->data_.hgcHEF_fCPerMIP_ + nelements[0]; - kcdata->data_.hgcHEF_noise_fC_ = kcdata->data_.hgcHEF_cce_ + nelements[1]; - kcdata->data_.rcorr_ = kcdata->data_.hgcHEF_noise_fC_ + nelements[2]; - kcdata->data_.weights_ = kcdata->data_.rcorr_ + nelements[3]; - kcdata->data_.nbytes_ = std::get<0>(memsizes); - kcdata->data_.ndelem_ = std::get<1>(memsizes) + 2; - kcdata->data_.nfelem_ = std::get<2>(memsizes) + 4; - kcdata->data_.nielem_ = std::get<3>(memsizes) + 0; - kcdata->data_.nuelem_ = 1; - } - - //HEB: allocates memory for constants on the device - void device(KernelConstantData *kcdata, cms::cuda::device::unique_ptr& mem) { - const std::vector nelements = {kcdata->data_.s_weights_}; - auto memsizes = get_memory_sizes_(nelements, 1, 0, 0); - mem = cms::cuda::make_device_unique(std::get<0>(memsizes), 0); - - kcdata->data_.weights_ = reinterpret_cast(mem.get()); - kcdata->data_.nbytes_ = std::get<0>(memsizes); - kcdata->data_.ndelem_ = std::get<1>(memsizes) + 3; - kcdata->data_.nfelem_ = std::get<2>(memsizes) + 0; - kcdata->data_.nielem_ = std::get<3>(memsizes) + 0; - kcdata->data_.nuelem_ = 1; - } //allocates memory for UncalibratedRecHits SoAs and RecHits SoAs on the device void device(const int& nhits, HGCUncalibratedRecHitSoA* soa1, HGCUncalibratedRecHitSoA* soa2, HGCRecHitSoA* soa3, cms::cuda::device::unique_ptr& mem) @@ -149,58 +61,6 @@ namespace memory { assert(sizes.begin()+2*npointers::ntypes_hgcuncalibrechits_soa+npointers::ntypes_hgcrechits_soa == sizes.end()); } - //EE: allocates page-locked (pinned) and non cached (write-combining) memory for constants on the host - void host(KernelConstantData* kcdata, cms::cuda::host::noncached::unique_ptr& mem) - { - const std::vector nelements = {kcdata->data_.s_hgcEE_fCPerMIP_, kcdata->data_.s_hgcEE_cce_, kcdata->data_.s_hgcEE_noise_fC_, kcdata->data_.s_rcorr_, kcdata->data_.s_weights_}; - auto memsizes = get_memory_sizes_(nelements, npointers::double_hgceeconstants_soa, npointers::float_hgceeconstants_soa, npointers::int_hgceeconstants_soa); - mem = cms::cuda::make_host_noncached_unique(std::get<0>(memsizes), 0); - - kcdata->data_.hgcEE_fCPerMIP_ = reinterpret_cast(mem.get()); - kcdata->data_.hgcEE_cce_ = kcdata->data_.hgcEE_fCPerMIP_ + nelements[0]; - kcdata->data_.hgcEE_noise_fC_ = kcdata->data_.hgcEE_cce_ + nelements[1]; - kcdata->data_.rcorr_ = kcdata->data_.hgcEE_noise_fC_ + nelements[2]; - kcdata->data_.weights_ = kcdata->data_.rcorr_ + nelements[3]; - kcdata->data_.nbytes_ = std::get<0>(memsizes); - kcdata->data_.ndelem_ = std::get<1>(memsizes) + nvars::double_hgceeconstants_soa; - kcdata->data_.nfelem_ = std::get<2>(memsizes) + nvars::float_hgceeconstants_soa; - kcdata->data_.nielem_ = std::get<3>(memsizes) + nvars::int_hgceeconstants_soa; - } - - //HEF: allocates page-locked (pinned) and non cached (write-combining) memory for constants on the host - void host(KernelConstantData* kcdata, cms::cuda::host::noncached::unique_ptr& mem) - { - const std::vector nelements = {kcdata->data_.s_hgcHEF_fCPerMIP_, kcdata->data_.s_hgcHEF_cce_, kcdata->data_.s_hgcHEF_noise_fC_, kcdata->data_.s_rcorr_, kcdata->data_.s_weights_}; - auto memsizes = get_memory_sizes_(nelements, npointers::double_hgchefconstants_soa, npointers::float_hgceeconstants_soa, npointers::int_hgceeconstants_soa); - mem = cms::cuda::make_host_noncached_unique(std::get<0>(memsizes), 0); - - kcdata->data_.hgcHEF_fCPerMIP_ = reinterpret_cast(mem.get()); - kcdata->data_.hgcHEF_cce_ = kcdata->data_.hgcHEF_fCPerMIP_ + nelements[0]; - kcdata->data_.hgcHEF_noise_fC_ = kcdata->data_.hgcHEF_cce_ + nelements[1]; - kcdata->data_.rcorr_ = kcdata->data_.hgcHEF_noise_fC_ + nelements[2]; - kcdata->data_.weights_ = kcdata->data_.rcorr_ + nelements[3]; - kcdata->data_.nbytes_ = std::get<0>(memsizes); - kcdata->data_.ndelem_ = std::get<1>(memsizes) + nvars::double_hgchefconstants_soa; - kcdata->data_.nfelem_ = std::get<2>(memsizes) + nvars::float_hgchefconstants_soa; - kcdata->data_.nielem_ = std::get<3>(memsizes) + nvars::int_hgchefconstants_soa; - kcdata->data_.nuelem_ = nvars::uint32_hgchefconstants_soa; - } - - //HEB: allocates page-locked (pinned) and non cached (write-combining) memory for constants on the host - void host(KernelConstantData* kcdata, cms::cuda::host::noncached::unique_ptr& mem) - { - const std::vector nelements = {kcdata->data_.s_weights_}; - auto memsizes = get_memory_sizes_(nelements, npointers::double_hgchebconstants_soa, npointers::float_hgceeconstants_soa, npointers::int_hgceeconstants_soa); - mem = cms::cuda::make_host_noncached_unique(std::get<0>(memsizes), 0); - - kcdata->data_.weights_ = reinterpret_cast(mem.get()); - kcdata->data_.nbytes_ = std::get<0>(memsizes); - kcdata->data_.ndelem_ = std::get<1>(memsizes) + nvars::double_hgchebconstants_soa; - kcdata->data_.nfelem_ = std::get<2>(memsizes) + nvars::float_hgchebconstants_soa; - kcdata->data_.nielem_ = std::get<3>(memsizes) + nvars::int_hgchebconstants_soa; - kcdata->data_.nuelem_ = nvars::uint32_hgchebconstants_soa; - } - //allocates page-locked (pinned) and non cached (write-combining) memory for UncalibratedRecHits SoAs on the host void host(const int& nhits, HGCUncalibratedRecHitSoA* soa, cms::cuda::host::noncached::unique_ptr& mem) { diff --git a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h index 27c8204c283dd..1a06326169261 100644 --- a/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h +++ b/UserCode/CodeGPU/plugins/HeterogeneousHGCalProducerMemoryWrapper.h @@ -31,14 +31,8 @@ namespace memory { namespace allocation { - void host(KernelConstantData*, cms::cuda::host::noncached::unique_ptr&); - void host(KernelConstantData*, cms::cuda::host::noncached::unique_ptr&); - void host(KernelConstantData*, cms::cuda::host::noncached::unique_ptr&); void host(const int&, HGCUncalibratedRecHitSoA*, cms::cuda::host::noncached::unique_ptr&); void host(const int&, HGCRecHitSoA*, cms::cuda::host::unique_ptr&); - void device(KernelConstantData*, cms::cuda::device::unique_ptr&); - void device(KernelConstantData*, cms::cuda::device::unique_ptr&); - void device(KernelConstantData*, cms::cuda::device::unique_ptr&); void device(const int&, HGCUncalibratedRecHitSoA*, HGCUncalibratedRecHitSoA*, HGCRecHitSoA*, cms::cuda::device::unique_ptr&); } } diff --git a/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.cu b/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.cu index 9ec43147af70a..7ba379b66940d 100644 --- a/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.cu +++ b/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.cu @@ -22,24 +22,6 @@ void KernelManagerHGCalRecHit::transfer_soas_to_device_() after_(); } -void KernelManagerHGCalRecHit::transfer_constants_to_device_(const KernelConstantData *h_kcdata, KernelConstantData *d_kcdata) -{ - cudaCheck( cudaMemcpyAsync( d_kcdata->data_.hgcEE_fCPerMIP_, h_kcdata->data_.hgcEE_fCPerMIP_, h_kcdata->data_.nbytes_, cudaMemcpyHostToDevice) ); - after_(); -} - -void KernelManagerHGCalRecHit::transfer_constants_to_device_(const KernelConstantData *h_kcdata, KernelConstantData *d_kcdata) -{ - cudaCheck( cudaMemcpyAsync( d_kcdata->data_.hgcHEF_fCPerMIP_, h_kcdata->data_.hgcHEF_fCPerMIP_, h_kcdata->data_.nbytes_, cudaMemcpyHostToDevice) ); - after_(); -} - -void KernelManagerHGCalRecHit::transfer_constants_to_device_(const KernelConstantData *h_kcdata, KernelConstantData *d_kcdata) -{ - cudaCheck( cudaMemcpyAsync( d_kcdata->data_.weights_, h_kcdata->data_.weights_, h_kcdata->data_.nbytes_, cudaMemcpyHostToDevice) ); - after_(); -} - void KernelManagerHGCalRecHit::transfer_soa_to_host_and_synchronize_() { cudaCheck( cudaMemcpyAsync((data_->h_out_)->energy_, (data_->d_out_)->energy_, nbytes_host_, cudaMemcpyDeviceToHost) ); @@ -52,67 +34,48 @@ void KernelManagerHGCalRecHit::reuse_device_pointers_() after_(); } -int KernelManagerHGCalRecHit::get_shared_memory_size_(const int& nd, const int& nf, const int& nu, const int& ni) { - int dmem = nd*sizeof(double); - int fmem = nf*sizeof(float); - int umem = nu*sizeof(uint32_t); - int imem = ni*sizeof(int); - return dmem + fmem + umem + imem; -} - -void KernelManagerHGCalRecHit::run_kernels(const KernelConstantData *h_kcdata, KernelConstantData *d_kcdata) +void KernelManagerHGCalRecHit::run_kernels(const KernelConstantData *kcdata) { - transfer_constants_to_device_(h_kcdata, d_kcdata); transfer_soas_to_device_(); - int nbytes_shared = get_shared_memory_size_(h_kcdata->data_.ndelem_, h_kcdata->data_.nfelem_, 0, h_kcdata->data_.nielem_); - /* - ee_step1<<<::nblocks_, ::nthreads_>>>( *(data_->d_2_), *(data_->d_1_), d_kcdata->data_, data_->nhits_ ); + ee_step1<<<::nblocks_, ::nthreads_>>>( *(data_->d_2_), *(data_->d_1_), kcdata->data_, data_->nhits_ ); after_(); reuse_device_pointers_(); */ - ee_to_rechit<<<::nblocks_, ::nthreads_, nbytes_shared>>>( *(data_->d_out_), *(data_->d_1_), d_kcdata->data_, data_->nhits_ ); + ee_to_rechit<<<::nblocks_, ::nthreads_>>>( *(data_->d_out_), *(data_->d_1_), kcdata->data_, data_->nhits_ ); after_(); - transfer_soa_to_host_and_synchronize_(); } -void KernelManagerHGCalRecHit::run_kernels(const KernelConstantData *h_kcdata, KernelConstantData *d_kcdata, const hgcal_conditions::HeterogeneousHEFConditionsESProduct* d_conds) +void KernelManagerHGCalRecHit::run_kernels(const KernelConstantData *kcdata, const hgcal_conditions::HeterogeneousHEFConditionsESProduct* d_conds) { - transfer_constants_to_device_(h_kcdata, d_kcdata); transfer_soas_to_device_(); - int nbytes_shared = get_shared_memory_size_(h_kcdata->data_.ndelem_, h_kcdata->data_.nfelem_, h_kcdata->data_.nuelem_, h_kcdata->data_.nielem_); - /* hef_step1<<<::nblocks_,::nthreads_>>>( *(data_->d_2), *(data_->d_1_), d_kcdata->data, data_->nhits_); after_(); reuse_device_pointers_(); */ - hef_to_rechit<<<::nblocks_,::nthreads_, nbytes_shared>>>( *(data_->d_out_), *(data_->d_1_), d_kcdata->data_, d_conds, data_->nhits_ ); + hef_to_rechit<<<::nblocks_,::nthreads_>>>( *(data_->d_out_), *(data_->d_1_), kcdata->data_, d_conds, data_->nhits_ ); after_(); transfer_soa_to_host_and_synchronize_(); } -void KernelManagerHGCalRecHit::run_kernels(const KernelConstantData *h_kcdata, KernelConstantData *d_kcdata) +void KernelManagerHGCalRecHit::run_kernels(const KernelConstantData *kcdata) { - transfer_constants_to_device_(h_kcdata, d_kcdata); transfer_soas_to_device_(); - int nbytes_shared = get_shared_memory_size_(h_kcdata->data_.ndelem_, h_kcdata->data_.nfelem_, h_kcdata->data_.nuelem_, h_kcdata->data_.nielem_); - /* heb_step1<<<::nblocks_, ::nthreads_>>>( *(data_->d_2_), *(data_->d_1_), d_kcdata->data_, data_->nhits_); after_(); reuse_device_pointers_(); */ - heb_to_rechit<<<::nblocks_, ::nthreads_, nbytes_shared>>>( *(data_->d_out_), *(data_->d_1_), d_kcdata->data_, data_->nhits_ ); + heb_to_rechit<<<::nblocks_, ::nthreads_>>>( *(data_->d_out_), *(data_->d_1_), kcdata->data_, data_->nhits_ ); after_(); - transfer_soa_to_host_and_synchronize_(); } diff --git a/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.h b/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.h index 1c9fd90f180b1..64e8ecf5ceabe 100644 --- a/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.h +++ b/UserCode/CodeGPU/plugins/KernelManagerHGCalRecHit.h @@ -64,18 +64,14 @@ class KernelManagerHGCalRecHit { public: KernelManagerHGCalRecHit(KernelModifiableData*); ~KernelManagerHGCalRecHit(); - void run_kernels(const KernelConstantData*, KernelConstantData*); - void run_kernels(const KernelConstantData*, KernelConstantData*, const hgcal_conditions::HeterogeneousHEFConditionsESProduct*); - void run_kernels(const KernelConstantData*, KernelConstantData*); + void run_kernels(const KernelConstantData*); + void run_kernels(const KernelConstantData*, const hgcal_conditions::HeterogeneousHEFConditionsESProduct*); + void run_kernels(const KernelConstantData*); HGCRecHitSoA* get_output(); private: void after_(); - int get_shared_memory_size_(const int&, const int&, const int&, const int&); void transfer_soas_to_device_(); - void transfer_constants_to_device_(const KernelConstantData*, KernelConstantData*); - void transfer_constants_to_device_(const KernelConstantData*, KernelConstantData*); - void transfer_constants_to_device_(const KernelConstantData*, KernelConstantData*); void transfer_soa_to_host_and_synchronize_(); void reuse_device_pointers_();