diff --git a/CUDADataFormats/HGCal/interface/HGCConditions.h b/CUDADataFormats/HGCal/interface/HGCConditions.h index 2618de3f21dfa..7a003adb9a071 100644 --- a/CUDADataFormats/HGCal/interface/HGCConditions.h +++ b/CUDADataFormats/HGCal/interface/HGCConditions.h @@ -1,45 +1,6 @@ #ifndef CUDADataFormats_HGCal_HGCConditions_h #define CUDADataFormats_HGCal_HGCConditions_h -#include -#include - -namespace { - struct xyz { - float x; - float y; - float z; - constexpr bool operator ==(const xyz& rhs) const { return this->x == rhs.x && this->y == rhs.y; } - }; - using Item = std::pair; - constexpr Item map_items[] = { - { 0, {3.,3} }, - { 1, {4.,4.} }, - { 2, {5.,5.2} }, - }; - constexpr int map_size = sizeof(map_items)/sizeof(map_items[0]); - - static constexpr xyz findValue(int key, int range = map_size) { - return - (range == 0) ? throw std::runtime_error("Value not present"): - (map_items[range - 1].first == key) ? map_items[range - 1].second: - findValue(key, range - 1); - }; - - static constexpr int findKey(xyz value, int range = map_size) { - return - (range == 0) ? throw std::runtime_error("Key not present"): - (map_items[range - 1].second == value) ? map_items[range - 1].first: - findKey(value, range - 1); - }; -} - - - - - - - class HeterogeneousHGCSiliconDetId { public: constexpr HeterogeneousHGCSiliconDetId(uint32_t id): id_(id) {} @@ -52,7 +13,6 @@ class HeterogeneousHGCSiliconDetId { constexpr uint32_t waferV() { return (((id_ >> kHGCalWaferVSignOffset) & kHGCalWaferVSignMask) ? -waferVAbs() : waferVAbs()); } constexpr uint32_t cellU() { return (id_ >> kHGCalCellUOffset) & kHGCalCellUMask; } constexpr uint32_t cellV() { return (id_ >> kHGCalCellVOffset) & kHGCalCellVMask; } - constexpr float cellX() { return ::findValue(0).x; } //CHANGE!! private: uint32_t id_; @@ -151,11 +111,31 @@ namespace hgcal_conditions { } //namespace parameters + namespace positions { + + //stores the positions taken from the detid's in the CPU + struct HGCalPositions { + std::vector x; + std::vector y; + std::vector z; + }; + + //stores the positions taken from the detid's in the GPU + //it is the same for all three subdetectors + struct HeterogeneousHGCalPositions { + float* x; + float* y; + float* z; + }; + + } //namespace positions + struct HeterogeneousEEConditionsESProduct { parameters::HeterogeneousHGCalEEParameters params; }; struct HeterogeneousHEFConditionsESProduct { parameters::HeterogeneousHGCalHEFParameters params; + positions::HeterogeneousHGCalPositions pos; }; struct HeterogeneousHEBConditionsESProduct { parameters::HeterogeneousHGCalHEBParameters params; diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu b/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu index 49f091f7cddf4..c4e88a771fb68 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu @@ -108,7 +108,7 @@ void hef_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const { unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]); - printf("cellCoarseY: %lf - cellX: %lf - cellLayer: %d\n", conds->params.cellCoarseY_[12], detid.cellX(), detid.layer()); + printf("cellCoarseY: %lf - cellLayer: %d - cellPosX: %lf - cellPosY: %lf - cellPosZ: %lf\n", conds->params.cellCoarseY_[12], detid.layer(), conds->pos.x[9], conds->pos.y[9], conds->pos.z[9]); for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x) { diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc index 3848ca4a28324..a67dbcda71dea 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc @@ -1,51 +1,75 @@ #include "RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h" -HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters* cpuHGCalParameters) +HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters* cpuHGCalParameters, + const cpos::HGCalPositions* cpuXYZ) { - calculate_memory_bytes(cpuHGCalParameters); + //HGCalParameters as defined in CMSSW + this->sizes_params_ = calculate_memory_bytes_params_(cpuHGCalParameters); + this->chunk_params_ = allocate_memory_params_(this->sizes_params_); + transfer_data_to_heterogeneous_pointers_params_(this->sizes_params_, cpuHGCalParameters); - chunk_ = std::accumulate(this->sizes_.begin(), this->sizes_.end(), 0); //total memory required in bytes + //HGCalPositions as defined in hgcal_conditions::positions + this->sizes_pos_ = calculate_memory_bytes_pos_(cpuXYZ); + this->chunk_pos_ = allocate_memory_pos_(this->sizes_pos_); + transfer_data_to_heterogeneous_pointers_pos_(this->sizes_pos_, cpuXYZ); +} + +size_t HeterogeneousHGCalHEFConditionsWrapper::allocate_memory_params_(const std::vector& sz) +{ + size_t chunk_ = std::accumulate(sz.begin(), sz.end(), 0); //total memory required in bytes gpuErrchk(cudaMallocHost(&this->params_.cellFineX_, chunk_)); + return chunk_; +} +size_t HeterogeneousHGCalHEFConditionsWrapper::allocate_memory_pos_(const std::vector& sz) +{ + size_t chunk_ = std::accumulate(sz.begin(), sz.end(), 0); //total memory required in bytes + gpuErrchk(cudaMallocHost(&this->pos_.x, chunk_)); + return chunk_; +} + +void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_pointers_params_(const std::vector& sz, const HGCalParameters* cpuParams) +{ //store cumulative sum in bytes and convert it to sizes in units of C++ typesHEF, i.e., number if items to be transferred to GPU - std::vector cumsum_sizes( this->sizes_.size()+1, 0 ); //starting with zero - std::partial_sum(this->sizes_.begin(), this->sizes_.end(), cumsum_sizes.begin()+1); + std::vector cumsum_sizes( sz.size()+1, 0 ); //starting with zero + std::partial_sum(sz.begin(), sz.end(), cumsum_sizes.begin()+1); for(unsigned int i=1; isizes_.size(); ++j) { + for(unsigned int j=0; jparams_, j) = select_pointer_d(&this->params_, jm1) + this->sizes_[jm1]; - else if( cp::typesHEF[jm1] == cp::HeterogeneousHGCalHEFParametersType::Double and - cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Int32_t ) - select_pointer_i(&this->params_, j) = reinterpret_cast( select_pointer_d(&this->params_, jm1) + this->sizes_[jm1] ); + const size_t shift = cumsum_sizes[j] - cumsum_sizes[jm1]; + if( cpar::typesHEF[jm1] == cpar::HeterogeneousHGCalHEFParametersType::Double and + cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double ) + select_pointer_d(&this->params_, j) = select_pointer_d(&this->params_, jm1) + shift; + else if( cpar::typesHEF[jm1] == cpar::HeterogeneousHGCalHEFParametersType::Double and + cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t ) + select_pointer_i(&this->params_, j) = reinterpret_cast( select_pointer_d(&this->params_, jm1) + shift ); } //copying the pointers' content for(unsigned int i=cumsum_sizes[j]; iparams_, j)[index] = select_pointer_d(cpuHGCalParameters, j)[index]; + if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double ) { + select_pointer_d(&this->params_, j)[index] = select_pointer_d(cpuParams, j)[index]; } - else if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Int32_t ) + else if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t ) { - select_pointer_i(&this->params_, j)[index] = select_pointer_i(cpuHGCalParameters, j)[index]; + select_pointer_i(&this->params_, j)[index] = select_pointer_i(cpuParams, j)[index]; } else edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "Wrong HeterogeneousHGCalParameters type"; @@ -53,29 +77,79 @@ HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(c } } -void HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_bytes(const HGCalParameters* cpuHGCalParameters) { +void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_pointers_pos_(const std::vector& sz, const cpos::HGCalPositions* cpuParams) +{ + //store cumulative sum in bytes and convert it to sizes in units of C++ floats, i.e., number if items to be transferred to GPU + std::vector cumsum_sizes( sz.size()+1, 0 ); + std::partial_sum(sz.begin(), sz.end(), cumsum_sizes.begin()+1); //starting with zero + for(unsigned int i=1; ipos_), j) = select_pointer_f(&(this->pos_), jm1) + shift; + } + + //copying the pointers' content + for(unsigned int i=cumsum_sizes[j]; ipos_), j)[index] = select_pointer_f(cpuParams, j)[index]; + } + } +} + +std::vector HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_bytes_params_(const HGCalParameters* cpuParams) { size_t npointers = hgcal_conditions::parameters::typesHEF.size(); std::vector sizes(npointers); for(unsigned int i=0; i sizes_units(npointers); for(unsigned int i=0; isizes_.resize(npointers); - std::transform( sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_.begin(), std::multiplies() ); + this->sizes_params_.resize(npointers); + std::transform( sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_params_.begin(), std::multiplies() ); + return this->sizes_params_; +} + +std::vector HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_bytes_pos_(const cpos::HGCalPositions* cpuPos) { + size_t npointers = 3; //x, y and z, all float (this is fixed by the geometry and won't change) + std::vector sizes(npointers); + for(unsigned int i=0; i sizes_units(npointers); + for(unsigned int i=0; isizes_pos_.resize(npointers); + std::transform( sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_pos_.begin(), std::multiplies() ); + return this->sizes_pos_; } HeterogeneousHGCalHEFConditionsWrapper::~HeterogeneousHGCalHEFConditionsWrapper() { @@ -84,8 +158,8 @@ HeterogeneousHGCalHEFConditionsWrapper::~HeterogeneousHGCalHEFConditionsWrapper( //I could use template specializations //try to use std::variant in the future to avoid similar functions with different return values -double*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_d(cp::HeterogeneousHGCalHEFParameters* cpuObject, - const unsigned int& item) const { +double*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_d(cpar::HeterogeneousHGCalHEFParameters* cpuObject, + const unsigned int& item) const { switch(item) { case 0: @@ -120,7 +194,39 @@ std::vector HeterogeneousHGCalHEFConditionsWrapper::select_pointer_d(con } } -int32_t*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i(cp::HeterogeneousHGCalHEFParameters* cpuObject, +float*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_f(cpos::HeterogeneousHGCalPositions* cpuObject, + const unsigned int& item) const { + switch(item) + { + case 0: + return cpuObject->x; + case 1: + return cpuObject->y; + case 2: + return cpuObject->z; + default: + edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_f(heterogeneous): no item."; + return cpuObject->x; + } +} + +std::vector HeterogeneousHGCalHEFConditionsWrapper::select_pointer_f(const cpos::HGCalPositions* cpuObject, + const unsigned int& item) const { + switch(item) + { + case 0: + return cpuObject->x; + case 1: + return cpuObject->y; + case 2: + return cpuObject->z; + default: + edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_f(non-heterogeneous): no item."; + return cpuObject->x; + } +} + +int32_t*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i(cpar::HeterogeneousHGCalHEFParameters* cpuObject, const unsigned int& item) const { switch(item) { @@ -157,27 +263,37 @@ hgcal_conditions::HeterogeneousHEFConditionsESProduct const *HeterogeneousHGCalH // Allocate the payload object on pinned host memory. gpuErrchk(cudaMallocHost(&data.host, sizeof(hgcal_conditions::HeterogeneousHEFConditionsESProduct))); // Allocate the payload array(s) on device memory. - gpuErrchk(cudaMalloc(&(data.host->params.cellFineX_), chunk_)); + gpuErrchk(cudaMalloc(&(data.host->params.cellFineX_), chunk_params_)); + gpuErrchk(cudaMalloc(&(data.host->pos.x), chunk_pos_)); // Allocate the payload object on the device memory. gpuErrchk(cudaMalloc(&data.device, sizeof(hgcal_conditions::HeterogeneousHEFConditionsESProduct))); // Transfer the payload, first the array(s) ... - gpuErrchk(cudaMemcpyAsync(data.host->params.cellFineX_, this->params_.cellFineX_, chunk_, cudaMemcpyHostToDevice, stream)); + gpuErrchk(cudaMemcpyAsync(data.host->params.cellFineX_, this->params_.cellFineX_, chunk_params_, cudaMemcpyHostToDevice, stream)); + gpuErrchk(cudaMemcpyAsync(data.host->pos.x, this->pos_.x, chunk_pos_, cudaMemcpyHostToDevice, stream)); - for(unsigned int j=0; jsizes_.size()-1; ++j) + //(set the pointers of the parameters) + for(unsigned int j=0; jsizes_params_.size()-1; ++j) { - if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Double and - cp::typesHEF[j+1] == cp::HeterogeneousHGCalHEFParametersType::Double ) - select_pointer_d(&(data.host->params), j+1) = select_pointer_d(&(data.host->params), j) + this->sizes_[j]; - else if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Double and - cp::typesHEF[j+1] == cp::HeterogeneousHGCalHEFParametersType::Int32_t ) - select_pointer_i(&(data.host->params), j+1) = reinterpret_cast( select_pointer_d(&(data.host->params), j) + this->sizes_[j] ); + if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double and + cpar::typesHEF[j+1] == cpar::HeterogeneousHGCalHEFParametersType::Double ) + select_pointer_d(&(data.host->params), j+1) = select_pointer_d(&(data.host->params), j) + (this->sizes_params_[j]/sizeof(double)); + else if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double and + cpar::typesHEF[j+1] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t ) + select_pointer_i(&(data.host->params), j+1) = reinterpret_cast( select_pointer_d(&(data.host->params), j) + (this->sizes_params_[j]/sizeof(double)) ); else edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "compare this functions' logic with hgcal_conditions::parameters::typesHEF"; } + //(set the pointers of the positions) + for(unsigned int j=0; jsizes_pos_.size()-1; ++j) + { + select_pointer_f(&(data.host->pos), j+1) = select_pointer_f(&(data.host->pos), j) + (this->sizes_pos_[j]/sizeof(float)); + } + // ... and then the payload object gpuErrchk(cudaMemcpyAsync(data.device, data.host, sizeof(hgcal_conditions::HeterogeneousHEFConditionsESProduct), cudaMemcpyHostToDevice, stream)); + }); //gpuData_.dataForCurrentDeviceAsync // Returns the payload object on the memory of the current device diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h index 1a336545b2dc0..8f7c7628c8062 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h @@ -11,7 +11,8 @@ #include "Geometry/HGCalCommonData/interface/HGCalParameters.h" #include "RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.h" -namespace cp = hgcal_conditions::parameters; +namespace cpar = hgcal_conditions::parameters; +namespace cpos = hgcal_conditions::positions; // Declare the wrapper ESProduct. The corresponding ESProducer should // produce objects of this type. @@ -19,7 +20,7 @@ class HeterogeneousHGCalHEFConditionsWrapper { public: // Constructor takes the standard CPU ESProduct, and transforms the // necessary data to array(s) in pinned host memory - HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters*); + HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters*, const cpos::HGCalPositions*); // Deallocates all pinned host memory ~HeterogeneousHGCalHEFConditionsWrapper(); @@ -30,16 +31,27 @@ class HeterogeneousHGCalHEFConditionsWrapper { private: // Holds the data in pinned CPU memory // Contrary to its non-heterogeneous counterpart (constructor argument) it is *not* a pointer (so to avoid an extra allocation) - cp::HeterogeneousHGCalHEFParameters params_; + cpar::HeterogeneousHGCalHEFParameters params_; + cpos::HeterogeneousHGCalPositions pos_; - std::vector sizes_; - size_t chunk_; + std::vector sizes_params_; + std::vector sizes_pos_; + size_t chunk_params_; + size_t chunk_pos_; - void calculate_memory_bytes(const HGCalParameters*); - double*& select_pointer_d(cp::HeterogeneousHGCalHEFParameters*, const unsigned int&) const; + std::vector calculate_memory_bytes_params_(const HGCalParameters*); + std::vector calculate_memory_bytes_pos_(const cpos::HGCalPositions*); + size_t allocate_memory_params_(const std::vector&); + size_t allocate_memory_pos_(const std::vector&); + void transfer_data_to_heterogeneous_pointers_params_(const std::vector&, const HGCalParameters*); + void transfer_data_to_heterogeneous_pointers_pos_(const std::vector&, const cpos::HGCalPositions*); + + double*& select_pointer_d(cpar::HeterogeneousHGCalHEFParameters*, const unsigned int&) const; std::vector select_pointer_d(const HGCalParameters*, const unsigned int&) const; - int32_t*& select_pointer_i(cp::HeterogeneousHGCalHEFParameters*, const unsigned int&) const; + int32_t*& select_pointer_i(cpar::HeterogeneousHGCalHEFParameters*, const unsigned int&) const; std::vector select_pointer_i(const HGCalParameters*, const unsigned int&) const; + float*& select_pointer_f(cpos::HeterogeneousHGCalPositions*, const unsigned int&) const; + std::vector select_pointer_f(const cpos::HGCalPositions*, const unsigned int&) const; // Helper struct to hold all information that has to be allocated and // deallocated per device diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc index 2c1d58c8bea7f..4c8d7780de51c 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc @@ -16,9 +16,8 @@ HeterogeneousHGCalHEFRecHitProducer::HeterogeneousHGCalHEFRecHitProducer(const e cdata_.uncalib2GeV_ = 1e-6 / cdata_.keV2DIGI_; assert_sizes_constants_(vdata_); - + xyz_ = new hgcal_conditions::positions::HGCalPositions(); tools_.reset(new hgcal::RecHitTools()); - produces(collection_name_); } @@ -31,6 +30,7 @@ HeterogeneousHGCalHEFRecHitProducer::~HeterogeneousHGCalHEFRecHitProducer() delete d_intermediateSoA_; delete d_calibSoA_; delete calibSoA_; + delete xyz_; } std::string HeterogeneousHGCalHEFRecHitProducer::assert_error_message_(std::string var, const size_t& s) @@ -59,9 +59,9 @@ void HeterogeneousHGCalHEFRecHitProducer::acquire(edm::Event const& event, edm:: const cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(w), ctxState_}; set_conditions_(setup); - HeterogeneousHGCalHEFConditionsWrapper esproduct(params_); + HeterogeneousHGCalHEFConditionsWrapper esproduct(params_, xyz_); d_conds = esproduct.getHeterogeneousConditionsESProductAsync(ctx.stream()); - + event.getByToken(token_, handle_hef_); const auto &hits_hef = *handle_hef_; @@ -88,6 +88,15 @@ void HeterogeneousHGCalHEFRecHitProducer::set_conditions_(const edm::EventSetup& setup.get().get(handle_str, handle); ddd_ = &( handle->topology().dddConstants() ); params_ = ddd_->getParameter(); + + //fill the CPU position structure from the geometry + size_t test_size = 10; + for(unsigned int i=0; ix.push_back(1.1); + xyz_->y.push_back(1.2); + xyz_->z.push_back(1.3); + } } void HeterogeneousHGCalHEFRecHitProducer::produce(edm::Event& event, const edm::EventSetup& setup) diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.h b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.h index 3aef9132d639f..04ab2ae23020a 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.h +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.h @@ -68,6 +68,7 @@ class HeterogeneousHGCalHEFRecHitProducer: public edm::stream::EDProducer