From cfff76ef684f7921fc2d50a8e377c205f9c4826e Mon Sep 17 00:00:00 2001 From: Bruno Alves Date: Mon, 22 Jun 2020 13:19:52 +0200 Subject: [PATCH] bug fix: threadid index bug fix: uint32_t conversion to float --- .../HGCal/interface/HGCConditions.h | 4 +- .../plugins/HGCalRecHitKernelImpl.cu | 62 ++++++++++--------- .../plugins/HGCalRecHitKernelImpl.cuh | 4 +- .../HeterogeneousHGCalHEFConditions.cc | 3 +- .../HeterogeneousHGCalHEFRecHitProducer.cc | 57 ++++++++--------- .../plugins/KernelManagerHGCalRecHit.cu | 2 + .../plugins/KernelManagerHGCalRecHit.h | 2 +- 7 files changed, 70 insertions(+), 64 deletions(-) diff --git a/CUDADataFormats/HGCal/interface/HGCConditions.h b/CUDADataFormats/HGCal/interface/HGCConditions.h index 9165d2721857b..2052919f4dee4 100644 --- a/CUDADataFormats/HGCal/interface/HGCConditions.h +++ b/CUDADataFormats/HGCal/interface/HGCConditions.h @@ -13,8 +13,8 @@ class HeterogeneousHGCSiliconDetId { constexpr int32_t waferV() { return (((id_ >> kHGCalWaferVSignOffset) & kHGCalWaferVSignMask) ? -waferVAbs() : waferVAbs()); } constexpr int32_t waferX() { return (-2 * waferU() + waferV()); } constexpr int32_t waferY() { return (2 * waferV()); } - constexpr int32_t cellU() { return (id_ >> kHGCalCellUOffset) & kHGCalCellUMask; } - constexpr int32_t cellV() { return (id_ >> kHGCalCellVOffset) & kHGCalCellVMask; } + constexpr uint32_t cellU() { return (id_ >> kHGCalCellUOffset) & kHGCalCellUMask; } + constexpr uint32_t cellV() { return (id_ >> kHGCalCellVOffset) & kHGCalCellVMask; } constexpr uint32_t nCells() { return (type() == HGCalFine) ? HGCalFineN : HGCalCoarseN; } constexpr int32_t cellX() { const uint32_t N = nCells(); return (3 * (cellV() - N) + 2); } constexpr int32_t cellY() { const uint32_t N = nCells(); return (2 * cellU() - (N + cellV())); } diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu b/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu index 1b404c653bb43..ecc6a22de5883 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu @@ -88,10 +88,10 @@ __global__ void ee_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const HGCeeUncalibratedRecHitConstantData cdata, int length) { unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; - HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]); for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x) { + HeterogeneousHGCSiliconDetId detid(src_soa.id_[i]); double weight = get_weight_from_layer(detid.layer(), cdata.weights_); double rcorr = get_thickness_correction(detid.type(), cdata.rcorr_); double noise = get_noise(detid.type(), cdata.noise_fC_); @@ -110,7 +110,7 @@ void hef_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x) { - HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]); + HeterogeneousHGCSiliconDetId detid(src_soa.id_[i]); printf("cellCoarseY: %lf - cellX: %d - numberCellsHexagon: %d - DetId: %d - Var: %d\n", conds->params.cellCoarseY_[12], detid.cellX(), conds->posmap.numberCellsHexagon[0], conds->posmap.detid[9], conds->posmap.waferMax); double weight = get_weight_from_layer(detid.layer(), cdata.weights_); @@ -128,10 +128,10 @@ __global__ void heb_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const HGChebUncalibratedRecHitConstantData cdata, int length) { unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; - HeterogeneousHGCScintillatorDetId detid(src_soa.id_[tid]); for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x) { + HeterogeneousHGCScintillatorDetId detid(src_soa.id_[i]); double weight = get_weight_from_layer(detid.layer(), cdata.weights_); double noise = cdata.noise_MIP_; double sigmaNoiseGeV = 1e-3 * noise * weight; @@ -147,31 +147,38 @@ void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditio for (unsigned int i = tid; i < conds->nelems_posmap; i += blockDim.x * gridDim.x) { - HeterogeneousHGCSiliconDetId did(conds->posmap.detid[tid]); - uint32_t cU = did.cellU(); - uint32_t cV = did.cellV(); - uint32_t wU = did.waferU(); - uint32_t wV = did.waferV(); - uint32_t ncells = did.nCells(); + HeterogeneousHGCSiliconDetId did(conds->posmap.detid[i]); + const float cU = static_cast( did.cellU() ); + const float cV = static_cast( did.cellV() ); + const float wU = static_cast( did.waferU() ); + const float wV = static_cast( did.waferV() ); + const float ncells = static_cast( did.nCells() ); + const float side = static_cast( did.zside() ); + const int32_t layer = did.layer(); - float r = 0.5f * (conds->posmap.waferSize + conds->posmap.sensorSeparation); - float sqrt3 = __fsqrt_rn(3.); - float rsqrt3 = __frsqrt_rn(3.); - float R = 2.f * r * rsqrt3; //rsqrt: 1 / sqrt - uint32_t n2 = ncells / 2; - float yoff = rsqrt3 * 2.f * r; //CHANGE according to Sunanda's reply + //based on `std::pair HGCalDDDConstants::locateCell(const HGCSiliconDetId&, bool) + const float r_x2 = conds->posmap.waferSize + conds->posmap.sensorSeparation; + const float r = 0.5f * r_x2; + const float sqrt3 = __fsqrt_rn(3.f); + const float rsqrt3 = __frsqrt_rn(3.f);//rsqrt: 1 / sqrt + const float R = r_x2 * rsqrt3; + const float n2 = ncells / 2.f; + const float yoff_abs = rsqrt3 * r_x2; + const float yoff = (layer%2==1) ? yoff_abs : -1.f * yoff_abs; //CHANGE according to Sunanda's reply float xpos = (-2.f * wU + wV) * r; - float ypos = yoff + (1.5 * wV * R); - float R1 = __fdividef( conds->posmap.waferSize, 3.f * ncells ); - float r1 = 0.5 * R1 * sqrt3; - xpos += (1.5 * (cV - ncells) + 1.0) * R1; - ypos += (cU - 0.5 * cV - n2) * 2 * r1; - - conds->posmap.x[tid] = xpos * did.zside(); - conds->posmap.y[tid] = ypos; - conds->posmap.z[tid] = 1.3; + float ypos = yoff + (1.5f * wV * R); + const float R1 = __fdividef( conds->posmap.waferSize, 3.f * ncells ); + const float r1_x2 = R1 * sqrt3; + xpos += (1.5f * (cV - ncells) + 1.f) * R1; + ypos += (cU - 0.5f * cV - n2) * r1_x2; + + conds->posmap.x[i] = xpos * side; + conds->posmap.y[i] = ypos; + conds->posmap.z[i] = 1.3; + + //printf( "%d - %lf - %lf\n", cV - ncells, 1.5f*(static_cast(cV) - static_cast(ncells)), 1.5f*(static_cast(cV - ncells)) ); + //printf("waferU: %d\t waferV: %d\t cellU: %d\t cellV: %d\t nCells: %d\t R1: %lf\t Layer: %d\t PosX: %lf\t PosY: %lf\t PosZ: %lf\n", wU, wV, cU, cV, ncells, R1, layer, conds->posmap.x[i], conds->posmap.y[i], conds->posmap.z[i]); } - } __global__ @@ -181,7 +188,6 @@ void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditi for (unsigned int i = tid; i < conds->nelems_posmap; i += blockDim.x * gridDim.x) { - printf("PosX: %lf - PosY: %lf - Posz: %lf - waferSize: %lf - sensorSeparation: %lf\n", conds->posmap.x[tid], conds->posmap.y[tid], conds->posmap.z[tid], conds->posmap.waferSize, conds->posmap.sensorSeparation); - } - + printf("PosX: %lf\t PosY: %lf\t Posz: %lf\n", conds->posmap.x[i], conds->posmap.y[i], conds->posmap.z[i]); + } } diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cuh b/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cuh index f1f0a6e1d79af..2c1c326d266a1 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cuh +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cuh @@ -30,9 +30,9 @@ __global__ void heb_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, HGChebUncalibratedRecHitConstantData cdata, int length); __global__ -void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct* conds); +void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct*); __global__ -void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct* conds); +void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct*); #endif //RecoLocalCalo_HGCalRecProducers_HGCalRecHitKernelImpl_cuh diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc index 64b8e9a1c6b01..fe2eddca6498e 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.cc @@ -118,7 +118,7 @@ void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_poin } //copying the pointers' content - if( j>this->number_position_arrays ) //required due to the assymetry between cpos::HeterogeneousHGCalPositionsMapping and cpos::HGCalPositionsMapping + if( j>=this->number_position_arrays ) //required due to the assymetry between cpos::HeterogeneousHGCalPositionsMapping and cpos::HGCalPositionsMapping { for(unsigned int i=cumsum_sizes[j]; inumber_position_arrays*this->nelems_posmap_*sfloat; //size in bytes occupied by the non-position information + std::cout << position_memory_size_to_transfer << ", " << chunk_pos_ << ", " << this->number_position_arrays*this->nelems_posmap_*sfloat << ", " << this->number_position_arrays*this->nelems_posmap_ << ", " << this->nelems_posmap_ << std::endl; gpuErrchk(cudaMemcpyAsync(data.host->posmap.numberCellsHexagon, this->posmap_.numberCellsHexagon, position_memory_size_to_transfer, cudaMemcpyHostToDevice, stream)); // ... and then the payload object diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc index e695807e47a8a..97770dd15736b 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFRecHitProducer.cc @@ -81,7 +81,6 @@ void HeterogeneousHGCalHEFRecHitProducer::acquire(edm::Event const& event, edm:: convert_collection_data_to_soa_(hits_hef, uncalibSoA_, nhits); kmdata_ = new KernelModifiableData(nhits, stride_, uncalibSoA_, d_uncalibSoA_, d_intermediateSoA_, d_calibSoA_, calibSoA_); KernelManagerHGCalRecHit kernel_manager(kmdata_); - std::cout << "CHECK before run kernels " << std::endl; kernel_manager.run_kernels(kcdata_, d_conds); rechits_ = std::make_unique(); @@ -147,36 +146,34 @@ void HeterogeneousHGCalHEFRecHitProducer::set_conditions_(const edm::EventSetup& posmap_->waferMax = ddd_->waferMax(); //store detids following a geometry ordering - for(int iside=-1; iside<=1; iside = iside+2) { - for(int ilayer=1; ilayer<=posmap_->lastLayer; ++ilayer) { - //float z_ = iside<0 ? -1.f * static_cast( ddd_->waferZ(ilayer, true) ) : static_cast( ddd_->waferZ(ilayer, true) ); //originally a double + for(int ilayer=1; ilayer<=posmap_->lastLayer; ++ilayer) { + //float z_ = iside<0 ? -1.f * static_cast( ddd_->waferZ(ilayer, true) ) : static_cast( ddd_->waferZ(ilayer, true) ); //originally a double - for(int iwaferU=posmap_->waferMin; iwaferUwaferMax; ++iwaferU) { - for(int iwaferV=posmap_->waferMin; iwaferVwaferMax; ++iwaferV) { - int type_ = ddd_->waferType(ilayer, iwaferU, iwaferV); //0: fine; 1: coarseThin; 2: coarseThick (as defined in DataFormats/ForwardDetId/interface/HGCSiliconDetId.h) - - int nCellsHex = ddd_->numberCellsHexagon(ilayer, iwaferU, iwaferV, false); - posmap_->numberCellsHexagon.push_back( nCellsHex ); - - //left side of wafer - for(int cellUmax=nCellsHex, icellV=0; cellUmax<2*nCellsHex && icellVdetid.push_back( detid_ ); - } - } - //right side of wafer - for(int cellUmin=1, icellV=nCellsHex; cellUmin<=nCellsHex && icellV<2*nCellsHex; ++cellUmin, ++icellV) - { - for(int icellU=cellUmin; icellU<2*nCellsHex; ++icellU) - { - uint32_t detid_ = HGCSiliconDetId(DetId::HGCalHSi, iside, type_, ilayer, iwaferU, iwaferV, icellU, icellV); - posmap_->detid.push_back( detid_ ); - } - } - } + for(int iwaferU=posmap_->waferMin; iwaferUwaferMax; ++iwaferU) { + for(int iwaferV=posmap_->waferMin; iwaferVwaferMax; ++iwaferV) { + int type_ = ddd_->waferType(ilayer, iwaferU, iwaferV); //0: fine; 1: coarseThin; 2: coarseThick (as defined in DataFormats/ForwardDetId/interface/HGCSiliconDetId.h) + + int nCellsHex = ddd_->numberCellsHexagon(ilayer, iwaferU, iwaferV, false); + posmap_->numberCellsHexagon.push_back( nCellsHex ); + + //left side of wafer + for(int cellUmax=nCellsHex, icellV=0; cellUmax<2*nCellsHex && icellVdetid.push_back( detid_.rawId() ); + } + } + //right side of wafer + for(int cellUmin=1, icellV=nCellsHex; cellUmin<=nCellsHex && icellV<2*nCellsHex; ++cellUmin, ++icellV) + { + for(int icellU=cellUmin; icellU<2*nCellsHex; ++icellU) + { + HGCSiliconDetId detid_(DetId::HGCalHSi, 1, type_, ilayer, iwaferU, iwaferV, icellU, icellV); + posmap_->detid.push_back( detid_.rawId() ); + } + } } } } diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.cu b/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.cu index 1286555fbe109..ee5bcab161328 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.cu +++ b/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.cu @@ -10,6 +10,8 @@ KernelManagerHGCalRecHit::KernelManagerHGCalRecHit(KernelModifiableDatanhits_ + ::nthreads_.x - 1) / ::nthreads_.x; nbytes_host_ = (data_->h_out_)->nbytes_ * data_->stride_; nbytes_device_ = (data_->d_1_)->nbytes_ * data_->stride_; + + printf("NUMBERS: %d - %d - %d\n", nblocks_.x, nthreads_.x, nblocks_.x*nthreads_.x); } KernelManagerHGCalRecHit::~KernelManagerHGCalRecHit() diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.h b/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.h index 64e8ecf5ceabe..6f789fea5e7d6 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.h +++ b/RecoLocalCalo/HGCalRecProducers/plugins/KernelManagerHGCalRecHit.h @@ -32,7 +32,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t namespace { dim3 nblocks_; - constexpr dim3 nthreads_(256); //some kernels will potentially not allocate shared memory properly with a lower number + constexpr dim3 nthreads_(1); //some kernels will potentially not allocate shared memory properly with a lower number } template