Skip to content

Commit

Permalink
bug fix: threadid index
Browse files Browse the repository at this point in the history
bug fix: uint32_t conversion to float
  • Loading branch information
Bruno Alves authored and bfonta committed Sep 15, 2020
1 parent 45f4412 commit cfff76e
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 64 deletions.
4 changes: 2 additions & 2 deletions CUDADataFormats/HGCal/interface/HGCConditions.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class HeterogeneousHGCSiliconDetId {
constexpr int32_t waferV() { return (((id_ >> kHGCalWaferVSignOffset) & kHGCalWaferVSignMask) ? -waferVAbs() : waferVAbs()); }
constexpr int32_t waferX() { return (-2 * waferU() + waferV()); }
constexpr int32_t waferY() { return (2 * waferV()); }
constexpr int32_t cellU() { return (id_ >> kHGCalCellUOffset) & kHGCalCellUMask; }
constexpr int32_t cellV() { return (id_ >> kHGCalCellVOffset) & kHGCalCellVMask; }
constexpr uint32_t cellU() { return (id_ >> kHGCalCellUOffset) & kHGCalCellUMask; }
constexpr uint32_t cellV() { return (id_ >> kHGCalCellVOffset) & kHGCalCellVMask; }
constexpr uint32_t nCells() { return (type() == HGCalFine) ? HGCalFineN : HGCalCoarseN; }
constexpr int32_t cellX() { const uint32_t N = nCells(); return (3 * (cellV() - N) + 2); }
constexpr int32_t cellY() { const uint32_t N = nCells(); return (2 * cellU() - (N + cellV())); }
Expand Down
62 changes: 34 additions & 28 deletions RecoLocalCalo/HGCalRecProducers/plugins/HGCalRecHitKernelImpl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ __global__
void ee_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const HGCeeUncalibratedRecHitConstantData cdata, int length)
{
unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]);

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
HeterogeneousHGCSiliconDetId detid(src_soa.id_[i]);
double weight = get_weight_from_layer(detid.layer(), cdata.weights_);
double rcorr = get_thickness_correction(detid.type(), cdata.rcorr_);
double noise = get_noise(detid.type(), cdata.noise_fC_);
Expand All @@ -110,7 +110,7 @@ void hef_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]);
HeterogeneousHGCSiliconDetId detid(src_soa.id_[i]);
printf("cellCoarseY: %lf - cellX: %d - numberCellsHexagon: %d - DetId: %d - Var: %d\n", conds->params.cellCoarseY_[12], detid.cellX(), conds->posmap.numberCellsHexagon[0], conds->posmap.detid[9], conds->posmap.waferMax);

double weight = get_weight_from_layer(detid.layer(), cdata.weights_);
Expand All @@ -128,10 +128,10 @@ __global__
void heb_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const HGChebUncalibratedRecHitConstantData cdata, int length)
{
unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
HeterogeneousHGCScintillatorDetId detid(src_soa.id_[tid]);

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
HeterogeneousHGCScintillatorDetId detid(src_soa.id_[i]);
double weight = get_weight_from_layer(detid.layer(), cdata.weights_);
double noise = cdata.noise_MIP_;
double sigmaNoiseGeV = 1e-3 * noise * weight;
Expand All @@ -147,31 +147,38 @@ void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditio

for (unsigned int i = tid; i < conds->nelems_posmap; i += blockDim.x * gridDim.x)
{
HeterogeneousHGCSiliconDetId did(conds->posmap.detid[tid]);
uint32_t cU = did.cellU();
uint32_t cV = did.cellV();
uint32_t wU = did.waferU();
uint32_t wV = did.waferV();
uint32_t ncells = did.nCells();
HeterogeneousHGCSiliconDetId did(conds->posmap.detid[i]);
const float cU = static_cast<float>( did.cellU() );
const float cV = static_cast<float>( did.cellV() );
const float wU = static_cast<float>( did.waferU() );
const float wV = static_cast<float>( did.waferV() );
const float ncells = static_cast<float>( did.nCells() );
const float side = static_cast<float>( did.zside() );
const int32_t layer = did.layer();

float r = 0.5f * (conds->posmap.waferSize + conds->posmap.sensorSeparation);
float sqrt3 = __fsqrt_rn(3.);
float rsqrt3 = __frsqrt_rn(3.);
float R = 2.f * r * rsqrt3; //rsqrt: 1 / sqrt
uint32_t n2 = ncells / 2;
float yoff = rsqrt3 * 2.f * r; //CHANGE according to Sunanda's reply
//based on `std::pair<float, float> HGCalDDDConstants::locateCell(const HGCSiliconDetId&, bool)
const float r_x2 = conds->posmap.waferSize + conds->posmap.sensorSeparation;
const float r = 0.5f * r_x2;
const float sqrt3 = __fsqrt_rn(3.f);
const float rsqrt3 = __frsqrt_rn(3.f);//rsqrt: 1 / sqrt
const float R = r_x2 * rsqrt3;
const float n2 = ncells / 2.f;
const float yoff_abs = rsqrt3 * r_x2;
const float yoff = (layer%2==1) ? yoff_abs : -1.f * yoff_abs; //CHANGE according to Sunanda's reply
float xpos = (-2.f * wU + wV) * r;
float ypos = yoff + (1.5 * wV * R);
float R1 = __fdividef( conds->posmap.waferSize, 3.f * ncells );
float r1 = 0.5 * R1 * sqrt3;
xpos += (1.5 * (cV - ncells) + 1.0) * R1;
ypos += (cU - 0.5 * cV - n2) * 2 * r1;

conds->posmap.x[tid] = xpos * did.zside();
conds->posmap.y[tid] = ypos;
conds->posmap.z[tid] = 1.3;
float ypos = yoff + (1.5f * wV * R);
const float R1 = __fdividef( conds->posmap.waferSize, 3.f * ncells );
const float r1_x2 = R1 * sqrt3;
xpos += (1.5f * (cV - ncells) + 1.f) * R1;
ypos += (cU - 0.5f * cV - n2) * r1_x2;

conds->posmap.x[i] = xpos * side;
conds->posmap.y[i] = ypos;
conds->posmap.z[i] = 1.3;

//printf( "%d - %lf - %lf\n", cV - ncells, 1.5f*(static_cast<float>(cV) - static_cast<float>(ncells)), 1.5f*(static_cast<float>(cV - ncells)) );
//printf("waferU: %d\t waferV: %d\t cellU: %d\t cellV: %d\t nCells: %d\t R1: %lf\t Layer: %d\t PosX: %lf\t PosY: %lf\t PosZ: %lf\n", wU, wV, cU, cV, ncells, R1, layer, conds->posmap.x[i], conds->posmap.y[i], conds->posmap.z[i]);
}

}

__global__
Expand All @@ -181,7 +188,6 @@ void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditi

for (unsigned int i = tid; i < conds->nelems_posmap; i += blockDim.x * gridDim.x)
{
printf("PosX: %lf - PosY: %lf - Posz: %lf - waferSize: %lf - sensorSeparation: %lf\n", conds->posmap.x[tid], conds->posmap.y[tid], conds->posmap.z[tid], conds->posmap.waferSize, conds->posmap.sensorSeparation);
}

printf("PosX: %lf\t PosY: %lf\t Posz: %lf\n", conds->posmap.x[i], conds->posmap.y[i], conds->posmap.z[i]);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ __global__
void heb_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, HGChebUncalibratedRecHitConstantData cdata, int length);

__global__
void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct* conds);
void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct*);

__global__
void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct* conds);
void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct*);

#endif //RecoLocalCalo_HGCalRecProducers_HGCalRecHitKernelImpl_cuh
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_poin
}

//copying the pointers' content
if( j>this->number_position_arrays ) //required due to the assymetry between cpos::HeterogeneousHGCalPositionsMapping and cpos::HGCalPositionsMapping
if( j>=this->number_position_arrays ) //required due to the assymetry between cpos::HeterogeneousHGCalPositionsMapping and cpos::HGCalPositionsMapping
{
for(unsigned int i=cumsum_sizes[j]; i<cumsum_sizes[j+1]; ++i)
{
Expand Down Expand Up @@ -394,6 +394,7 @@ hgcal_conditions::HeterogeneousHEFConditionsESProduct const *HeterogeneousHGCalH

//Important: The transfer does *not* start at posmap.x because the positions are not known in the CPU side!
size_t position_memory_size_to_transfer = chunk_pos_ - this->number_position_arrays*this->nelems_posmap_*sfloat; //size in bytes occupied by the non-position information
std::cout << position_memory_size_to_transfer << ", " << chunk_pos_ << ", " << this->number_position_arrays*this->nelems_posmap_*sfloat << ", " << this->number_position_arrays*this->nelems_posmap_ << ", " << this->nelems_posmap_ << std::endl;
gpuErrchk(cudaMemcpyAsync(data.host->posmap.numberCellsHexagon, this->posmap_.numberCellsHexagon, position_memory_size_to_transfer, cudaMemcpyHostToDevice, stream));

// ... and then the payload object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ void HeterogeneousHGCalHEFRecHitProducer::acquire(edm::Event const& event, edm::
convert_collection_data_to_soa_(hits_hef, uncalibSoA_, nhits);
kmdata_ = new KernelModifiableData<HGCUncalibratedRecHitSoA, HGCRecHitSoA>(nhits, stride_, uncalibSoA_, d_uncalibSoA_, d_intermediateSoA_, d_calibSoA_, calibSoA_);
KernelManagerHGCalRecHit kernel_manager(kmdata_);
std::cout << "CHECK before run kernels " << std::endl;
kernel_manager.run_kernels(kcdata_, d_conds);

rechits_ = std::make_unique<HGCRecHitCollection>();
Expand Down Expand Up @@ -147,36 +146,34 @@ void HeterogeneousHGCalHEFRecHitProducer::set_conditions_(const edm::EventSetup&
posmap_->waferMax = ddd_->waferMax();

//store detids following a geometry ordering
for(int iside=-1; iside<=1; iside = iside+2) {
for(int ilayer=1; ilayer<=posmap_->lastLayer; ++ilayer) {
//float z_ = iside<0 ? -1.f * static_cast<float>( ddd_->waferZ(ilayer, true) ) : static_cast<float>( ddd_->waferZ(ilayer, true) ); //originally a double
for(int ilayer=1; ilayer<=posmap_->lastLayer; ++ilayer) {
//float z_ = iside<0 ? -1.f * static_cast<float>( ddd_->waferZ(ilayer, true) ) : static_cast<float>( ddd_->waferZ(ilayer, true) ); //originally a double

for(int iwaferU=posmap_->waferMin; iwaferU<posmap_->waferMax; ++iwaferU) {
for(int iwaferV=posmap_->waferMin; iwaferV<posmap_->waferMax; ++iwaferV) {
int type_ = ddd_->waferType(ilayer, iwaferU, iwaferV); //0: fine; 1: coarseThin; 2: coarseThick (as defined in DataFormats/ForwardDetId/interface/HGCSiliconDetId.h)

int nCellsHex = ddd_->numberCellsHexagon(ilayer, iwaferU, iwaferV, false);
posmap_->numberCellsHexagon.push_back( nCellsHex );

//left side of wafer
for(int cellUmax=nCellsHex, icellV=0; cellUmax<2*nCellsHex && icellV<nCellsHex; ++cellUmax, ++icellV)
{
for(int icellU=0; icellU<=cellUmax; ++icellU)
{
uint32_t detid_ = HGCSiliconDetId(DetId::HGCalHSi, iside, type_, ilayer, iwaferU, iwaferV, icellU, icellV);
posmap_->detid.push_back( detid_ );
}
}
//right side of wafer
for(int cellUmin=1, icellV=nCellsHex; cellUmin<=nCellsHex && icellV<2*nCellsHex; ++cellUmin, ++icellV)
{
for(int icellU=cellUmin; icellU<2*nCellsHex; ++icellU)
{
uint32_t detid_ = HGCSiliconDetId(DetId::HGCalHSi, iside, type_, ilayer, iwaferU, iwaferV, icellU, icellV);
posmap_->detid.push_back( detid_ );
}
}
}
for(int iwaferU=posmap_->waferMin; iwaferU<posmap_->waferMax; ++iwaferU) {
for(int iwaferV=posmap_->waferMin; iwaferV<posmap_->waferMax; ++iwaferV) {
int type_ = ddd_->waferType(ilayer, iwaferU, iwaferV); //0: fine; 1: coarseThin; 2: coarseThick (as defined in DataFormats/ForwardDetId/interface/HGCSiliconDetId.h)

int nCellsHex = ddd_->numberCellsHexagon(ilayer, iwaferU, iwaferV, false);
posmap_->numberCellsHexagon.push_back( nCellsHex );

//left side of wafer
for(int cellUmax=nCellsHex, icellV=0; cellUmax<2*nCellsHex && icellV<nCellsHex; ++cellUmax, ++icellV)
{
for(int icellU=0; icellU<=cellUmax; ++icellU)
{
HGCSiliconDetId detid_(DetId::HGCalHSi, 1, type_, ilayer, iwaferU, iwaferV, icellU, icellV);
posmap_->detid.push_back( detid_.rawId() );
}
}
//right side of wafer
for(int cellUmin=1, icellV=nCellsHex; cellUmin<=nCellsHex && icellV<2*nCellsHex; ++cellUmin, ++icellV)
{
for(int icellU=cellUmin; icellU<2*nCellsHex; ++icellU)
{
HGCSiliconDetId detid_(DetId::HGCalHSi, 1, type_, ilayer, iwaferU, iwaferV, icellU, icellV);
posmap_->detid.push_back( detid_.rawId() );
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ KernelManagerHGCalRecHit::KernelManagerHGCalRecHit(KernelModifiableData<HGCUncal
::nblocks_ = (data_->nhits_ + ::nthreads_.x - 1) / ::nthreads_.x;
nbytes_host_ = (data_->h_out_)->nbytes_ * data_->stride_;
nbytes_device_ = (data_->d_1_)->nbytes_ * data_->stride_;

printf("NUMBERS: %d - %d - %d\n", nblocks_.x, nthreads_.x, nblocks_.x*nthreads_.x);
}

KernelManagerHGCalRecHit::~KernelManagerHGCalRecHit()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t

namespace {
dim3 nblocks_;
constexpr dim3 nthreads_(256); //some kernels will potentially not allocate shared memory properly with a lower number
constexpr dim3 nthreads_(1); //some kernels will potentially not allocate shared memory properly with a lower number
}

template <typename T>
Expand Down

0 comments on commit cfff76e

Please sign in to comment.