Skip to content

Commit

Permalink
replace kernel shared memory by passing by value (huge simplification)
Browse files Browse the repository at this point in the history
  • Loading branch information
bfontana authored and bfonta committed Sep 15, 2020
1 parent 4838266 commit 1fc601e
Show file tree
Hide file tree
Showing 11 changed files with 189 additions and 433 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,24 @@

#include <vector>

//maximum sizes for SoA's arrays holding configuration data ("constants")
namespace maxsizes_constants {
//EE
constexpr size_t ee_fCPerMIP = 6; //number of elements pointed by hgcEE_fCPerMIP_
constexpr size_t ee_cce = 6; //number of elements posize_ted by hgcEE_cce_
constexpr size_t ee_noise_fC = 6; //number of elements posize_ted by hgcEE_noise_fC_
constexpr size_t ee_rcorr = 6; //number of elements posize_ted by rcorr_
constexpr size_t ee_weights = 53; //number of elements posize_ted by weights_
//HEF
constexpr size_t hef_fCPerMIP = 6; //number of elements pointed by hgcEE_fCPerMIP_
constexpr size_t hef_cce = 6; //number of elements posize_ted by hgcEE_cce_
constexpr size_t hef_noise_fC = 6; //number of elements posize_ted by hgcEE_noise_fC_
constexpr size_t hef_rcorr = 6; //number of elements posize_ted by rcorr_
constexpr size_t hef_weights = 53; //number of elements posize_ted by weights_
//HEB
constexpr size_t heb_weights = 53; //number of elements posize_ted by weights_
}

class HGCConstantVectorData {
public:
std::vector<double> fCPerMIP_;
Expand All @@ -14,65 +32,43 @@ class HGCConstantVectorData {

class HGCeeUncalibratedRecHitConstantData {
public:
double hgcEE_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC)
double fCPerMIP_[maxsizes_constants::ee_fCPerMIP]; //femto coloumb to MIP conversion; one value per sensor thickness
double cce_[maxsizes_constants::ee_cce]; //charge collection efficiency, one value per sensor thickness
double noise_fC_[maxsizes_constants::ee_noise_fC]; //noise, one value per sensor thickness
double rcorr_[maxsizes_constants::ee_rcorr]; //thickness correction
double weights_[maxsizes_constants::ee_weights]; //energy weights to recover rechit energy deposited in the absorber

double hgcEE_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC)
double hgceeUncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcEE_keV2DIGI_
double *hgcEE_fCPerMIP_; //femto coloumb to MIP conversion; one value per sensor thickness
double *hgcEE_cce_; //charge collection efficiency, one value per sensor thickness
double *hgcEE_noise_fC_; //noise, one value per sensor thickness
double *rcorr_; //thickness correction
double *weights_; //energy weights to recover rechit energy deposited in the absorber
float xmin_; //used for computing the time resolution error
float xmin_; //used for computing the time resolution error
float xmax_; //used for computing the time resolution error
float aterm_; //used for computing the time resolution error
float cterm_; //used for computing the time resolution error
int nbytes_; //number of bytes allocated by this class
int ndelem_; //number of doubles pointed by this class
int nfelem_; //number of floats pointed by this class
int nielem_; //number of ints pointed by this class
int s_hgcEE_fCPerMIP_; //number of elements pointed by hgcEE_fCPerMIP_
int s_hgcEE_cce_; //number of elements pointed by hgcEE_cce_
int s_hgcEE_noise_fC_; //number of elements pointed by hgcEE_noise_fC_
int s_rcorr_; //number of elements pointed by rcorr_
int s_weights_; //number of elements pointed by weights_
};

class HGChefUncalibratedRecHitConstantData {
public:
double hgcHEF_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC)
double hgchefUncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEF_keV2DIGI_
double *hgcHEF_fCPerMIP_; //femto coloumb to MIP conversion; one value per sensor thickness
double *hgcHEF_cce_; //charge collection efficiency, one value per sensor thickness
double *hgcHEF_noise_fC_; //noise, one value per sensor thickness
double *rcorr_; //thickness correction
double *weights_; //energy weights to recover rechit energy deposited in the absorber
float xmin_; //used for computing the time resolution error
float xmax_; //used for computing the time resolution error
float aterm_; //used for computing the time resolution error
float cterm_; //used for computing the time resolution error
int nbytes_; //number of bytes allocated by this class
int ndelem_; //number of doubles allocated by this class
int nfelem_; //number of floats allocated by this class
int nuelem_; //number of unsigned ints allocated by this class
int nielem_; //number of ints allocated by this class
int s_hgcHEF_fCPerMIP_; //number of elements pointed by hgcEE_fCPerMIP_
int s_hgcHEF_cce_; //number of elements pointed by hgcEE_cce_
int s_hgcHEF_noise_fC_; //number of elements pointed by hgcEE_noise_fC_
int s_rcorr_; //number of elements pointed by rcorr_
int s_weights_; //number of elements pointed by weights_
double fCPerMIP_[maxsizes_constants::hef_fCPerMIP]; //femto coloumb to MIP conversion; one value per sensor thickness
double cce_[maxsizes_constants::hef_cce]; //charge collection efficiency, one value per sensor thickness
double noise_fC_[maxsizes_constants::hef_noise_fC]; //noise, one value per sensor thickness
double rcorr_[maxsizes_constants::hef_rcorr]; //thickness correction
double weights_[maxsizes_constants::hef_weights]; //energy weights to recover rechit energy deposited in the absorber

double keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC)
double uncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEF_keV2DIGI_
float xmin_; //used for computing the time resolution error
float xmax_; //used for computing the time resolution error
float aterm_; //used for computing the time resolution error
float cterm_; //used for computing the time resolution error
};

class HGChebUncalibratedRecHitConstantData {
public:
double hgcHEB_keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC)
double hgchebUncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEB_keV2DIGI_
double hgcHEB_noise_MIP_; //noise
double *weights_; //energy weights to recover rechit energy deposited in the absorber
int nbytes_; //number of bytes allocated by this class
int ndelem_; //number of doubles allocated by this class
int nfelem_; //number of floats allocated by this class
int nuelem_; //number of unsigned ints allocated by this class
int nielem_; //number of ints allocated by this class
int s_weights_; //number of elements pointed by weights_
double weights_[maxsizes_constants::heb_weights]; //energy weights to recover rechit energy deposited in the absorber

double keV2DIGI_; //energy to femto coloumb conversion: 1000 eV/3.62 (eV per e) / 6.24150934e3 (e per fC)
double uncalib2GeV_; //sets the ADC; obtained by dividing 1e-6 by hgcHEB_keV2DIGI_
double noise_MIP_; //noise
};

#endif
2 changes: 1 addition & 1 deletion UserCode/CodeGPU/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<library name="HeterogeneousUncalibRecHits" file="HeterogeneousHGCalEERecHitProducer.cc HeterogeneousHGCalHEFRecHitProducer.cc HeterogeneousHGCalHEBRecHitProducer.cc HeterogeneousHGCalESProduct.cc HeterogeneousHGCalProducerMemoryWrapper.cc KernelManagerHGCalRecHit.cu HGCalRecHitKernelImpl.cu">
<library name="HeterogeneousUncalibRecHits" file="HeterogeneousHGCalHEFRecHitProducer.cc HeterogeneousHGCalESProduct.cc HeterogeneousHGCalProducerMemoryWrapper.cc KernelManagerHGCalRecHit.cu HGCalRecHitKernelImpl.cu">
<use name="cuda"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
Expand Down
170 changes: 42 additions & 128 deletions UserCode/CodeGPU/plugins/HGCalRecHitKernelImpl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
#include "HGCalRecHitKernelImpl.cuh"

__device__
double get_weight_from_layer(const int& padding, const int& layer, double*& sd)
double get_weight_from_layer(const int& layer, const double (&weights)[maxsizes_constants::hef_weights])
{
return sd[padding + layer];
return weights[layer];
}

__device__
void make_rechit(unsigned int tid, HGCRecHitSoA& dst_soa, HGCUncalibratedRecHitSoA& src_soa, const bool &heb_flag,
const double &weight, const double &rcorr, const double &cce_correction, const double &sigmaNoiseGeV, float *& sf)
const double& weight, const double& rcorr, const double& cce_correction, const double &sigmaNoiseGeV,
const float& xmin, const float& xmax, const float& aterm, const float& cterm)
{
dst_soa.id_[tid] = src_soa.id_[tid];
dst_soa.energy_[tid] = src_soa.amplitude_[tid] * weight * 0.001f;
Expand All @@ -29,101 +30,43 @@ void make_rechit(unsigned int tid, HGCRecHitSoA& dst_soa, HGCUncalibratedRecHitS
if(heb_flag==0)
{
//get time resolution
float max = fmaxf(son, sf[0]); //this max trick avoids if...elseif...else condition
float aterm = sf[2];
float cterm = sf[3];
dst_soa.timeError_[tid] = sqrt( __fdividef(aterm,max)*__fdividef(aterm,max) + cterm*cterm );
//https://github.com/cms-sw/cmssw/blob/master/RecoLocalCalo/HGCalRecProducers/src/ComputeClusterTime.cc#L50
/*Maxmin trick to avoid conditions within the kernel (having xmin < xmax)
3 possibilities: 1) xval -> xmin -> xmax
2) xmin -> xval -> xmax
3) xmin -> xmax -> xval
The time error is calculated with the number in the middle.
*/
float max = fminf( fmaxf(son, xmin), xmax);
float div_ = __fdividef(aterm, max);
dst_soa.timeError_[tid] = sqrt( div_*div_ + cterm*cterm );
}
else
dst_soa.timeError_[tid] = -1;
}

__device__
double get_thickness_correction(const int& padding, const int& type, double *& sd)
double get_thickness_correction(const int& type, const double (&rcorr)[maxsizes_constants::hef_rcorr])
{
return sd[padding + type];
return rcorr[type];
}

__device__
double get_noise(const int& padding, const int& type, double *& sd)
double get_noise(const int& type, const double (&noise_fC)[maxsizes_constants::hef_noise_fC])
{
return sd[padding + type - 1];
return noise_fC[type - 1];
}

__device__
double get_cce_correction(const int& padding, const int& type, double *& sd)
double get_cce_correction(const int& type, const double (&cce)[maxsizes_constants::hef_cce])
{
return sd[padding + type - 1];
return cce[type - 1];
}

__device__
double get_fCPerMIP(const int& padding, const int& type, double *& sd)
double get_fCPerMIP(const int& type, const double (&fCPerMIP)[maxsizes_constants::hef_fCPerMIP])
{
return sd[padding + type - 1];
}

__device__
void set_shared_memory(const int& tid, double*& sd, float*& sf, int*& si, const HGCeeUncalibratedRecHitConstantData& cdata, const int& size1, const int& size2, const int& size3, const int& size4, const int& size5)
{
const int initial_pad = 2;
if(tid == 0)
{
sd[0] = cdata.hgcEE_keV2DIGI_;
sd[1] = cdata.hgceeUncalib2GeV_;
for(unsigned int i=initial_pad; i<size1; ++i)
sd[i] = cdata.hgcEE_fCPerMIP_[i-initial_pad];
for(unsigned int i=size1; i<size2; ++i)
sd[i] = cdata.hgcEE_cce_[i-size1];
for(unsigned int i=size2; i<size3; ++i)
sd[i] = cdata.hgcEE_noise_fC_[i-size2];
for(unsigned int i=size3; i<size4; ++i)
sd[i] = cdata.rcorr_[i-size3];
for(unsigned int i=size4; i<size5; ++i)
sd[i] = cdata.weights_[i-size4];
sf[0] = (cdata.xmin_ > 0) ? cdata.xmin_ : 0.1;
sf[1] = cdata.xmax_;
sf[2] = cdata.aterm_;
sf[3] = cdata.cterm_;
}
}

__device__
void set_shared_memory(const int& tid, double*& sd, float*& sf, int*& si, const HGChefUncalibratedRecHitConstantData& cdata, const int& size1, const int& size2, const int& size3, const int& size4, const int& size5)
{
const int initial_pad = 2;
if(tid == 0)
{
sd[0] = cdata.hgcHEF_keV2DIGI_;
sd[1] = cdata.hgchefUncalib2GeV_;
for(unsigned int i=initial_pad; i<size1; ++i)
sd[i] = cdata.hgcHEF_fCPerMIP_[i-initial_pad];
for(unsigned int i=size1; i<size2; ++i)
sd[i] = cdata.hgcHEF_cce_[i-size1];
for(unsigned int i=size2; i<size3; ++i)
sd[i] = cdata.hgcHEF_noise_fC_[i-size2];
for(unsigned int i=size3; i<size4; ++i)
sd[i] = cdata.rcorr_[i-size3];
for(unsigned int i=size4; i<size5; ++i)
sd[i] = cdata.weights_[i-size4];
sf[0] = (cdata.xmin_ > 0) ? cdata.xmin_ : 0.1;
sf[1] = cdata.xmax_;
sf[2] = cdata.aterm_;
sf[3] = cdata.cterm_;
}
}

__device__
void set_shared_memory(const int& tid, double*& sd, float*& sf, const HGChebUncalibratedRecHitConstantData& cdata, const int& size1)
{
const int initial_pad = 3;
if(tid == 0)
{
sd[0] = cdata.hgcHEB_keV2DIGI_;
sd[1] = cdata.hgchebUncalib2GeV_;
sd[2] = cdata.hgcHEB_noise_MIP_;
for(unsigned int i=initial_pad; i<size1; ++i)
sd[i] = cdata.weights_[i-initial_pad];
}
return fCPerMIP[type - 1];
}

__global__
Expand All @@ -146,28 +89,17 @@ void ee_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const
{
unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]);
int size1 = cdata.s_hgcEE_fCPerMIP_ + 2;
int size2 = cdata.s_hgcEE_cce_ + size1;
int size3 = cdata.s_hgcEE_noise_fC_ + size2;
int size4 = cdata.s_rcorr_ + size3;
int size5 = cdata.s_weights_ + size4;

extern __shared__ double s[];
double *sd = s;
float *sf = (float*) (sd + cdata.ndelem_);
int *si = (int*) (sf + cdata.nfelem_);
set_shared_memory(threadIdx.x, sd, sf, si, cdata, size1, size2, size3, size4, size5);
__syncthreads();

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
double weight = get_weight_from_layer(size4, detid.layer(), sd);
double rcorr = get_thickness_correction(size3, detid.layer(), sd);
double noise = get_noise(size2, detid.type(), sd);
double cce_correction = get_cce_correction(size1, detid.type(), sd);
double fCPerMIP = get_fCPerMIP(2, detid.type(), sd);
double sigmaNoiseGeV = 1e-3 * weight * rcorr * __fdividef( noise, fCPerMIP );
make_rechit(i, dst_soa, src_soa, false, weight, rcorr, cce_correction, sigmaNoiseGeV, sf);
double weight = get_weight_from_layer(detid.layer(), cdata.weights_);
double rcorr = get_thickness_correction(detid.type(), cdata.rcorr_);
double noise = get_noise(detid.type(), cdata.noise_fC_);
double cce_correction = get_cce_correction(detid.type(), cdata.cce_);
double fCPerMIP = get_fCPerMIP(detid.type(), cdata.fCPerMIP_);
double sigmaNoiseGeV = 1e-3 * weight * rcorr * __fdividef( noise, fCPerMIP );
make_rechit(i, dst_soa, src_soa, false, weight, rcorr, cce_correction, sigmaNoiseGeV,
cdata.xmin_, cdata.xmax_, cdata.aterm_, cdata.cterm_);
}
}

Expand All @@ -178,28 +110,16 @@ void hef_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const
HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]);
printf("waferTypeL: %d - cellCoarseY: %lf - cellX: %d\n", conds->params.waferTypeL_[0], conds->params.cellCoarseY_[12], detid.cellX());

int size1 = cdata.s_hgcHEF_fCPerMIP_ + 2;
int size2 = cdata.s_hgcHEF_cce_ + size1;
int size3 = cdata.s_hgcHEF_noise_fC_ + size2;
int size4 = cdata.s_rcorr_ + size3;
int size5 = cdata.s_weights_ + size4;

extern __shared__ double s[];
double *sd = s;
float *sf = (float*) (sd + cdata.ndelem_);
int *si = (int*) (sf + cdata.nuelem_);
set_shared_memory(threadIdx.x, sd, sf, si, cdata, size1, size2, size3, size4, size5);
__syncthreads();

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
double weight = get_weight_from_layer(size4, detid.layer(), sd);
double rcorr = get_thickness_correction(size3, detid.type(), sd);
double noise = get_noise(size2, detid.type(), sd);
double cce_correction = get_cce_correction(size1, detid.type(), sd);
double fCPerMIP = get_fCPerMIP(2, detid.type(), sd);
double sigmaNoiseGeV = 1e-3 * weight * rcorr * __fdividef( noise, fCPerMIP );
make_rechit(i, dst_soa, src_soa, false, weight, rcorr, cce_correction, sigmaNoiseGeV, sf);
double weight = get_weight_from_layer(detid.layer(), cdata.weights_);
double rcorr = get_thickness_correction(detid.type(), cdata.rcorr_);
double noise = get_noise(detid.type(), cdata.noise_fC_);
double cce_correction = get_cce_correction(detid.type(), cdata.cce_);
double fCPerMIP = get_fCPerMIP(detid.type(), cdata.fCPerMIP_);
double sigmaNoiseGeV = 1e-3 * weight * rcorr * __fdividef( noise, fCPerMIP );
make_rechit(i, dst_soa, src_soa, false, weight, rcorr, cce_correction, sigmaNoiseGeV,
cdata.xmin_, cdata.xmax_, cdata.aterm_, cdata.cterm_);
}
}

Expand All @@ -208,19 +128,13 @@ void heb_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const
{
unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
HeterogeneousHGCScintillatorDetId detid(src_soa.id_[tid]);
int size1 = cdata.s_weights_ + 3;

extern __shared__ double s[];
double *sd = s;
float *sf = (float*) (sd + cdata.ndelem_);
set_shared_memory(threadIdx.x, sd, sf, cdata, size1);
__syncthreads();

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
double weight = get_weight_from_layer(3, detid.layer(), sd);
double noise = sd[2];
double weight = get_weight_from_layer(detid.layer(), cdata.weights_);
double noise = cdata.noise_MIP_;
double sigmaNoiseGeV = 1e-3 * noise * weight;
make_rechit(i, dst_soa, src_soa, true, weight, 0., 0., sigmaNoiseGeV, sf);
make_rechit(i, dst_soa, src_soa, true, weight, 0., 0., sigmaNoiseGeV,
0, 0, 0, 0);
}
}
Loading

0 comments on commit 1fc601e

Please sign in to comment.