Skip to content

Commit

Permalink
bug fix: point arrays in conditions SoAs with multiples of sizeof() i…
Browse files Browse the repository at this point in the history
…nstead of bytes
  • Loading branch information
Bruno Alves authored and bfonta committed Sep 15, 2020
1 parent 94fadd1 commit 59f8870
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 91 deletions.
60 changes: 20 additions & 40 deletions CUDADataFormats/HGCal/interface/HGCConditions.h
Original file line number Diff line number Diff line change
@@ -1,45 +1,6 @@
#ifndef CUDADataFormats_HGCal_HGCConditions_h
#define CUDADataFormats_HGCal_HGCConditions_h

#include <utility>
#include <stdexcept>

namespace {
struct xyz {
float x;
float y;
float z;
constexpr bool operator ==(const xyz& rhs) const { return this->x == rhs.x && this->y == rhs.y; }
};
using Item = std::pair<int, xyz>;
constexpr Item map_items[] = {
{ 0, {3.,3} },
{ 1, {4.,4.} },
{ 2, {5.,5.2} },
};
constexpr int map_size = sizeof(map_items)/sizeof(map_items[0]);

static constexpr xyz findValue(int key, int range = map_size) {
return
(range == 0) ? throw std::runtime_error("Value not present"):
(map_items[range - 1].first == key) ? map_items[range - 1].second:
findValue(key, range - 1);
};

static constexpr int findKey(xyz value, int range = map_size) {
return
(range == 0) ? throw std::runtime_error("Key not present"):
(map_items[range - 1].second == value) ? map_items[range - 1].first:
findKey(value, range - 1);
};
}







class HeterogeneousHGCSiliconDetId {
public:
constexpr HeterogeneousHGCSiliconDetId(uint32_t id): id_(id) {}
Expand All @@ -52,7 +13,6 @@ class HeterogeneousHGCSiliconDetId {
constexpr uint32_t waferV() { return (((id_ >> kHGCalWaferVSignOffset) & kHGCalWaferVSignMask) ? -waferVAbs() : waferVAbs()); }
constexpr uint32_t cellU() { return (id_ >> kHGCalCellUOffset) & kHGCalCellUMask; }
constexpr uint32_t cellV() { return (id_ >> kHGCalCellVOffset) & kHGCalCellVMask; }
constexpr float cellX() { return ::findValue(0).x; } //CHANGE!!

private:
uint32_t id_;
Expand Down Expand Up @@ -151,11 +111,31 @@ namespace hgcal_conditions {

} //namespace parameters

namespace positions {

//stores the positions taken from the detid's in the CPU
struct HGCalPositions {
std::vector<float> x;
std::vector<float> y;
std::vector<float> z;
};

//stores the positions taken from the detid's in the GPU
//it is the same for all three subdetectors
struct HeterogeneousHGCalPositions {
float* x;
float* y;
float* z;
};

} //namespace positions

struct HeterogeneousEEConditionsESProduct {
parameters::HeterogeneousHGCalEEParameters params;
};
struct HeterogeneousHEFConditionsESProduct {
parameters::HeterogeneousHGCalHEFParameters params;
positions::HeterogeneousHGCalPositions pos;
};
struct HeterogeneousHEBConditionsESProduct {
parameters::HeterogeneousHGCalHEBParameters params;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ void hef_to_rechit(HGCRecHitSoA dst_soa, HGCUncalibratedRecHitSoA src_soa, const
{
unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
HeterogeneousHGCSiliconDetId detid(src_soa.id_[tid]);
printf("cellCoarseY: %lf - cellX: %lf - cellLayer: %d\n", conds->params.cellCoarseY_[12], detid.cellX(), detid.layer());
printf("cellCoarseY: %lf - cellLayer: %d - cellPosX: %lf - cellPosY: %lf - cellPosZ: %lf\n", conds->params.cellCoarseY_[12], detid.layer(), conds->pos.x[9], conds->pos.y[9], conds->pos.z[9]);

for (unsigned int i = tid; i < length; i += blockDim.x * gridDim.x)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,81 +1,155 @@
#include "RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h"

HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters* cpuHGCalParameters)
HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters* cpuHGCalParameters,
const cpos::HGCalPositions* cpuXYZ)
{
calculate_memory_bytes(cpuHGCalParameters);
//HGCalParameters as defined in CMSSW
this->sizes_params_ = calculate_memory_bytes_params_(cpuHGCalParameters);
this->chunk_params_ = allocate_memory_params_(this->sizes_params_);
transfer_data_to_heterogeneous_pointers_params_(this->sizes_params_, cpuHGCalParameters);

chunk_ = std::accumulate(this->sizes_.begin(), this->sizes_.end(), 0); //total memory required in bytes
//HGCalPositions as defined in hgcal_conditions::positions
this->sizes_pos_ = calculate_memory_bytes_pos_(cpuXYZ);
this->chunk_pos_ = allocate_memory_pos_(this->sizes_pos_);
transfer_data_to_heterogeneous_pointers_pos_(this->sizes_pos_, cpuXYZ);
}

size_t HeterogeneousHGCalHEFConditionsWrapper::allocate_memory_params_(const std::vector<size_t>& sz)
{
size_t chunk_ = std::accumulate(sz.begin(), sz.end(), 0); //total memory required in bytes
gpuErrchk(cudaMallocHost(&this->params_.cellFineX_, chunk_));
return chunk_;
}

size_t HeterogeneousHGCalHEFConditionsWrapper::allocate_memory_pos_(const std::vector<size_t>& sz)
{
size_t chunk_ = std::accumulate(sz.begin(), sz.end(), 0); //total memory required in bytes
gpuErrchk(cudaMallocHost(&this->pos_.x, chunk_));
return chunk_;
}

void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_pointers_params_(const std::vector<size_t>& sz, const HGCalParameters* cpuParams)
{
//store cumulative sum in bytes and convert it to sizes in units of C++ typesHEF, i.e., number if items to be transferred to GPU
std::vector<size_t> cumsum_sizes( this->sizes_.size()+1, 0 ); //starting with zero
std::partial_sum(this->sizes_.begin(), this->sizes_.end(), cumsum_sizes.begin()+1);
std::vector<size_t> cumsum_sizes( sz.size()+1, 0 ); //starting with zero
std::partial_sum(sz.begin(), sz.end(), cumsum_sizes.begin()+1);
for(unsigned int i=1; i<cumsum_sizes.size(); ++i) //start at second element (the first is zero)
{
unsigned int typesHEFsize = 0;
if( cp::typesHEF[i-1] == cp::HeterogeneousHGCalHEFParametersType::Double )
if( cpar::typesHEF[i-1] == cpar::HeterogeneousHGCalHEFParametersType::Double )
typesHEFsize = sizeof(double);
else if( cp::typesHEF[i-1] == cp::HeterogeneousHGCalHEFParametersType::Int32_t )
else if( cpar::typesHEF[i-1] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t )
typesHEFsize = sizeof(int32_t);
else
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "Wrong HeterogeneousHGCalParameters type";
cumsum_sizes[i] /= typesHEFsize;
}

for(unsigned int j=0; j<this->sizes_.size(); ++j) {
for(unsigned int j=0; j<sz.size(); ++j) {

//setting the pointers
if(j != 0)
{
const unsigned int jm1 = j-1;
if( cp::typesHEF[jm1] == cp::HeterogeneousHGCalHEFParametersType::Double and
cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Double )
select_pointer_d(&this->params_, j) = select_pointer_d(&this->params_, jm1) + this->sizes_[jm1];
else if( cp::typesHEF[jm1] == cp::HeterogeneousHGCalHEFParametersType::Double and
cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Int32_t )
select_pointer_i(&this->params_, j) = reinterpret_cast<int32_t*>( select_pointer_d(&this->params_, jm1) + this->sizes_[jm1] );
const size_t shift = cumsum_sizes[j] - cumsum_sizes[jm1];
if( cpar::typesHEF[jm1] == cpar::HeterogeneousHGCalHEFParametersType::Double and
cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double )
select_pointer_d(&this->params_, j) = select_pointer_d(&this->params_, jm1) + shift;
else if( cpar::typesHEF[jm1] == cpar::HeterogeneousHGCalHEFParametersType::Double and
cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t )
select_pointer_i(&this->params_, j) = reinterpret_cast<int32_t*>( select_pointer_d(&this->params_, jm1) + shift );
}

//copying the pointers' content
for(unsigned int i=cumsum_sizes[j]; i<cumsum_sizes[j+1]; ++i)
{
unsigned int index = i - cumsum_sizes[j];
if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Double ) {
select_pointer_d(&this->params_, j)[index] = select_pointer_d(cpuHGCalParameters, j)[index];
if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double ) {
select_pointer_d(&this->params_, j)[index] = select_pointer_d(cpuParams, j)[index];
}
else if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Int32_t )
else if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t )
{
select_pointer_i(&this->params_, j)[index] = select_pointer_i(cpuHGCalParameters, j)[index];
select_pointer_i(&this->params_, j)[index] = select_pointer_i(cpuParams, j)[index];
}
else
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "Wrong HeterogeneousHGCalParameters type";
}
}
}

void HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_bytes(const HGCalParameters* cpuHGCalParameters) {
void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_pointers_pos_(const std::vector<size_t>& sz, const cpos::HGCalPositions* cpuParams)
{
//store cumulative sum in bytes and convert it to sizes in units of C++ floats, i.e., number if items to be transferred to GPU
std::vector<size_t> cumsum_sizes( sz.size()+1, 0 );
std::partial_sum(sz.begin(), sz.end(), cumsum_sizes.begin()+1); //starting with zero
for(unsigned int i=1; i<cumsum_sizes.size(); ++i) //start at second element (the first is zero)
{
cumsum_sizes[i] /= sizeof(float);
}

for(unsigned int j=0; j<sz.size(); ++j) {

//setting the pointers
if(j != 0)
{
const unsigned int jm1 = j-1;
const size_t shift = cumsum_sizes[j] - cumsum_sizes[jm1];
select_pointer_f(&(this->pos_), j) = select_pointer_f(&(this->pos_), jm1) + shift;
}

//copying the pointers' content
for(unsigned int i=cumsum_sizes[j]; i<cumsum_sizes[j+1]; ++i)
{
unsigned int index = i - cumsum_sizes[j];
select_pointer_f(&(this->pos_), j)[index] = select_pointer_f(cpuParams, j)[index];
}
}
}

std::vector<size_t> HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_bytes_params_(const HGCalParameters* cpuParams) {
size_t npointers = hgcal_conditions::parameters::typesHEF.size();
std::vector<size_t> sizes(npointers);
for(unsigned int i=0; i<npointers; ++i)
{
if(cp::typesHEF[i] == cp::HeterogeneousHGCalHEFParametersType::Double)
sizes[i] = select_pointer_d(cpuHGCalParameters, i).size();
if(cpar::typesHEF[i] == cpar::HeterogeneousHGCalHEFParametersType::Double)
sizes[i] = select_pointer_d(cpuParams, i).size();
else
sizes[i] = select_pointer_i(cpuHGCalParameters, i).size();
sizes[i] = select_pointer_i(cpuParams, i).size();
}

std::vector<size_t> sizes_units(npointers);
for(unsigned int i=0; i<npointers; ++i)
{
if(cp::typesHEF[i] == cp::HeterogeneousHGCalHEFParametersType::Double)
if(cpar::typesHEF[i] == cpar::HeterogeneousHGCalHEFParametersType::Double)
sizes_units[i] = sizeof(double);
else if(cp::typesHEF[i] == cp::HeterogeneousHGCalHEFParametersType::Int32_t)
else if(cpar::typesHEF[i] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t)
sizes_units[i] = sizeof(int32_t);
}

//element by element multiplication
this->sizes_.resize(npointers);
std::transform( sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_.begin(), std::multiplies<size_t>() );
this->sizes_params_.resize(npointers);
std::transform( sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_params_.begin(), std::multiplies<size_t>() );
return this->sizes_params_;
}

std::vector<size_t> HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_bytes_pos_(const cpos::HGCalPositions* cpuPos) {
size_t npointers = 3; //x, y and z, all float (this is fixed by the geometry and won't change)
std::vector<size_t> sizes(npointers);
for(unsigned int i=0; i<npointers; ++i)
{
sizes[i] = select_pointer_f(cpuPos, i).size();
}

std::vector<size_t> sizes_units(npointers);
for(unsigned int i=0; i<npointers; ++i)
{
sizes_units[i] = sizeof(float);
}

//element by element multiplication
this->sizes_pos_.resize(npointers);
std::transform( sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_pos_.begin(), std::multiplies<size_t>() );
return this->sizes_pos_;
}

HeterogeneousHGCalHEFConditionsWrapper::~HeterogeneousHGCalHEFConditionsWrapper() {
Expand All @@ -84,8 +158,8 @@ HeterogeneousHGCalHEFConditionsWrapper::~HeterogeneousHGCalHEFConditionsWrapper(

//I could use template specializations
//try to use std::variant in the future to avoid similar functions with different return values
double*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_d(cp::HeterogeneousHGCalHEFParameters* cpuObject,
const unsigned int& item) const {
double*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_d(cpar::HeterogeneousHGCalHEFParameters* cpuObject,
const unsigned int& item) const {
switch(item)
{
case 0:
Expand Down Expand Up @@ -120,7 +194,39 @@ std::vector<double> HeterogeneousHGCalHEFConditionsWrapper::select_pointer_d(con
}
}

int32_t*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i(cp::HeterogeneousHGCalHEFParameters* cpuObject,
float*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_f(cpos::HeterogeneousHGCalPositions* cpuObject,
const unsigned int& item) const {
switch(item)
{
case 0:
return cpuObject->x;
case 1:
return cpuObject->y;
case 2:
return cpuObject->z;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_f(heterogeneous): no item.";
return cpuObject->x;
}
}

std::vector<float> HeterogeneousHGCalHEFConditionsWrapper::select_pointer_f(const cpos::HGCalPositions* cpuObject,
const unsigned int& item) const {
switch(item)
{
case 0:
return cpuObject->x;
case 1:
return cpuObject->y;
case 2:
return cpuObject->z;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_f(non-heterogeneous): no item.";
return cpuObject->x;
}
}

int32_t*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i(cpar::HeterogeneousHGCalHEFParameters* cpuObject,
const unsigned int& item) const {
switch(item)
{
Expand Down Expand Up @@ -157,27 +263,37 @@ hgcal_conditions::HeterogeneousHEFConditionsESProduct const *HeterogeneousHGCalH
// Allocate the payload object on pinned host memory.
gpuErrchk(cudaMallocHost(&data.host, sizeof(hgcal_conditions::HeterogeneousHEFConditionsESProduct)));
// Allocate the payload array(s) on device memory.
gpuErrchk(cudaMalloc(&(data.host->params.cellFineX_), chunk_));
gpuErrchk(cudaMalloc(&(data.host->params.cellFineX_), chunk_params_));
gpuErrchk(cudaMalloc(&(data.host->pos.x), chunk_pos_));

// Allocate the payload object on the device memory.
gpuErrchk(cudaMalloc(&data.device, sizeof(hgcal_conditions::HeterogeneousHEFConditionsESProduct)));
// Transfer the payload, first the array(s) ...
gpuErrchk(cudaMemcpyAsync(data.host->params.cellFineX_, this->params_.cellFineX_, chunk_, cudaMemcpyHostToDevice, stream));
gpuErrchk(cudaMemcpyAsync(data.host->params.cellFineX_, this->params_.cellFineX_, chunk_params_, cudaMemcpyHostToDevice, stream));
gpuErrchk(cudaMemcpyAsync(data.host->pos.x, this->pos_.x, chunk_pos_, cudaMemcpyHostToDevice, stream));

for(unsigned int j=0; j<this->sizes_.size()-1; ++j)
//(set the pointers of the parameters)
for(unsigned int j=0; j<this->sizes_params_.size()-1; ++j)
{
if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Double and
cp::typesHEF[j+1] == cp::HeterogeneousHGCalHEFParametersType::Double )
select_pointer_d(&(data.host->params), j+1) = select_pointer_d(&(data.host->params), j) + this->sizes_[j];
else if( cp::typesHEF[j] == cp::HeterogeneousHGCalHEFParametersType::Double and
cp::typesHEF[j+1] == cp::HeterogeneousHGCalHEFParametersType::Int32_t )
select_pointer_i(&(data.host->params), j+1) = reinterpret_cast<int32_t*>( select_pointer_d(&(data.host->params), j) + this->sizes_[j] );
if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double and
cpar::typesHEF[j+1] == cpar::HeterogeneousHGCalHEFParametersType::Double )
select_pointer_d(&(data.host->params), j+1) = select_pointer_d(&(data.host->params), j) + (this->sizes_params_[j]/sizeof(double));
else if( cpar::typesHEF[j] == cpar::HeterogeneousHGCalHEFParametersType::Double and
cpar::typesHEF[j+1] == cpar::HeterogeneousHGCalHEFParametersType::Int32_t )
select_pointer_i(&(data.host->params), j+1) = reinterpret_cast<int32_t*>( select_pointer_d(&(data.host->params), j) + (this->sizes_params_[j]/sizeof(double)) );
else
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "compare this functions' logic with hgcal_conditions::parameters::typesHEF";
}

//(set the pointers of the positions)
for(unsigned int j=0; j<this->sizes_pos_.size()-1; ++j)
{
select_pointer_f(&(data.host->pos), j+1) = select_pointer_f(&(data.host->pos), j) + (this->sizes_pos_[j]/sizeof(float));
}

// ... and then the payload object
gpuErrchk(cudaMemcpyAsync(data.device, data.host, sizeof(hgcal_conditions::HeterogeneousHEFConditionsESProduct), cudaMemcpyHostToDevice, stream));

}); //gpuData_.dataForCurrentDeviceAsync

// Returns the payload object on the memory of the current device
Expand Down
Loading

0 comments on commit 59f8870

Please sign in to comment.