Skip to content

Commit

Permalink
Consistently use gpuClustering::maxNumModules
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Dec 15, 2020
1 parent c362200 commit f6924bb
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cms::cuda::host::unique_ptr<float[]> TrackingRecHit2DCUDA::localCoordToHostAsync

template <>
cms::cuda::host::unique_ptr<uint32_t[]> TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint32_t[]>(2001, stream);
cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, 4 * 2001, cudaMemcpyDefault, stream));
auto ret = cms::cuda::make_host_unique<uint32_t[]>(gpuClustering::maxNumModules + 1, stream);
cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, sizeof(uint32_t) * (gpuClustering::maxNumModules + 1), cudaMemcpyDefault, stream));
return ret;
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#endif // __device__
#endif // __CUDACC__

#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

struct SiPixelGainForHLTonGPU_DecodingStructure {
Expand Down Expand Up @@ -59,7 +60,7 @@ class SiPixelGainForHLTonGPU {
constexpr float decodePed(unsigned int ped) const { return ped * pedPrecision_ + minPed_; }

DecodingStructure* v_pedestals_;
std::pair<Range, int> rangeAndCols_[2000];
std::pair<Range, int> rangeAndCols_[gpuClustering::maxNumModules];

float minPed_, maxPed_, minGain_, maxGain_;
float pedPrecision_, gainPrecision_;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "DataFormats/Common/interface/DetSetVector.h"
#include "DataFormats/Common/interface/Handle.h"
#include "DataFormats/DetId/interface/DetId.h"
Expand Down Expand Up @@ -84,7 +85,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con

auto collection = std::make_unique<edm::DetSetVector<PixelDigi>>();
auto outputClusters = std::make_unique<SiPixelClusterCollectionNew>();
outputClusters->reserve(2000, nDigis / 4);
outputClusters->reserve(gpuClustering::maxNumModules, nDigis / 4);

edm::DetSet<PixelDigi>* detDigis = nullptr;
for (uint32_t i = 0; i < nDigis; i++) {
Expand Down
3 changes: 2 additions & 1 deletion RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ int main(void) {

using namespace gpuClustering;

int numElements = 256 * 2000;
constexpr int numElements = 256 * maxNumModules;

// these in reality are already on GPU
auto h_id = std::make_unique<uint16_t[]>(numElements);
auto h_x = std::make_unique<uint16_t[]>(numElements);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
HitModuleStart moduleStart_; // index of the first pixel of each module
HitModuleStart clusInModule_;
memset(&clusInModule_, 0, sizeof(HitModuleStart)); // needed??
assert(2001 == clusInModule_.size());
assert(0 == clusInModule_[2000]);
assert(gpuClustering::maxNumModules + 1 == clusInModule_.size());
assert(0 == clusInModule_[gpuClustering::maxNumModules]);
uint32_t moduleId_;
moduleStart_[1] = 0; // we run sequentially....

Expand All @@ -128,15 +128,15 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
DetId detIdObject(detid);
const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject);
auto gind = genericDet->index();
assert(gind < 2000);
assert(gind < gpuClustering::maxNumModules);
auto const nclus = DSViter->size();
clusInModule_[gind] = nclus;
numberOfClusters += nclus;
}
hitsModuleStart[0] = 0;
for (int i = 1, n = clusInModule_.size(); i < n; ++i)
hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule_[i - 1];
assert(numberOfClusters == int(hitsModuleStart[2000]));
assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules]));

// output SoA
auto output = std::make_unique<TrackingRecHit2DCPU>(numberOfClusters, &cpeView, hitsModuleStart, nullptr);
Expand All @@ -149,7 +149,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
}

if (convert2Legacy_)
legacyOutput->reserve(2000, numberOfClusters);
legacyOutput->reserve(gpuClustering::maxNumModules, numberOfClusters);

int numberOfDetUnits = 0;
int numberOfHits = 0;
Expand All @@ -159,7 +159,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
DetId detIdObject(detid);
const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject);
auto const gind = genericDet->index();
assert(gind < 2000);
assert(gind < gpuClustering::maxNumModules);
const PixelGeomDetUnit* pixDet = dynamic_cast<const PixelGeomDetUnit*>(genericDet);
assert(pixDet);
auto const nclus = DSViter->size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace gpuPixelDoublets {

// found hit corresponding to our cuda thread, now do the job
auto mi = hh.detectorIndex(i);
if (mi > 2000)
if (mi > gpuClustering::maxNumModules)
continue; // invalid

/* maybe clever, not effective when zoCut is on
Expand Down Expand Up @@ -201,7 +201,7 @@ namespace gpuPixelDoublets {
assert(oi >= offsets[outer]);
assert(oi < offsets[outer + 1]);
auto mo = hh.detectorIndex(oi);
if (mo > 2000)
if (mo > gpuClustering::maxNumModules)
continue; // invalid

if (doZ0Cut && z0cutoff(oi))
Expand Down
5 changes: 3 additions & 2 deletions SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd,
uint32_t n) {
constexpr uint32_t invTK = 0; // std::numeric_limits<int32_t>::max();
using gpuClustering::invalidModuleId;
using gpuClustering::maxNumModules;

auto const& hh = *hhp;
auto i = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -32,12 +33,12 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd,
auto id = dd->moduleInd(i);
if (invalidModuleId == id)
return;
assert(id < 2000);
assert(id < maxNumModules);

auto ch = pixelgpudetails::pixelToChannel(dd->xx(i), dd->yy(i));
auto first = hh.hitsModuleStart(id);
auto cl = first + dd->clus(i);
assert(cl < 2000 * blockDim.x);
assert(cl < maxNumModules * blockDim.x);

const Clus2TP me{{id, ch, 0, 0, 0, 0, 0}};

Expand Down

0 comments on commit f6924bb

Please sign in to comment.