From f6924bb184f1a9b34cddd602d1cd84891a9ea98b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 15 Dec 2020 15:09:04 +0100 Subject: [PATCH] Consistently use gpuClustering::maxNumModules --- .../src/TrackingRecHit2DHeterogeneous.cc | 4 ++-- .../interface/SiPixelGainForHLTonGPU.h | 3 ++- .../plugins/SiPixelDigisClustersFromSoA.cc | 3 ++- .../SiPixelClusterizer/test/gpuClustering_t.h | 3 ++- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 12 ++++++------ .../PixelTriplets/plugins/gpuPixelDoubletsAlgos.h | 4 ++-- .../TrackerHitAssociation/plugins/ClusterSLOnGPU.cu | 5 +++-- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc index d4bf1b500e216..7df49b1c9f780 100644 --- a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc @@ -13,7 +13,7 @@ cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync template <> cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(2001, stream); - cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, 4 * 2001, cudaMemcpyDefault, stream)); + auto ret = cms::cuda::make_host_unique(gpuClustering::maxNumModules + 1, stream); + cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, sizeof(uint32_t) * (gpuClustering::maxNumModules + 1), cudaMemcpyDefault, stream)); return ret; } diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index fc228d0207ecf..aa5a127927b90 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -16,6 +16,7 @@ #endif // __device__ #endif // __CUDACC__ +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" struct SiPixelGainForHLTonGPU_DecodingStructure { @@ -59,7 +60,7 @@ class SiPixelGainForHLTonGPU { constexpr float decodePed(unsigned int ped) const { return ped * pedPrecision_ + minPed_; } DecodingStructure* v_pedestals_; - std::pair rangeAndCols_[2000]; + std::pair rangeAndCols_[gpuClustering::maxNumModules]; float minPed_, maxPed_, minGain_, maxGain_; float pedPrecision_, gainPrecision_; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index dbbc5c4b03284..0685a1d1abed7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -1,3 +1,4 @@ +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/Handle.h" #include "DataFormats/DetId/interface/DetId.h" @@ -84,7 +85,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto collection = std::make_unique>(); auto outputClusters = std::make_unique(); - outputClusters->reserve(2000, nDigis / 4); + outputClusters->reserve(gpuClustering::maxNumModules, nDigis / 4); edm::DetSet* detDigis = nullptr; for (uint32_t i = 0; i < nDigis; i++) { diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index e3e5f17604df0..02611ab1cac1d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -26,7 +26,8 @@ int main(void) { using namespace gpuClustering; - int numElements = 256 * 2000; + constexpr int numElements = 256 * maxNumModules; + // these in reality are already on GPU auto h_id = std::make_unique(numElements); auto h_x = std::make_unique(numElements); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index b3c77f2e17788..4f4e1e4113564 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -113,8 +113,8 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv HitModuleStart moduleStart_; // index of the first pixel of each module HitModuleStart clusInModule_; memset(&clusInModule_, 0, sizeof(HitModuleStart)); // needed?? - assert(2001 == clusInModule_.size()); - assert(0 == clusInModule_[2000]); + assert(gpuClustering::maxNumModules + 1 == clusInModule_.size()); + assert(0 == clusInModule_[gpuClustering::maxNumModules]); uint32_t moduleId_; moduleStart_[1] = 0; // we run sequentially.... @@ -128,7 +128,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv DetId detIdObject(detid); const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); auto gind = genericDet->index(); - assert(gind < 2000); + assert(gind < gpuClustering::maxNumModules); auto const nclus = DSViter->size(); clusInModule_[gind] = nclus; numberOfClusters += nclus; @@ -136,7 +136,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv hitsModuleStart[0] = 0; for (int i = 1, n = clusInModule_.size(); i < n; ++i) hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule_[i - 1]; - assert(numberOfClusters == int(hitsModuleStart[2000])); + assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules])); // output SoA auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, nullptr); @@ -149,7 +149,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv } if (convert2Legacy_) - legacyOutput->reserve(2000, numberOfClusters); + legacyOutput->reserve(gpuClustering::maxNumModules, numberOfClusters); int numberOfDetUnits = 0; int numberOfHits = 0; @@ -159,7 +159,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv DetId detIdObject(detid); const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); auto const gind = genericDet->index(); - assert(gind < 2000); + assert(gind < gpuClustering::maxNumModules); const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); auto const nclus = DSViter->size(); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h index 4e93f984a88d4..d055c8b7cb867 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h @@ -105,7 +105,7 @@ namespace gpuPixelDoublets { // found hit corresponding to our cuda thread, now do the job auto mi = hh.detectorIndex(i); - if (mi > 2000) + if (mi > gpuClustering::maxNumModules) continue; // invalid /* maybe clever, not effective when zoCut is on @@ -201,7 +201,7 @@ namespace gpuPixelDoublets { assert(oi >= offsets[outer]); assert(oi < offsets[outer + 1]); auto mo = hh.detectorIndex(oi); - if (mo > 2000) + if (mo > gpuClustering::maxNumModules) continue; // invalid if (doZ0Cut && z0cutoff(oi)) diff --git a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu index c06d6d254bad3..0aab26d9cc091 100644 --- a/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu +++ b/SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu @@ -22,6 +22,7 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd, uint32_t n) { constexpr uint32_t invTK = 0; // std::numeric_limits::max(); using gpuClustering::invalidModuleId; + using gpuClustering::maxNumModules; auto const& hh = *hhp; auto i = blockIdx.x * blockDim.x + threadIdx.x; @@ -32,12 +33,12 @@ __global__ void simLink(const SiPixelDigisCUDA::DeviceConstView* dd, auto id = dd->moduleInd(i); if (invalidModuleId == id) return; - assert(id < 2000); + assert(id < maxNumModules); auto ch = pixelgpudetails::pixelToChannel(dd->xx(i), dd->yy(i)); auto first = hh.hitsModuleStart(id); auto cl = first + dd->clus(i); - assert(cl < 2000 * blockDim.x); + assert(cl < maxNumModules * blockDim.x); const Clus2TP me{{id, ch, 0, 0, 0, 0, 0}};