From 4bd3572c6114a8a7e8da32457555775544853753 Mon Sep 17 00:00:00 2001 From: adriano Date: Fri, 26 Apr 2024 11:03:19 +0200 Subject: [PATCH] Fixes for Phase2 pixel Alpaka reco - nDigis = 0 - limit to actual buffer size for SiPixelDigisSoA (also for Phase1) - removing nDigis_h and simplifying nDigis() --- .../interface/SiPixelDigisDevice.h | 8 +--- .../interface/SiPixelDigisHost.h | 8 +--- .../alpaka/SiPixelDigisSoACollection.h | 4 +- .../SiPixelDigiSoA/src/classes_def.xml | 3 +- .../interface/SimplePixelTopology.h | 9 ++++- .../plugins/alpaka/ClusterChargeCut.h | 38 ++++++++++++++++++- .../alpaka/SiPixelPhase2DigiToCluster.cc | 31 +++++---------- .../alpaka/SiPixelRawToClusterKernel.dev.cc | 11 +++++- .../alpaka/SiPixelRawToClusterKernel.h | 2 +- 9 files changed, 72 insertions(+), 42 deletions(-) diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h index 1748069685923..da0914511c99b 100644 --- a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h @@ -21,17 +21,13 @@ class SiPixelDigisDevice : public PortableDeviceCollection(maxFedWords + 1, device) {} - void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { - nModules_h = nModules; - nDigis_h = nDigis; - } + void setNModules(uint32_t nModules) { nModules_h = nModules; } uint32_t nModules() const { return nModules_h; } - uint32_t nDigis() const { return nDigis_h; } + uint32_t nDigis() const { return this->view().metadata().size() - 1; } private: uint32_t nModules_h = 0; - uint32_t nDigis_h = 0; }; #endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h index 4e4650efac1cb..69633db9db28b 100644 --- a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h @@ -14,17 +14,13 @@ class SiPixelDigisHost : public PortableHostCollection { explicit SiPixelDigisHost(size_t maxFedWords, TQueue queue) : PortableHostCollection(maxFedWords + 1, queue) {} - void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { - nModules_h = nModules; - nDigis_h = nDigis; - } + void setNModules(uint32_t nModules) { nModules_h = nModules; } uint32_t nModules() const { return nModules_h; } - uint32_t nDigis() const { return nDigis_h; } + uint32_t nDigis() const { return view().metadata().size() - 1; } private: uint32_t nModules_h = 0; - uint32_t nDigis_h = 0; }; #endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h index 2fe60454d553f..6bc853a7970df 100644 --- a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h @@ -23,9 +23,9 @@ namespace cms::alpakatools { struct CopyToHost> { template static auto copyAsync(TQueue &queue, SiPixelDigisDevice const &srcData) { - SiPixelDigisHost dstData(srcData.view().metadata().size(), queue); + SiPixelDigisHost dstData(srcData.view().metadata().size() - 1, queue); alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); - dstData.setNModulesDigis(srcData.nModules(), srcData.nDigis()); + dstData.setNModules(srcData.nModules()); return dstData; } }; diff --git a/DataFormats/SiPixelDigiSoA/src/classes_def.xml b/DataFormats/SiPixelDigiSoA/src/classes_def.xml index c68be4a01bf5a..0d17fff166eae 100644 --- a/DataFormats/SiPixelDigiSoA/src/classes_def.xml +++ b/DataFormats/SiPixelDigiSoA/src/classes_def.xml @@ -2,7 +2,8 @@ - + + diff --git a/Geometry/CommonTopologies/interface/SimplePixelTopology.h b/Geometry/CommonTopologies/interface/SimplePixelTopology.h index 8ff70a630e109..59f3f4e0fa66f 100644 --- a/Geometry/CommonTopologies/interface/SimplePixelTopology.h +++ b/Geometry/CommonTopologies/interface/SimplePixelTopology.h @@ -380,8 +380,13 @@ namespace pixelTopology { static constexpr uint16_t numberOfModules = 3892; - // 1024 bins, 10 bits - static constexpr uint16_t clusterBinning = 1024; + // 1000 bins < 1024 bins (10 bits) must be: + // - < 32*32 (warpSize*warpSize for block prefix scan for CUDA) + // - > number of columns (y) in any module. This is due to the fact + // that in pixel clustering we give for granted that in each + // bin we only have the pixel belonging to the same column. + // See RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h#L325-L347 + static constexpr uint16_t clusterBinning = 1000; static constexpr uint16_t clusterBits = 10; static constexpr uint16_t numberOfModulesInBarrel = 756; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h index 80a7f4301be42..c8496c6757abb 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -27,6 +27,19 @@ namespace pixelClustering { const uint32_t numElements) const { constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules; +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_grid(acc)) { + printf("All digis before cut: \n"); + for (uint32_t i = 0; i < numElements; i++) + printf("%d %d %d %d %d \n", + i, + digi_view[i].rawIdArr(), + digi_view[i].clus(), + digi_view[i].pdigi(), + digi_view[i].adc()); + } +#endif + auto& charge = alpaka::declareSharedVar(acc); auto& ok = alpaka::declareSharedVar(acc); auto& newclusId = alpaka::declareSharedVar(acc); @@ -37,6 +50,7 @@ namespace pixelClustering { ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules); auto endModule = clus_view[0].moduleStart(); + for (auto module : cms::alpakatools::independent_groups(acc, endModule)) { auto firstPixel = clus_view[1 + module].moduleStart(); auto thisModuleId = digi_view[firstPixel].moduleId(); @@ -112,8 +126,17 @@ namespace pixelClustering { newclusId[i] = ok[i] = (charge[i] >= chargeCut) ? 1 : 0; if (0 == ok[i]) good = false; +#ifdef GPU_DEBUG + printf("Cutting pix %d in module %d newId %d ok? %d charge %d cut %d -> good %d \n", + i, + thisModuleId, + newclusId[i], + ok[i], + charge[i], + chargeCut, + good); +#endif } - // if all clusters are above threshold, do nothing if (alpaka::syncBlockThreadsPredicate(acc, good)) continue; @@ -136,6 +159,7 @@ namespace pixelClustering { alpaka::syncBlockThreads(acc); } } + ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]); clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1]; @@ -154,6 +178,18 @@ namespace pixelClustering { // done alpaka::syncBlockThreads(acc); +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_grid(acc)) { + printf("All digis AFTER cut: \n"); + for (uint32_t i = 0; i < numElements; i++) + printf("%d %d %d %d %d \n", + i, + digi_view[i].rawIdArr(), + digi_view[i].clus(), + digi_view[i].pdigi(), + digi_view[i].adc()); + } +#endif } } }; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc index 575c5ab925145..4b97366b060be 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc @@ -45,12 +45,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const edm::EDGetTokenT> pixelDigiToken_; device::EDPutToken digiPutToken_; - device::EDPutToken digiErrorPutToken_; device::EDPutToken clusterPutToken_; Algo Algo_; - const bool includeErrors_; const SiPixelClusterThresholds clusterThresholds_; uint32_t nDigis_ = 0; @@ -62,17 +60,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { pixelDigiToken_(consumes>(iConfig.getParameter("InputDigis"))), digiPutToken_(produces()), clusterPutToken_(produces()), - includeErrors_(iConfig.getParameter("IncludeErrors")), clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), iConfig.getParameter("clusterThreshold_otherLayers"), static_cast(iConfig.getParameter("ElectronPerADCGain")), static_cast(iConfig.getParameter("Phase2ReadoutMode")), static_cast(iConfig.getParameter("Phase2DigiBaseline")), - static_cast(iConfig.getParameter("Phase2KinkADC"))} { - if (includeErrors_) { - digiErrorPutToken_ = produces(); - } - } + static_cast(iConfig.getParameter("Phase2KinkADC"))} {} void SiPixelPhase2DigiToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -100,11 +93,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { nDigis += det.size(); } - if (nDigis_ == 0) + if (nDigis == 0) return; - SiPixelDigisHost digis_h(nDigis, iEvent.queue()); nDigis_ = nDigis; + SiPixelDigisHost digis_h(nDigis_, iEvent.queue()); nDigis = 0; for (const auto& det : input) { @@ -119,6 +112,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { digis_h.view()[nDigis].yy() = uint16_t(px.column()); digis_h.view()[nDigis].adc() = uint16_t(px.adc()); + digis_h.view()[nDigis].clus() = 0; + digis_h.view()[nDigis].pdigi() = uint32_t(px.packedData()); digis_h.view()[nDigis].rawIdArr() = uint32_t(detid); @@ -130,27 +125,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { digis_d = SiPixelDigisSoACollection(nDigis, iEvent.queue()); alpaka::memcpy(iEvent.queue(), digis_d.buffer(), digis_h.buffer()); - Algo_.makePhase2ClustersAsync(iEvent.queue(), clusterThresholds_, digis_d.view(), nDigis); + Algo_.makePhase2ClustersAsync(iEvent.queue(), clusterThresholds_, digis_d.view(), nDigis_); } void SiPixelPhase2DigiToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) { if (nDigis_ == 0) { - SiPixelClustersSoACollection clusters_d{pixelTopology::Phase1::numberOfModules, iEvent.queue()}; - iEvent.emplace(digiPutToken_, std::move(digis_d)); + SiPixelClustersSoACollection clusters_d{pixelTopology::Phase2::numberOfModules, iEvent.queue()}; + SiPixelDigisSoACollection digis_d_zero{nDigis_, iEvent.queue()}; + iEvent.emplace(digiPutToken_, std::move(digis_d_zero)); iEvent.emplace(clusterPutToken_, std::move(clusters_d)); - if (includeErrors_) { - iEvent.emplace(digiErrorPutToken_, SiPixelDigiErrorsSoACollection()); - } return; } - digis_d.setNModulesDigis(Algo_.nModules(), nDigis_); - + digis_d.setNModules(Algo_.nModules()); iEvent.emplace(digiPutToken_, std::move(digis_d)); iEvent.emplace(clusterPutToken_, Algo_.getClusters()); - if (includeErrors_) { - iEvent.emplace(digiErrorPutToken_, Algo_.getErrors()); - } } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index 911db86bd7d06..0eb8ade78687e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -432,7 +432,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { struct FillHitsModuleStart { template ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const { - ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 + constexpr bool isPhase2 = std::is_base_of::value; + + // For Phase1 there are 1856 pixel modules + // For Phase2 there are 3872 pixel modules + // For whichever setup with more modules it would be + // easy to extend at least till 32*1024 + + constexpr uint16_t prefixScanUpperLimit = isPhase2 ? 4096 : 2048; + ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < prefixScanUpperLimit); constexpr int numberOfModules = TrackerTraits::numberOfModules; constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; @@ -449,7 +457,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule()); } - constexpr bool isPhase2 = std::is_base_of::value; constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024; auto &&ws = alpaka::declareSharedVar(acc); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h index 2b12e4750835f..aed00c9a48ed2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h @@ -168,7 +168,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const uint32_t numDigis); SiPixelDigisSoACollection getDigis() { - digis_d->setNModulesDigis(nModules_Clusters_h[0], nDigis); + digis_d->setNModules(nModules_Clusters_h[0]); return std::move(*digis_d); }