From 72591c3d14d6cf2e19fde31b1ca3f5ed669a0876 Mon Sep 17 00:00:00 2001 From: adriano Date: Thu, 18 Apr 2024 16:15:05 +0200 Subject: [PATCH] Allowing runtime number of hits - update for HitToTuple map in CA --- .../interface/SimplePixelTopology.h | 2 -- .../alpaka/SiPixelRawToClusterKernel.dev.cc | 13 ++++--------- .../plugins/alpaka/PixelRecHitKernels.dev.cc | 9 --------- .../CAHitNtupletGeneratorKernels.dev.cc | 19 ++++++++++++++++--- .../alpaka/CAHitNtupletGeneratorKernels.h | 2 ++ .../plugins/alpaka/CAStructures.h | 2 +- 6 files changed, 23 insertions(+), 24 deletions(-) diff --git a/Geometry/CommonTopologies/interface/SimplePixelTopology.h b/Geometry/CommonTopologies/interface/SimplePixelTopology.h index faf511ee87f39..8ff70a630e109 100644 --- a/Geometry/CommonTopologies/interface/SimplePixelTopology.h +++ b/Geometry/CommonTopologies/interface/SimplePixelTopology.h @@ -323,7 +323,6 @@ namespace pixelTopology { using tindex_type = uint32_t; // for tuples using cindex_type = uint32_t; // for cells - static constexpr uint32_t maxNumberOfHits = 256 * 1024; static constexpr uint32_t maxCellNeighbors = 64; static constexpr uint32_t maxCellTracks = 302; static constexpr uint32_t maxHitsOnTrack = 15; @@ -417,7 +416,6 @@ namespace pixelTopology { using tindex_type = uint16_t; // for tuples using cindex_type = uint32_t; // for cells - static constexpr uint32_t maxNumberOfHits = 48 * 1024; static constexpr uint32_t maxCellNeighbors = 36; static constexpr uint32_t maxCellTracks = 48; static constexpr uint32_t maxHitsOnTrack = 10; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index f54c9e9af29ec..911db86bd7d06 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -488,9 +488,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::syncBlockThreads(acc); } #ifdef GPU_DEBUG - ALPAKA_ASSERT_ACC(0 == clus_view[0].moduleStart()); - auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart()); - ALPAKA_ASSERT_ACC(c0 == clus_view[1].moduleStart()); + ALPAKA_ASSERT_ACC(0 == clus_view[1].moduleStart()); + auto c0 = std::min(maxHitsInModule, clus_view[2].clusModuleStart()); + ALPAKA_ASSERT_ACC(c0 == clus_view[2].moduleStart()); ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart()); @@ -504,13 +504,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (i == bpix2 || i == fpix1) printf("moduleStart %d %d\n", i, clus_view[i].moduleStart()); } + #endif - // avoid overflow - constexpr auto MAX_HITS = TrackerTraits::maxNumberOfHits; - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) { - if (clus_view[i].clusModuleStart() > MAX_HITS) - clus_view[i].clusModuleStart() = MAX_HITS; - } } // end of FillHitsModuleStart kernel operator() }; // end of FillHitsModuleStart struct diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc index 5b6d1133a77bb..fe3a703a8e713 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc @@ -113,15 +113,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hrv_d.contentSize = nHits; hrv_d.contentStorage = hits_d.view().phiBinnerStorage(); - // fillManyFromVector(h_d.data(), nParts, v_d.data(), offsets_d.data(), offsets[10], 256, queue); - /* cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), - nLayers, - hits_d.view().iphi(), - hits_d.view().hitsLayerStart().data(), - nHits, - (uint32_t)256, - queue); -*/ cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), hrv_d, nLayers, diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index 0cc24f81254aa..c5c3e2332c1f6 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -37,6 +37,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // workspace device_hitToTuple_{cms::alpakatools::make_device_buffer(queue)}, + device_hitToTupleStorage_{ + cms::alpakatools::make_device_buffer(queue, nhits + 1)}, device_tupleMultiplicity_{cms::alpakatools::make_device_buffer(queue)}, // NB: In legacy, device_theCells_ and device_isOuterHitOfCell_ were allocated inside buildDoublets @@ -66,6 +68,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { device_hitToTuple_apc_{reinterpret_cast(device_storage_.data() + 1)}, device_nCells_{cms::alpakatools::make_device_view(alpaka::getDev(queue), *reinterpret_cast(device_storage_.data() + 2))} { +#ifdef GPU_DEBUG + std::cout << "Allocation for tuple building. N hits " << nhits << std::endl; +#endif + alpaka::memset(queue, counters_, 0); alpaka::memset(queue, device_nCells_, 0); alpaka::memset(queue, cellStorage_, 0); @@ -74,14 +80,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::memcpy(queue, device_cellCuts_, cellCuts_h); [[maybe_unused]] TupleMultiplicity *tupleMultiplicityDeviceData = device_tupleMultiplicity_.data(); - [[maybe_unused]] HitToTuple *hitToTupleDeviceData = device_hitToTuple_.data(); using TM = cms::alpakatools::OneToManyAssocRandomAccess; TM *tm = device_tupleMultiplicity_.data(); TM::template launchZero(tm, queue); TupleMultiplicity::template launchZero(tupleMultiplicityDeviceData, queue); - HitToTuple::template launchZero(hitToTupleDeviceData, queue); + + device_hitToTupleView_.assoc = device_hitToTuple_.data(); + device_hitToTupleView_.offStorage = device_hitToTupleStorage_.data(); + device_hitToTupleView_.offSize = nhits + 1; + + HitToTuple::template launchZero(device_hitToTupleView_, queue); +#ifdef GPU_DEBUG + std::cout << "Allocations for CAHitNtupletGeneratorKernels: done!" << std::endl; +#endif } template @@ -401,7 +414,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tracks_view, this->device_hitToTuple_.data()); //CHECK - HitToTuple::template launchFinalize(this->device_hitToTuple_.data(), queue); + HitToTuple::template launchFinalize(this->device_hitToTupleView_, queue); alpaka::exec( queue, workDiv1D, Kernel_fillHitInTracks{}, tracks_view, this->device_hitToTuple_.data()); #ifdef GPU_DEBUG diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h index ecf8e00c454ab..796bb3f36c586 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h @@ -253,6 +253,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // workspace cms::alpakatools::device_buffer device_hitToTuple_; + cms::alpakatools::device_buffer device_hitToTupleStorage_; + typename HitToTuple::View device_hitToTupleView_; cms::alpakatools::device_buffer device_tupleMultiplicity_; cms::alpakatools::device_buffer device_theCells_; cms::alpakatools::device_buffer device_isOuterHitOfCell_; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h index fcc4fab8ead54..1790582f213e2 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h @@ -31,7 +31,7 @@ namespace caStructures { template using HitToTupleT = cms::alpakatools::OneToManyAssocRandomAccess; // 3.5 should be enough template