diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc index 684380dcbdfbc..56cacacc92c37 100644 --- a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc @@ -15,7 +15,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { public: template >> ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelClustersSoAView clust_view) const { - for (int32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + for (int32_t j : uniform_elements(acc, clust_view.metadata().size())) { clust_view[j].moduleStart() = j; clust_view[j].clusInModule() = j * 2; clust_view[j].moduleId() = j * 3; @@ -28,7 +28,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { public: template >> ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelClustersSoAConstView clust_view) const { - for (uint32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + for (uint32_t j : uniform_elements(acc, clust_view.metadata().size())) { assert(clust_view[j].moduleStart() == j); assert(clust_view[j].clusInModule() == j * 2); assert(clust_view[j].moduleId() == j * 3); diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc index accf175bccfe6..566d4fd7ac92c 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc @@ -27,7 +27,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tracks_view.nTracks() = nTracks; } - for (int32_t j : elements_with_stride(acc, nTracks)) { + for (int32_t j : uniform_elements(acc, nTracks)) { tracks_view[j].pt() = (float)j; tracks_view[j].eta() = (float)j; tracks_view[j].chi2() = (float)j; @@ -50,7 +50,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (cms::alpakatools::once_per_grid(acc)) { ALPAKA_ASSERT(tracks_view.nTracks() == nTracks); } - for (int32_t j : elements_with_stride(acc, tracks_view.nTracks())) { + for (int32_t j : uniform_elements(acc, tracks_view.nTracks())) { ALPAKA_ASSERT(abs(tracks_view[j].pt() - (float)j) < .0001); ALPAKA_ASSERT(abs(tracks_view[j].eta() - (float)j) < .0001); ALPAKA_ASSERT(abs(tracks_view[j].chi2() - (float)j) < .0001); diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc index b987b0ee82a63..d490ba540211b 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc @@ -45,7 +45,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // can be increased to soa.nHits() for debugging - for (uint32_t i : cms::alpakatools::elements_with_stride(acc, 10)) { + for (uint32_t i : cms::alpakatools::uniform_elements(acc, 10)) { printf("iPhi %d -> %d\n", i, soa[i].iphi()); } } diff --git a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h index 304d01ff9fd08..9535abad90c01 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h +++ b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h @@ -25,7 +25,7 @@ namespace cms::alpakatools { T const *__restrict__ v, uint32_t const *__restrict__ offsets) const { const uint32_t nt = offsets[nh]; - for (uint32_t i : elements_with_stride(acc, nt)) { + for (uint32_t i : uniform_elements(acc, nt)) { auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); ALPAKA_ASSERT_OFFLOAD((*off) > 0); int32_t ih = off - offsets - 1; @@ -44,7 +44,7 @@ namespace cms::alpakatools { T const *__restrict__ v, uint32_t const *__restrict__ offsets) const { const uint32_t nt = offsets[nh]; - for (uint32_t i : elements_with_stride(acc, nt)) { + for (uint32_t i : uniform_elements(acc, nt)) { auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); ALPAKA_ASSERT_OFFLOAD((*off) > 0); int32_t ih = off - offsets - 1; diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testAtomicPairCounter.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testAtomicPairCounter.dev.cc index 1687feb8c1bab..e059a668e1480 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testAtomicPairCounter.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testAtomicPairCounter.dev.cc @@ -18,7 +18,7 @@ struct update { template ALPAKA_FN_ACC void operator()( const TAcc &acc, AtomicPairCounter *dc, uint32_t *ind, uint32_t *cont, uint32_t n) const { - for (auto i : elements_with_stride(acc, n)) { + for (auto i : uniform_elements(acc, n)) { auto m = i % 11; m = m % 6 + 1; // max 6, no 0 auto c = dc->inc_add(acc, m); diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc index 4ce11cc7facdd..b032939f9870b 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc @@ -29,19 +29,19 @@ struct mykernel { auto& ws = alpaka::declareSharedVar(acc); // set off zero - for (auto j : elements_with_stride(acc, Hist::totbins())) { + for (auto j : uniform_elements(acc, Hist::totbins())) { hist.off[j] = 0; } alpaka::syncBlockThreads(acc); // set bins zero - for (auto j : elements_with_stride(acc, Hist::totbins())) { + for (auto j : uniform_elements(acc, Hist::totbins())) { hist.content[j] = 0; } alpaka::syncBlockThreads(acc); // count - for (auto j : elements_with_stride(acc, N)) { + for (auto j : uniform_elements(acc, N)) { hist.count(acc, v[j]); } alpaka::syncBlockThreads(acc); @@ -56,18 +56,18 @@ struct mykernel { ALPAKA_ASSERT_OFFLOAD(N == hist.size()); // verify - for ([[maybe_unused]] auto j : elements_with_stride(acc, Hist::nbins())) { + for ([[maybe_unused]] auto j : uniform_elements(acc, Hist::nbins())) { ALPAKA_ASSERT_OFFLOAD(hist.off[j] <= hist.off[j + 1]); } alpaka::syncBlockThreads(acc); - for (auto j : elements_with_stride(acc, 32)) { + for (auto j : uniform_elements(acc, 32)) { ws[j] = 0; // used by prefix scan... } alpaka::syncBlockThreads(acc); // fill - for (auto j : elements_with_stride(acc, N)) { + for (auto j : uniform_elements(acc, N)) { hist.fill(acc, v[j], j); } alpaka::syncBlockThreads(acc); @@ -77,7 +77,7 @@ struct mykernel { // bin #ifndef NDEBUG - for (auto j : elements_with_stride(acc, hist.size() - 1)) { + for (auto j : uniform_elements(acc, hist.size() - 1)) { auto p = hist.begin() + j; ALPAKA_ASSERT_OFFLOAD((*p) < N); [[maybe_unused]] auto k1 = Hist::bin(v[*p]); @@ -87,7 +87,7 @@ struct mykernel { #endif // forEachInWindow - for (auto i : elements_with_stride(acc, hist.size())) { + for (auto i : uniform_elements(acc, hist.size())) { auto p = hist.begin() + i; auto j = *p; #ifndef NDEBUG diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneRadixSort.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneRadixSort.dev.cc index b1cb735b55194..a8d9240d47183 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneRadixSort.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneRadixSort.dev.cc @@ -85,7 +85,7 @@ namespace { // __shared__ uint16_t sws[2048]; // __shared__ float z[2048]; // __shared__ int iz[2048]; - for (auto itrack : elements_with_stride(acc, elements)) { + for (auto itrack : uniform_elements(acc, elements)) { z[itrack] = gpu_input[itrack]; iz[itrack] = 10000 * gpu_input[itrack]; // order[itrack] = itrack; @@ -95,7 +95,7 @@ namespace { alpaka::syncBlockThreads(acc); //verify - for (auto itrack : elements_with_stride(acc, elements - 1)) { + for (auto itrack : uniform_elements(acc, elements - 1)) { auto ntrack = order[itrack]; auto mtrack = order[itrack + 1]; assert(truncate<2>(z[ntrack]) <= truncate<2>(z[mtrack])); @@ -123,7 +123,7 @@ namespace { radixSort(acc, iz, order, sws, elements); alpaka::syncBlockThreads(acc); - for (auto itrack : elements_with_stride(acc, elements - 1)) { + for (auto itrack : uniform_elements(acc, elements - 1)) { auto ntrack = order[itrack]; auto mtrack = order[itrack + 1]; assert(iz[ntrack] <= iz[mtrack]); diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc index d1de1f1c17cca..492911e6b1a57 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc @@ -36,7 +36,7 @@ struct countMultiLocal { TK const* __restrict__ tk, Multiplicity* __restrict__ assoc, uint32_t n) const { - for (auto i : elements_with_stride(acc, n)) { + for (auto i : uniform_elements(acc, n)) { auto& local = alpaka::declareSharedVar(acc); const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); const bool oncePerSharedMemoryAccess = (threadIdxLocal == 0); @@ -59,7 +59,7 @@ struct countMulti { TK const* __restrict__ tk, Multiplicity* __restrict__ assoc, uint32_t n) const { - for (auto i : elements_with_stride(acc, n)) { + for (auto i : uniform_elements(acc, n)) { assoc->count(acc, 2 + i % 4); } } @@ -68,7 +68,7 @@ struct countMulti { struct verifyMulti { template ALPAKA_FN_ACC void operator()(const TAcc& acc, Multiplicity* __restrict__ m1, Multiplicity* __restrict__ m2) const { - for ([[maybe_unused]] auto i : elements_with_stride(acc, Multiplicity{}.totOnes())) { + for ([[maybe_unused]] auto i : uniform_elements(acc, Multiplicity{}.totOnes())) { ALPAKA_ASSERT_OFFLOAD(m1->off[i] == m2->off[i]); } } @@ -80,7 +80,7 @@ struct count { TK const* __restrict__ tk, AssocRandomAccess* __restrict__ assoc, uint32_t n) const { - for (auto i : elements_with_stride(acc, 4 * n)) { + for (auto i : uniform_elements(acc, 4 * n)) { auto k = i / 4; auto j = i - 4 * k; ALPAKA_ASSERT_OFFLOAD(j < 4); @@ -100,7 +100,7 @@ struct fill { TK const* __restrict__ tk, AssocRandomAccess* __restrict__ assoc, uint32_t n) const { - for (auto i : elements_with_stride(acc, 4 * n)) { + for (auto i : uniform_elements(acc, 4 * n)) { auto k = i / 4; auto j = i - 4 * k; ALPAKA_ASSERT_OFFLOAD(j < 4); @@ -125,7 +125,7 @@ struct fillBulk { template ALPAKA_FN_ACC void operator()( const TAcc& acc, AtomicPairCounter* apc, TK const* __restrict__ tk, Assoc* __restrict__ assoc, uint32_t n) const { - for (auto k : elements_with_stride(acc, n)) { + for (auto k : uniform_elements(acc, n)) { auto m = tk[k][3] < MaxElem ? 4 : 3; assoc->bulkFill(acc, *apc, &tk[k][0], m); } diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc index bffee8f1f533d..5e8f4ee3b8e9a 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc @@ -34,7 +34,7 @@ struct testPrefixScan { auto& c = alpaka::declareSharedVar(acc); auto& co = alpaka::declareSharedVar(acc); - for (auto i : elements_with_stride(acc, size)) { + for (auto i : uniform_elements(acc, size)) { c[i] = 1; }; @@ -49,7 +49,7 @@ struct testPrefixScan { // TODO: not needed? Not in multi kernel version, not in CUDA version alpaka::syncBlockThreads(acc); - for (auto i : elements_with_stride(acc, size)) { + for (auto i : uniform_elements(acc, size)) { if (0 == i) continue; if constexpr (!std::is_floating_point_v) { @@ -109,7 +109,7 @@ struct testWarpPrefixScan { struct init { template ALPAKA_FN_ACC void operator()(const TAcc& acc, uint32_t* v, uint32_t val, uint32_t n) const { - for (auto index : elements_with_stride(acc, n)) { + for (auto index : uniform_elements(acc, n)) { v[index] = val; if (index == 0) @@ -121,7 +121,7 @@ struct init { struct verify { template ALPAKA_FN_ACC void operator()(const TAcc& acc, uint32_t const* v, uint32_t n) const { - for (auto index : elements_with_stride(acc, n)) { + for (auto index : uniform_elements(acc, n)) { ALPAKA_ASSERT_OFFLOAD(v[index] == index + 1); if (index == 0) diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testSimpleVector.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testSimpleVector.dev.cc index c29b571c6d356..6f60679c79d64 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testSimpleVector.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testSimpleVector.dev.cc @@ -15,7 +15,7 @@ using namespace ALPAKA_ACCELERATOR_NAMESPACE; struct vector_pushback { template ALPAKA_FN_ACC void operator()(const TAcc& acc, SimpleVector* foo) const { - for (auto index : elements_with_stride(acc)) + for (auto index : uniform_elements(acc)) foo->push_back(acc, index); } }; @@ -30,7 +30,7 @@ struct vector_reset { struct vector_emplace_back { template ALPAKA_FN_ACC void operator()(const TAcc& acc, SimpleVector* foo) const { - for (auto index : elements_with_stride(acc)) + for (auto index : uniform_elements(acc)) foo->emplace_back(acc, index); } }; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h index e34df782db4dc..1ab3bdde439ab 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h @@ -41,11 +41,11 @@ namespace calibPixel { clus_view[0].clusModuleStart() = 0; clus_view[0].moduleStart() = 0; } - for (auto i : cms::alpakatools::elements_with_stride(acc, phase1PixelTopology::numberOfModules)) { + for (auto i : cms::alpakatools::uniform_elements(acc, phase1PixelTopology::numberOfModules)) { clus_view[i].clusInModule() = 0; } - for (auto i : cms::alpakatools::elements_with_stride(acc, numElements)) { + for (auto i : cms::alpakatools::uniform_elements(acc, numElements)) { auto dvgi = view[i]; if (dvgi.moduleId() == ::pixelClustering::invalidModuleId) continue; @@ -106,11 +106,11 @@ namespace calibPixel { clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0; } - for (uint32_t i : cms::alpakatools::elements_with_stride(acc, phase2PixelTopology::numberOfModules)) { + for (uint32_t i : cms::alpakatools::uniform_elements(acc, phase2PixelTopology::numberOfModules)) { clus_view[i].clusInModule() = 0; } - for (uint32_t i : cms::alpakatools::elements_with_stride(acc, numElements)) { + for (uint32_t i : cms::alpakatools::uniform_elements(acc, numElements)) { auto dvgi = view[i]; if (pixelClustering::invalidModuleId != dvgi.moduleId()) { const int mode = (Phase2ReadoutMode < -1 ? -1 : Phase2ReadoutMode); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h index 37afda9847a99..b2fcca94e1d24 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -104,7 +104,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { printf("Starting to count modules to set module starts:"); } #endif - for (int32_t i : cms::alpakatools::elements_with_stride(acc, numElements)) { + for (int32_t i : cms::alpakatools::uniform_elements(acc, numElements)) { digi_view[i].clus() = i; if (::pixelClustering::invalidModuleId == digi_view[i].moduleId()) continue; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index 1cb55b0a27955..6a28f0cd0504a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -304,7 +304,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (cms::alpakatools::once_per_grid(acc)) err.size() = 0; - for (auto gIndex : cms::alpakatools::elements_with_stride(acc, wordCounter)) { + for (auto gIndex : cms::alpakatools::uniform_elements(acc, wordCounter)) { auto dvgi = digisView[gIndex]; dvgi.xx() = 0; dvgi.yy() = 0; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc index 63e269cc79453..5b6d1133a77bb 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc @@ -36,7 +36,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t* __restrict__ hitsLayerStart) const { assert(0 == hitsModuleStart[0]); - for (int32_t i : cms::alpakatools::elements_with_stride(acc, TrackerTraits::numberOfLayers + 1)) { + for (int32_t i : cms::alpakatools::uniform_elements(acc, TrackerTraits::numberOfLayers + 1)) { hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; #ifdef GPU_DEBUG int old = i == 0 ? 0 : hitsModuleStart[cpeParams->layerGeometry().layerStart[i - 1]]; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc index 9882c5c47b43e..ae6739cfb72df 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc @@ -62,7 +62,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } #endif const auto nt = riemannFit::maxNumberOfConcurrentFits; - for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) { auto tuple_idx = local_idx + offset; if ((int)tuple_idx >= totTK) { ptkids[local_idx] = invalidTkId; @@ -190,7 +190,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // same as above... // look in bin for this hit multiplicity const auto nt = riemannFit::maxNumberOfConcurrentFits; - for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) { if (invalidTkId == ptkids[local_idx]) break; auto tkid = ptkids[local_idx]; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index 68b6e597e93c8..7b296324ba3eb 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -113,7 +113,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { } } - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx)); ALPAKA_ASSERT_OFFLOAD(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); @@ -141,7 +141,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { #endif } - for (auto idx : cms::alpakatools::elements_with_stride(acc, *nCells)) { + for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { auto const &thisCell = cells[idx]; if (thisCell.hasFishbone() && !thisCell.isKilled()) alpaka::atomicAdd(acc, &c.nFishCells, 1ull, alpaka::hierarchy::Blocks{}); @@ -158,7 +158,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { } // FIXME this loop was up to nHits - isOuterHitOfCell.offset in the CUDA version - for (auto idx : cms::alpakatools::elements_with_stride(acc, nHits)) + for (auto idx : cms::alpakatools::uniform_elements(acc, nHits)) if ((*isOuterHitOfCell).container[idx].full()) // ++tooManyOuterHitOfCell; printf("OuterHitOfCell overflow %d\n", idx); } @@ -174,7 +174,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { TkSoAView tracks_view) const { constexpr auto reject = Quality::dup; - for (auto idx : cms::alpakatools::elements_with_stride(acc, *nCells)) { + for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { auto const &thisCell = cells[idx]; if (!thisCell.isKilled()) continue; @@ -199,7 +199,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { // quality to mark rejected constexpr auto reject = Quality::edup; /// cannot be loose ALPAKA_ASSERT_OFFLOAD(nCells); - for (auto idx : cms::alpakatools::elements_with_stride(acc, *nCells)) { + for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { auto const &thisCell = cells[idx]; if (thisCell.tracks().size() < 2) @@ -242,7 +242,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { ALPAKA_ASSERT_OFFLOAD(nCells); const auto ntNCells = (*nCells); - for (auto idx : cms::alpakatools::elements_with_stride(acc, ntNCells)) { + for (auto idx : cms::alpakatools::uniform_elements(acc, ntNCells)) { auto const &thisCell = cells[idx]; if (thisCell.tracks().size() < 2) continue; @@ -401,7 +401,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { printf("starting producing ntuplets from %d cells \n", *nCells); #endif - for (auto idx : cms::alpakatools::elements_with_stride(acc, (*nCells))) { + for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) { auto const &thisCell = cells[idx]; // cut by earlyFishbone @@ -445,7 +445,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { CACellT *__restrict__ cells, uint32_t const *nCells) const { using Cell = CACellT; - for (auto idx : cms::alpakatools::elements_with_stride(acc, (*nCells))) { + for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) { auto &thisCell = cells[idx]; if (!thisCell.tracks().empty()) thisCell.setStatusBits(Cell::StatusBit::kInTrack); @@ -460,7 +460,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, TupleMultiplicity *tupleMultiplicity) const { - for (auto it : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { auto nhits = tracks_view.hitIndices().size(it); if (nhits < 3) continue; @@ -482,7 +482,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, TupleMultiplicity *tupleMultiplicity) const { - for (auto it : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { auto nhits = tracks_view.hitIndices().size(it); if (nhits < 3) continue; @@ -504,7 +504,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, QualityCuts cuts) const { - for (auto it : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { auto nhits = tracks_view.hitIndices().size(it); if (nhits == 0) break; // guard @@ -549,7 +549,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { public: template >> ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, Counters *counters) const { - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { if (tracks_view.hitIndices().size(idx) == 0) break; //guard if (tracks_view[idx].quality() < Quality::loose) @@ -569,7 +569,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitToTuple *hitToTuple) const { - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { if (tracks_view.hitIndices().size(idx) == 0) break; // guard for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) @@ -585,7 +585,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitToTuple *hitToTuple) const { - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { if (tracks_view.hitIndices().size(idx) == 0) break; // guard for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) @@ -602,11 +602,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { TkSoAView tracks_view, HitsConstView hh) const { // copy offsets - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx]; } // fill hit indices - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().size())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().size())) { ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size()); tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex(); } @@ -625,7 +625,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (cms::alpakatools::once_per_grid(acc)) tracks_view.nTracks() = ntracks; - for (auto idx : cms::alpakatools::elements_with_stride(acc, ntracks)) { + for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) { ALPAKA_ASSERT_OFFLOAD(TracksUtilities::nHits(tracks_view, idx) >= 3); tracks_view[idx].nLayers() = TracksUtilities::computeNumberOfLayers(tracks_view, idx); } @@ -640,7 +640,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { HitToTuple const *__restrict__ hitToTuple, Counters *counters) const { auto &c = *counters; - for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple->nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nOnes())) { if (hitToTuple->size(idx) == 0) continue; // SHALL NOT BE break alpaka::atomicAdd(acc, &c.nUsedHits, 1ull, alpaka::hierarchy::Blocks{}); @@ -663,7 +663,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto &hitToTuple = *phitToTuple; auto const &foundNtuplets = *ptuples; - for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple->nbins())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nbins())) { if (hitToTuple.size(idx) < 2) continue; @@ -705,7 +705,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { // quality to mark rejected auto const reject = dupPassThrough ? loose : dup; - for (auto idx : cms::alpakatools::elements_with_stride(acc, tuples->nbins())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tuples->nbins())) { if (tuples->size(idx) == 0) break; //guard if (quality[idx] <= reject) @@ -731,7 +731,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto &hitToTuple = *phitToTuple; - for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) { if (hitToTuple.size(idx) < 2) continue; @@ -793,7 +793,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { uint32_t l1end = hh.hitsLayerStart()[1]; - for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) { if (hitToTuple.size(idx) < 2) continue; @@ -844,7 +844,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto &hitToTuple = *phitToTuple; - for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) { if (hitToTuple.size(idx) < 2) continue; @@ -904,7 +904,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto &hitToTuple = *phitToTuple; - for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nOnes())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) { if (hitToTuple.size(idx) < 2) continue; @@ -947,8 +947,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { int iev) const { constexpr auto loose = Quality::loose; - for (auto i : cms::alpakatools::elements_with_stride( - acc, firstPrint, std::min(lastPrint, tracks_view.hitIndices().nbins()))) { + for (auto i : + cms::alpakatools::uniform_elements(acc, firstPrint, std::min(lastPrint, tracks_view.hitIndices().nbins()))) { auto nh = tracks_view.hitIndices().size(i); if (nh < 3) continue; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h index 518a55c318402..580198772034d 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h @@ -29,7 +29,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { CellTracks* cellTracksContainer) const { ALPAKA_ASSERT_OFFLOAD((*isOuterHitOfCell).container); - for (auto i : cms::alpakatools::elements_with_stride(acc, nHits - isOuterHitOfCell->offset)) + for (auto i : cms::alpakatools::uniform_elements(acc, nHits - isOuterHitOfCell->offset)) (*isOuterHitOfCell).container[i].reset(); if (cms::alpakatools::once_per_grid(acc)) { diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc index 3a1d5dacd8435..9ab7d1fdf1e78 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc @@ -55,7 +55,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif const auto nt = riemannFit::maxNumberOfConcurrentFits; - for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) { auto tuple_idx = local_idx + offset; if (tuple_idx >= tupleMultiplicity->size(nHits)) break; @@ -111,7 +111,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // look in bin for this hit multiplicity const auto nt = riemannFit::maxNumberOfConcurrentFits; - for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) { auto tuple_idx = local_idx + offset; if (tuple_idx >= tupleMultiplicity->size(nHits)) break; @@ -158,7 +158,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // look in bin for this hit multiplicity const auto nt = riemannFit::maxNumberOfConcurrentFits; - for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, nt)) { auto tuple_idx = local_idx + offset; if (tuple_idx >= tupleMultiplicity->size(nHits)) break; diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h index 447a3d6c89c07..cb772a7e653b4 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h @@ -67,7 +67,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto& hist = alpaka::declareSharedVar(acc); auto& hws = alpaka::declareSharedVar(acc); - for (auto j : cms::alpakatools::elements_with_stride(acc, Hist::totbins())) { + for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) { hist.off[j] = 0; } alpaka::syncBlockThreads(acc); @@ -79,7 +79,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only @@ -98,12 +98,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint16_t(i)); } alpaka::syncBlockThreads(acc); // count neighbours - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (ezt2[i] > er2mx) continue; auto loop = [&](uint32_t j) { @@ -122,7 +122,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // find closest above me .... (we ignore the possibility of two j at same distance from i) - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { float mdist = eps; auto loop = [&](uint32_t j) { if (nn[j] < nn[i]) @@ -143,7 +143,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #ifdef GPU_DEBUG // mini verification - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); } @@ -151,7 +151,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #endif // consolidate graph (percolate index of seed) - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { auto m = iv[i]; while (m != iv[m]) m = iv[m]; @@ -161,7 +161,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #ifdef GPU_DEBUG alpaka::syncBlockThreads(acc); // mini verification - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); } @@ -169,7 +169,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #ifdef GPU_DEBUG // and verify that we did not spit any cluster... - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { auto minJ = i; auto mdist = eps; auto loop = [&](uint32_t j) { @@ -199,7 +199,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold; // mark these tracks with a negative id. - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] == int(i)) { if (nn[i] >= minT) { auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); @@ -214,7 +214,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] >= 0) { // mark each track in a cluster with the same id as the first one iv[i] = iv[iv[i]]; @@ -223,7 +223,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // adjust the cluster id to be a positive value starting from 0 - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { iv[i] = -iv[i] - 1; } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h index 769896aa97252..38cfb0bec2289 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h @@ -62,7 +62,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto& hist = alpaka::declareSharedVar(acc); auto& hws = alpaka::declareSharedVar(acc); - for (auto j : cms::alpakatools::elements_with_stride(acc, Hist::totbins())) { + for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) { hist.off[j] = 0; } alpaka::syncBlockThreads(acc); @@ -75,7 +75,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 iz = std::clamp(iz, INT8_MIN, INT8_MAX); @@ -93,13 +93,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint32_t(i)); } alpaka::syncBlockThreads(acc); // count neighbours - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (ezt2[i] > er2mx) continue; auto loop = [&](uint32_t j) { @@ -118,7 +118,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // find NN with smaller z... - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (nn[i] < minT) continue; // DBSCAN core rule float mz = zt[i]; @@ -141,7 +141,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #ifdef GPU_DEBUG // mini verification - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); } @@ -149,7 +149,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #endif // consolidate graph (percolate index of seed) - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { auto m = iv[i]; while (m != iv[m]) m = iv[m]; @@ -160,7 +160,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #ifdef GPU_DEBUG // mini verification - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); } @@ -169,7 +169,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #ifdef GPU_DEBUG // and verify that we did not spit any cluster... - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (nn[i] < minT) continue; // DBSCAN core rule ALPAKA_ASSERT_OFFLOAD(zt[iv[i]] <= zt[i]); @@ -194,7 +194,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { #endif // collect edges (assign to closest cluster of closest point??? here to closest point) - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule if (nn[i] >= minT) continue; // DBSCAN edge rule @@ -219,7 +219,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // find the number of different clusters, identified by a tracks with clus[i] == i; // mark these tracks with a negative id. - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] == int(i)) { if (nn[i] >= minT) { auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); @@ -234,7 +234,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] >= 0) { // mark each track in a cluster with the same id as the first one iv[i] = iv[iv[i]]; @@ -243,7 +243,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // adjust the cluster id to be a positive value starting from 0 - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { iv[i] = -iv[i] - 1; } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h index 6468fb9e185c4..100b4b6d42d84 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h @@ -61,7 +61,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto& hist = alpaka::declareSharedVar(acc); auto& hws = alpaka::declareSharedVar(acc); - for (auto j : cms::alpakatools::elements_with_stride(acc, Hist::totbins())) { + for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) { hist.off[j] = 0; } alpaka::syncBlockThreads(acc); @@ -74,7 +74,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 iz = std::clamp(iz, INT8_MIN, INT8_MAX); @@ -95,13 +95,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::syncBlockThreads(acc); ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint16_t(i)); } alpaka::syncBlockThreads(acc); // count neighbours - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (ezt2[i] > er2mx) continue; auto loop = [&](uint32_t j) { @@ -127,7 +127,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { bool more = true; while (alpaka::syncBlockThreadsPredicate(acc, more)) { if (1 == nloops % 2) { - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { auto m = iv[i]; while (m != iv[m]) m = iv[m]; @@ -135,7 +135,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } } else { more = false; - for (auto k : cms::alpakatools::elements_with_stride(acc, hist.size())) { + for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) { auto p = hist.begin() + k; auto i = (*p); auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1)); @@ -167,7 +167,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // while // collect edges (assign to closest cluster of closest point??? here to closest point) - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule if (nn[i] >= minT) continue; // DBSCAN edge rule @@ -192,7 +192,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // find the number of different clusters, identified by a tracks with clus[i] == i; // mark these tracks with a negative id. - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] == int(i)) { if (nn[i] >= minT) { auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); @@ -207,7 +207,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] >= 0) { // mark each track in a cluster with the same id as the first one iv[i] = iv[iv[i]]; @@ -216,7 +216,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::syncBlockThreads(acc); // adjust the cluster id to be a positive value starting from 0 - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { iv[i] = -iv[i] - 1; } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h index 5ee24f610c1aa..caba60c826823 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h @@ -42,7 +42,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto foundClusters = nvFinal; // zero - for (auto i : cms::alpakatools::elements_with_stride(acc, foundClusters)) { + for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) { zv[i] = 0; wv[i] = 0; chi2[i] = 0; @@ -58,7 +58,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // compute cluster location - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] > 9990) { if constexpr (verbose) alpaka::atomicAdd(acc, &noise, 1, alpaka::hierarchy::Threads{}); @@ -73,7 +73,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // reuse nn - for (auto i : cms::alpakatools::elements_with_stride(acc, foundClusters)) { + for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) { ALPAKA_ASSERT_OFFLOAD(wv[i] > 0.f); zv[i] /= wv[i]; nn[i] = -1; // ndof @@ -81,7 +81,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // compute chi2 - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] > 9990) continue; @@ -96,7 +96,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - for (auto i : cms::alpakatools::elements_with_stride(acc, foundClusters)) { + for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) { if (nn[i] > 0) { wv[i] *= float(nn[i]) / chi2[i]; } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h index 5d5765ed3d4b8..ff8fab8ab635f 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h @@ -36,17 +36,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { return; // fill indexing - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { data.idv()[ws.itrk()[i]] = iv[i]; }; // can be done asynchronously at the end of previous event - for (auto i : cms::alpakatools::elements_with_stride(acc, nvFinal)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nvFinal)) { ptv2[i] = 0; }; alpaka::syncBlockThreads(acc); - for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] <= 9990) { alpaka::atomicAdd(acc, &ptv2[iv[i]], ptt2[i], alpaka::hierarchy::Blocks{}); } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h index 5a16d9c57a20d..7ba0f905e260b 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h @@ -100,7 +100,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) { + for (auto k : cms::alpakatools::uniform_elements(acc, nq)) { auto i = newV[k]; alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{}); alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{}); @@ -113,7 +113,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) { + for (auto k : cms::alpakatools::uniform_elements(acc, nq)) { auto d0 = fabs(zz[k] - znew[0]); auto d1 = fabs(zz[k] - znew[1]); auto newer = d0 < d1 ? 0 : 1; @@ -145,7 +145,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { if (0 == threadIdxLocal) igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{}); alpaka::syncBlockThreads(acc); - for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) { + for (auto k : cms::alpakatools::uniform_elements(acc, nq)) { if (1 == newV[k]) iv[it[k]] = igv; } diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index 89a8ee676e35b..2d33fee32752c 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -39,7 +39,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto const* quality = tracks_view.quality(); using helper = TracksUtilities; - for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.nTracks())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.nTracks())) { [[maybe_unused]] auto nHits = helper::nHits(tracks_view, idx); ALPAKA_ASSERT_OFFLOAD(nHits >= 3);