From 856ec61cf98d6d124c0f6e95aec76fe76c343bba Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 11 Feb 2024 23:27:51 +0100 Subject: [PATCH] Update the Pixel reconstruction for Alpaka 1.1.0 --- .../test/alpaka/ZVertexSoA_test.dev.cc | 2 +- .../plugins/alpaka/ClusterChargeCut.h | 10 ++-- .../plugins/alpaka/PixelClustering.h | 38 +++++++-------- .../alpaka/SiPixelRawToClusterKernel.dev.cc | 18 ++++---- .../plugins/alpaka/PixelRecHits.h | 12 ++--- .../plugins/alpaka/BrokenLineFit.dev.cc | 46 +++++++++---------- .../PixelSeeding/plugins/alpaka/CACell.h | 4 +- .../CAHitNtupletGeneratorKernels.dev.cc | 2 +- .../alpaka/CAHitNtupletGeneratorKernelsImpl.h | 28 +++++------ .../plugins/alpaka/CAPixelDoublets.h | 6 +-- .../plugins/alpaka/CAPixelDoubletsAlgos.h | 18 ++++---- .../PixelSeeding/plugins/alpaka/HelixFit.cc | 4 +- .../plugins/alpaka/RiemannFit.dev.cc | 28 +++++------ .../plugins/alpaka/clusterTracksByDensity.h | 30 ++++++------ .../plugins/alpaka/clusterTracksDBSCAN.h | 28 +++++------ .../plugins/alpaka/clusterTracksIterative.h | 22 ++++----- .../plugins/alpaka/fitVertices.h | 8 ++-- .../plugins/alpaka/splitVertices.h | 12 ++--- .../plugins/alpaka/vertexFinder.dev.cc | 2 +- 19 files changed, 159 insertions(+), 159 deletions(-) diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc index 1b22159a53b88..749073d1f916f 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc @@ -34,7 +34,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template >> ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const { if (cms::alpakatools::once_per_grid(acc)) { - ALPAKA_ASSERT_OFFLOAD(zvertex_view.nvFinal() == 420); + ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 420); } for (int32_t j : elements_with_stride(acc, zvertex_view.nvFinal())) { diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h index d50995cf8d6e5..80a7f4301be42 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -33,8 +33,8 @@ namespace pixelClustering { constexpr int startBPIX2 = TrackerTraits::layerStart[1]; - ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < maxNumModules); - ALPAKA_ASSERT_OFFLOAD(startBPIX2 < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < maxNumModules); + ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules); auto endModule = clus_view[0].moduleStart(); for (auto module : cms::alpakatools::independent_groups(acc, endModule)) { @@ -53,7 +53,7 @@ namespace pixelClustering { // reached the end of the module while skipping the invalid pixels, skip this module continue; } - ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules); uint32_t nclus = clus_view[thisModuleId].clusInModule(); if (nclus == 0) @@ -87,7 +87,7 @@ namespace pixelClustering { printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module); #endif - ALPAKA_ASSERT_OFFLOAD(nclus <= maxNumClustersPerModules); + ALPAKA_ASSERT_ACC(nclus <= maxNumClustersPerModules); for (auto i : cms::alpakatools::independent_group_elements(acc, nclus)) { charge[i] = 0; } @@ -136,7 +136,7 @@ namespace pixelClustering { alpaka::syncBlockThreads(acc); } } - ALPAKA_ASSERT_OFFLOAD(nclus >= newclusId[nclus - 1]); + ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]); clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1]; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h index b2fcca94e1d24..4a2d9e72e9366 100644 --- 
a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -118,7 +118,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { &clus_view[0].moduleStart(), static_cast(::pixelClustering::maxNumModules), alpaka::hierarchy::Blocks{}); - ALPAKA_ASSERT_OFFLOAD(loc < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(loc < TrackerTraits::numberOfModules); #ifdef GPU_DEBUG printf("> New module (no. %d) found at digi %d \n", loc, i); #endif @@ -143,7 +143,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { for (uint32_t module : cms::alpakatools::independent_groups(acc, lastModule)) { auto firstPixel = clus_view[1 + module].moduleStart(); uint32_t thisModuleId = digi_view[firstPixel].moduleId(); - ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules); + ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules); #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) @@ -182,8 +182,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD((lastPixel == numElements) or - ((lastPixel < numElements) and (digi_view[lastPixel].moduleId() != thisModuleId))); + ALPAKA_ASSERT_ACC((lastPixel == numElements) or + ((lastPixel < numElements) and (digi_view[lastPixel].moduleId() != thisModuleId))); // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) if (cms::alpakatools::once_per_block(acc)) { if (lastPixel - firstPixel > TrackerTraits::maxPixInModule) { @@ -195,7 +195,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(lastPixel - firstPixel <= TrackerTraits::maxPixInModule); + ALPAKA_ASSERT_ACC(lastPixel - firstPixel <= TrackerTraits::maxPixInModule); #ifdef GPU_DEBUG auto& totGood = alpaka::declareSharedVar(acc); @@ -254,7 +254,7 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { hist.finalize(acc, ws); alpaka::syncBlockThreads(acc); #ifdef GPU_DEBUG - ALPAKA_ASSERT_OFFLOAD(hist.size() == totGood); + ALPAKA_ASSERT_ACC(hist.size() == totGood); if (thisModuleId % 100 == 1) if (cms::alpakatools::once_per_block(acc)) printf("histo size %d\n", hist.size()); @@ -299,11 +299,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { // with blockDimension = threadPerBlock * elementsPerThread. // Hence, maxIter can be tuned accordingly to the workdiv. constexpr unsigned int maxIterGPU = 16; - ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) < maxIterGPU); + ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < maxIterGPU); // NB: can be tuned. constexpr uint32_t maxElements = cms::alpakatools::requires_single_thread_per_block_v ? 256 : 1; - ALPAKA_ASSERT_OFFLOAD((alpaka::getWorkDiv(acc)[0u] <= maxElements)); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0u] <= maxElements)); constexpr unsigned int maxIter = maxIterGPU * maxElements; @@ -321,23 +321,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { // fill the nearest neighbours uint32_t k = 0; for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - ALPAKA_ASSERT_OFFLOAD(k < maxIter); + ALPAKA_ASSERT_ACC(k < maxIter); auto p = hist.begin() + j; auto i = *p + firstPixel; - ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); - ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() == thisModuleId); // same module + ALPAKA_ASSERT_ACC(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); + ALPAKA_ASSERT_ACC(digi_view[i].moduleId() == thisModuleId); // same module auto bin = Hist::bin(digi_view[i].yy() + 1); auto end = hist.end(bin); ++p; - ALPAKA_ASSERT_OFFLOAD(0 == nnn[k]); + ALPAKA_ASSERT_ACC(0 == nnn[k]); for (; p < end; ++p) { auto m = *p + firstPixel; - ALPAKA_ASSERT_OFFLOAD(m != i); - ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); - 
ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); + ALPAKA_ASSERT_ACC(m != i); + ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); + ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) { auto l = nnn[k]++; - ALPAKA_ASSERT_OFFLOAD(l < maxNeighbours); + ALPAKA_ASSERT_ACC(l < maxNeighbours); nn[k][l] = *p; } } @@ -360,13 +360,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { more = false; uint32_t k = 0; for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - ALPAKA_ASSERT_OFFLOAD(k < maxIter); + ALPAKA_ASSERT_ACC(k < maxIter); auto p = hist.begin() + j; auto i = *p + firstPixel; for (int kk = 0; kk < nnn[k]; ++kk) { auto l = nn[k][kk]; auto m = l + firstPixel; - ALPAKA_ASSERT_OFFLOAD(m != i); + ALPAKA_ASSERT_ACC(m != i); // FIXME ::Threads ? auto old = alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Blocks{}); if (old != digi_view[i].clus()) { @@ -404,7 +404,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { if (cms::alpakatools::once_per_block(acc)) n0 = nloops; alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(alpaka::syncBlockThreadsPredicate(acc, nloops == n0)); + ALPAKA_ASSERT_ACC(alpaka::syncBlockThreadsPredicate(acc, nloops == n0)); if (thisModuleId % 100 == 1) if (cms::alpakatools::once_per_block(acc)) printf("# loops %d\n", nloops); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index 6a28f0cd0504a..13b971753bd75 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -432,16 +432,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { struct FillHitsModuleStart { 
template ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const { - ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 + ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 constexpr int numberOfModules = TrackerTraits::numberOfModules; constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; #ifndef NDEBUG [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(0 == blockIdxLocal); + ALPAKA_ASSERT_ACC(0 == blockIdxLocal); [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(1 == gridDimension); + ALPAKA_ASSERT_ACC(1 == gridDimension); #endif // limit to maxHitsInModule; @@ -488,16 +488,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::syncBlockThreads(acc); } #ifdef GPU_DEBUG - ALPAKA_ASSERT_OFFLOAD(0 == clus_view[0].moduleStart()); + ALPAKA_ASSERT_ACC(0 == clus_view[0].moduleStart()); auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart()); - ALPAKA_ASSERT_OFFLOAD(c0 == clus_view[1].moduleStart()); - ALPAKA_ASSERT_OFFLOAD(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); - ALPAKA_ASSERT_OFFLOAD(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); - ALPAKA_ASSERT_OFFLOAD(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart()); + ALPAKA_ASSERT_ACC(c0 == clus_view[1].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[numberOfModules].moduleStart() >= clus_view[1025].moduleStart()); for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules + 1)) { if (0 != i) - ALPAKA_ASSERT_OFFLOAD(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart()); + ALPAKA_ASSERT_ACC(clus_view[i].moduleStart() >= 
clus_view[i - 1].moduleStart()); // Check BPX2 (1), FP1 (4) constexpr auto bpix2 = TrackerTraits::layerStart[1]; constexpr auto fpix1 = TrackerTraits::layerStart[4]; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h index d90f38c11c984..aacdeb79a2749 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -38,7 +38,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t nonEmptyModules, SiPixelClustersSoAConstView clusters, TrackingRecHitSoAView hits) const { - ALPAKA_ASSERT_OFFLOAD(cpeParams); + ALPAKA_ASSERT_ACC(cpeParams); // outer loop: one block per module for (uint32_t module : cms::alpakatools::independent_groups(acc, nonEmptyModules)) { @@ -80,7 +80,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto k = clusters[1 + module].moduleStart(); while (digis[k].moduleId() == invalidModuleId) ++k; - ALPAKA_ASSERT_OFFLOAD(digis[k].moduleId() == me); + ALPAKA_ASSERT_ACC(digis[k].moduleId() == me); } if (me % 100 == 1) @@ -129,8 +129,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (cl < startClus || cl >= lastClus) continue; cl -= startClus; - ALPAKA_ASSERT_OFFLOAD(cl >= 0); - ALPAKA_ASSERT_OFFLOAD(cl < maxHitsInIter); + ALPAKA_ASSERT_ACC(cl >= 0); + ALPAKA_ASSERT_ACC(cl < maxHitsInIter); auto x = digis[i].xx(); auto y = digis[i].yy(); alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); @@ -152,8 +152,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (cl < startClus || cl >= lastClus) continue; cl -= startClus; - ALPAKA_ASSERT_OFFLOAD(cl >= 0); - ALPAKA_ASSERT_OFFLOAD(cl < maxHitsInIter); + ALPAKA_ASSERT_ACC(cl >= 0); + ALPAKA_ASSERT_ACC(cl < maxHitsInIter); auto x = digis[i].xx(); auto y = digis[i].yy(); auto ch = digis[i].adc(); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc 
b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc index ae6739cfb72df..aaf83bbc2e097 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc @@ -42,17 +42,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { constexpr uint32_t hitsInFit = N; constexpr auto invalidTkId = std::numeric_limits::max(); - ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHitsL); - ALPAKA_ASSERT_OFFLOAD(nHitsL <= nHitsH); - ALPAKA_ASSERT_OFFLOAD(phits); - ALPAKA_ASSERT_OFFLOAD(pfast_fit); - ALPAKA_ASSERT_OFFLOAD(foundNtuplets); - ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity); + ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL); + ALPAKA_ASSERT_ACC(nHitsL <= nHitsH); + ALPAKA_ASSERT_ACC(phits); + ALPAKA_ASSERT_ACC(pfast_fit); + ALPAKA_ASSERT_ACC(foundNtuplets); + ALPAKA_ASSERT_ACC(tupleMultiplicity); // look in bin for this hit multiplicity int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL); - ALPAKA_ASSERT_OFFLOAD(totTK <= int(tupleMultiplicity->size())); - ALPAKA_ASSERT_OFFLOAD(totTK >= 0); + ALPAKA_ASSERT_ACC(totTK <= int(tupleMultiplicity->size())); + ALPAKA_ASSERT_ACC(totTK >= 0); #ifdef BROKENLINE_DEBUG const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); @@ -70,14 +70,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // get it from the ntuple container (one to one to helix) auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx); - ALPAKA_ASSERT_OFFLOAD(static_cast(tkid) < foundNtuplets->nOnes()); + ALPAKA_ASSERT_ACC(static_cast(tkid) < foundNtuplets->nOnes()); ptkids[local_idx] = tkid; auto nHits = foundNtuplets->size(tkid); - ALPAKA_ASSERT_OFFLOAD(nHits >= nHitsL); - ALPAKA_ASSERT_OFFLOAD(nHits <= nHitsH); + ALPAKA_ASSERT_ACC(nHits >= nHitsL); + ALPAKA_ASSERT_ACC(nHits <= nHitsH); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -109,7 +109,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { int j = int(n + 0.5f); // round if (hitsInFit - 1 == i) j 
= nHits - 1; // force last hit to ensure max lever arm. - ALPAKA_ASSERT_OFFLOAD(j < int(nHits)); + ALPAKA_ASSERT_ACC(j < int(nHits)); n += incr; auto hit = hitId[j]; float ge[6]; @@ -118,7 +118,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto const &dp = cpeParams->detParams(hh.detectorIndex(hit)); auto status = hh[hit].chargeAndStatus().status; int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin; - ALPAKA_ASSERT_OFFLOAD(qbin >= 0 && qbin < 5); + ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5); bool nok = (status.isBigY | status.isOneY); // compute cotanbeta and use it to recompute error dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz); @@ -161,10 +161,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { brokenline::fastFit(acc, hits, fast_fit); // no NaN here.... - ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3)); + ALPAKA_ASSERT_ACC(fast_fit(0) == fast_fit(0)); + ALPAKA_ASSERT_ACC(fast_fit(1) == fast_fit(1)); + ALPAKA_ASSERT_ACC(fast_fit(2) == fast_fit(2)); + ALPAKA_ASSERT_ACC(fast_fit(3) == fast_fit(3)); } } }; @@ -181,10 +181,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit) const { - ALPAKA_ASSERT_OFFLOAD(results_view.pt()); - ALPAKA_ASSERT_OFFLOAD(results_view.eta()); - ALPAKA_ASSERT_OFFLOAD(results_view.chi2()); - ALPAKA_ASSERT_OFFLOAD(pfast_fit); + ALPAKA_ASSERT_ACC(results_view.pt()); + ALPAKA_ASSERT_ACC(results_view.eta()); + ALPAKA_ASSERT_ACC(results_view.chi2()); + ALPAKA_ASSERT_ACC(pfast_fit); constexpr auto invalidTkId = std::numeric_limits::max(); // same as above... 
@@ -195,7 +195,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { break; auto tkid = ptkids[local_idx]; - ALPAKA_ASSERT_OFFLOAD(tkid < TrackerTraits::maxNumberOfTuples); + ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -247,7 +247,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t hitsInFit, uint32_t maxNumberOfTuples, Queue &queue) { - ALPAKA_ASSERT_OFFLOAD(tuples_); + ALPAKA_ASSERT_ACC(tuples_); uint32_t blockSize = 64; uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h index 4c83eef84fdfe..d8af548109d29 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h @@ -301,11 +301,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if constexpr (DEPTH <= 0) { printf("ERROR: CACellT::find_ntuplets reached full depth!\n"); - ALPAKA_ASSERT_OFFLOAD(false); + ALPAKA_ASSERT_ACC(false); } else { auto doubletId = this - cells; tmpNtuplet.push_back_unsafe(doubletId); - ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); + ALPAKA_ASSERT_ACC(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); bool last = true; for (unsigned int otherCell : outerNeighbors()) { diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index 56bae962fbe06..0cc24f81254aa 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -280,7 +280,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif // in principle we can use "nhits" to heuristically dimension the workspace... 
- ALPAKA_ASSERT_OFFLOAD(this->device_isOuterHitOfCell_.data()); + ALPAKA_ASSERT_ACC(this->device_isOuterHitOfCell_.data()); alpaka::exec( queue, diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index 7b296324ba3eb..e7ff7a2c5a01a 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -108,17 +108,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { apc->get().second, nHits); if (apc->get().first < TrackerTraits::maxNumberOfQuadruplets) { - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size(apc->get().first) == 0); - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size() == apc->get().second); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size(apc->get().first) == 0); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size() == apc->get().second); } } for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx)); - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih) - ALPAKA_ASSERT_OFFLOAD(int(*ih) < nHits); + ALPAKA_ASSERT_ACC(int(*ih) < nHits); } #endif @@ -198,7 +198,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { bool dupPassThrough) const { // quality to mark rejected constexpr auto reject = Quality::edup; /// cannot be loose - ALPAKA_ASSERT_OFFLOAD(nCells); + ALPAKA_ASSERT_ACC(nCells); for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { auto const &thisCell =
cells[idx]; @@ -239,7 +239,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto const reject = dupPassThrough ? Quality::loose : Quality::dup; constexpr auto loose = Quality::loose; - ALPAKA_ASSERT_OFFLOAD(nCells); + ALPAKA_ASSERT_ACC(nCells); const auto ntNCells = (*nCells); for (auto idx : cms::alpakatools::uniform_elements(acc, ntNCells)) { @@ -431,7 +431,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { stack, params.minHitsPerNtuplet_, bpix1Start); - ALPAKA_ASSERT_OFFLOAD(stack.empty()); + ALPAKA_ASSERT_ACC(stack.empty()); } } } @@ -466,10 +466,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; if (tracks_view[it].quality() == Quality::edup) continue; - ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad); if (nhits > TrackerTraits::maxHitsOnTrack) // current limit printf("wrong mult %d %d\n", it, nhits); - ALPAKA_ASSERT_OFFLOAD(nhits <= TrackerTraits::maxHitsOnTrack); + ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack); tupleMultiplicity->count(acc, nhits); } } @@ -488,10 +488,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; if (tracks_view[it].quality() == Quality::edup) continue; - ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad); if (nhits > TrackerTraits::maxHitsOnTrack) printf("wrong mult %d %d\n", it, nhits); - ALPAKA_ASSERT_OFFLOAD(nhits <= TrackerTraits::maxHitsOnTrack); + ALPAKA_ASSERT_ACC(nhits <= TrackerTraits::maxHitsOnTrack); tupleMultiplicity->fill(acc, nhits, it); } } @@ -513,7 +513,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (tracks_view[it].quality() == Quality::edup) continue; - ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + ALPAKA_ASSERT_ACC(tracks_view[it].quality() == Quality::bad); // mark 
doublets as bad if (nhits < 3) @@ -607,7 +607,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { } // fill hit indices for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().size())) { - ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size()); + ALPAKA_ASSERT_ACC(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size()); tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex(); } } @@ -626,7 +626,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (cms::alpakatools::once_per_grid(acc)) tracks_view.nTracks() = ntracks; for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) { - ALPAKA_ASSERT_OFFLOAD(TracksUtilities::nHits(tracks_view, idx) >= 3); + ALPAKA_ASSERT_ACC(TracksUtilities::nHits(tracks_view, idx) >= 3); tracks_view[idx].nLayers() = TracksUtilities::computeNumberOfLayers(tracks_view, idx); } } diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h index 580198772034d..e3116eb5b7f43 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h @@ -27,7 +27,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { CellNeighbors* cellNeighborsContainer, CellTracksVector* cellTracks, CellTracks* cellTracksContainer) const { - ALPAKA_ASSERT_OFFLOAD((*isOuterHitOfCell).container); + ALPAKA_ASSERT_ACC((*isOuterHitOfCell).container); for (auto i : cms::alpakatools::uniform_elements(acc, nHits - isOuterHitOfCell->offset)) (*isOuterHitOfCell).container[i].reset(); @@ -36,10 +36,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { cellNeighbors->construct(TrackerTraits::maxNumOfActiveDoublets, cellNeighborsContainer); cellTracks->construct(TrackerTraits::maxNumOfActiveDoublets, cellTracksContainer); [[maybe_unused]] auto i = cellNeighbors->extend(acc); - 
ALPAKA_ASSERT_OFFLOAD(0 == i); + ALPAKA_ASSERT_ACC(0 == i); (*cellNeighbors)[0].reset(); i = cellTracks->extend(acc); - ALPAKA_ASSERT_OFFLOAD(0 == i); + ALPAKA_ASSERT_ACC(0 == i); (*cellTracks)[0].reset(); } } diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h index 048aaf2058d27..97d9acdd8739a 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h @@ -155,7 +155,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto const& __restrict__ phiBinner = hh.phiBinner(); uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data(); - ALPAKA_ASSERT_OFFLOAD(offsets); + ALPAKA_ASSERT_ACC(offsets); auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; }; @@ -189,20 +189,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { ; --pairLayerId; - ALPAKA_ASSERT_OFFLOAD(pairLayerId < nPairs); - ALPAKA_ASSERT_OFFLOAD(j < innerLayerCumulativeSize[pairLayerId]); - ALPAKA_ASSERT_OFFLOAD(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); + ALPAKA_ASSERT_ACC(pairLayerId < nPairs); + ALPAKA_ASSERT_ACC(j < innerLayerCumulativeSize[pairLayerId]); + ALPAKA_ASSERT_ACC(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId]; uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1]; - ALPAKA_ASSERT_OFFLOAD(outer > inner); + ALPAKA_ASSERT_ACC(outer > inner); auto hoff = PhiBinner::histOff(outer); auto i = (0 == pairLayerId) ? 
j : j - innerLayerCumulativeSize[pairLayerId - 1]; i += offsets[inner]; - ALPAKA_ASSERT_OFFLOAD(i >= offsets[inner]); - ALPAKA_ASSERT_OFFLOAD(i < offsets[inner + 1]); + ALPAKA_ASSERT_ACC(i >= offsets[inner]); + ALPAKA_ASSERT_ACC(i < offsets[inner + 1]); // found hit corresponding to our worker thread, now do the job if (hh[i].detectorIndex() > pixelClustering::maxNumModules) @@ -267,8 +267,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { for (uint32_t pIndex : cms::alpakatools::independent_group_elements_x(acc, maxpIndex)) { // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough? auto oi = p[pIndex]; - ALPAKA_ASSERT_OFFLOAD(oi >= offsets[outer]); - ALPAKA_ASSERT_OFFLOAD(oi < offsets[outer + 1]); + ALPAKA_ASSERT_ACC(oi >= offsets[outer]); + ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]); auto mo = hh[oi].detectorIndex(); // invalid diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc index 078cbe8de45a4..d0fe19233b225 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc @@ -8,8 +8,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tupleMultiplicity_ = tupleMultiplicity; outputSoa_ = helix_fit_results; - ALPAKA_ASSERT_OFFLOAD(tuples_); - ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity_); + ALPAKA_ASSERT_ACC(tuples_); + ALPAKA_ASSERT_ACC(tupleMultiplicity_); } template diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc index 9ab7d1fdf1e78..a822bbd8a8252 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc @@ -40,11 +40,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t offset) const { constexpr uint32_t hitsInFit = N; - ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHits); + ALPAKA_ASSERT_ACC(hitsInFit <= nHits); - ALPAKA_ASSERT_OFFLOAD(pfast_fit); - 
ALPAKA_ASSERT_OFFLOAD(foundNtuplets); - ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity); + ALPAKA_ASSERT_ACC(pfast_fit); + ALPAKA_ASSERT_ACC(foundNtuplets); + ALPAKA_ASSERT_ACC(tupleMultiplicity); // look in bin for this hit multiplicity @@ -62,9 +62,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // get it from the ntuple container (one to one to helix) auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); - ALPAKA_ASSERT_OFFLOAD(static_cast(tkid) < foundNtuplets->nOnes()); + ALPAKA_ASSERT_ACC(static_cast(tkid) < foundNtuplets->nOnes()); - ALPAKA_ASSERT_OFFLOAD(foundNtuplets->size(tkid) == nHits); + ALPAKA_ASSERT_ACC(foundNtuplets->size(tkid) == nHits); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -83,10 +83,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { riemannFit::fastFit(acc, hits, fast_fit); // no NaN here.... - ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2)); - ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3)); + ALPAKA_ASSERT_ACC(fast_fit(0) == fast_fit(0)); + ALPAKA_ASSERT_ACC(fast_fit(1) == fast_fit(1)); + ALPAKA_ASSERT_ACC(fast_fit(2) == fast_fit(2)); + ALPAKA_ASSERT_ACC(fast_fit(3) == fast_fit(3)); } } }; @@ -104,8 +104,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { double *__restrict__ pfast_fit_input, riemannFit::CircleFit *circle_fit, uint32_t offset) const { - ALPAKA_ASSERT_OFFLOAD(circle_fit); - ALPAKA_ASSERT_OFFLOAD(N <= nHits); + ALPAKA_ASSERT_ACC(circle_fit); + ALPAKA_ASSERT_ACC(N <= nHits); // same as above... @@ -151,8 +151,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { double *__restrict__ pfast_fit_input, riemannFit::CircleFit *__restrict__ circle_fit, uint32_t offset) const { - ALPAKA_ASSERT_OFFLOAD(circle_fit); - ALPAKA_ASSERT_OFFLOAD(N <= nHits); + ALPAKA_ASSERT_ACC(circle_fit); + ALPAKA_ASSERT_ACC(N <= nHits); // same as above... 
diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h index cb772a7e653b4..122457a7d05d2 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h @@ -57,11 +57,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(ezt2); - ALPAKA_ASSERT_OFFLOAD(izt); - ALPAKA_ASSERT_OFFLOAD(nn); - ALPAKA_ASSERT_OFFLOAD(iv); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(ezt2); + ALPAKA_ASSERT_ACC(izt); + ALPAKA_ASSERT_ACC(nn); + ALPAKA_ASSERT_ACC(iv); using Hist = cms::alpakatools::HistoContainer; auto& hist = alpaka::declareSharedVar(acc); @@ -76,17 +76,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { if (cms::alpakatools::once_per_block(acc)) printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt); } - ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); + ALPAKA_ASSERT_ACC(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only iz = std::min(std::max(iz, INT8_MIN), INT8_MAX); izt[i] = iz - INT8_MIN; - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256); hist.count(acc, izt[i]); iv[i] = i; nn[i] = 0; @@ -97,7 +97,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); - 
ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + ALPAKA_ASSERT_ACC(hist.size() == nt); for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint16_t(i)); } @@ -145,7 +145,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } alpaka::syncBlockThreads(acc); #endif @@ -163,7 +163,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } #endif @@ -187,8 +187,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { }; cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); // should belong to the same cluster... - ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[minJ]); - ALPAKA_ASSERT_OFFLOAD(nn[i] <= nn[iv[i]]); + ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]); + ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]); } alpaka::syncBlockThreads(acc); #endif @@ -211,7 +211,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. 
for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h index 38cfb0bec2289..7090599dcfdb0 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h @@ -53,10 +53,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(iv); - ALPAKA_ASSERT_OFFLOAD(nn); - ALPAKA_ASSERT_OFFLOAD(ezt2); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(iv); + ALPAKA_ASSERT_ACC(nn); + ALPAKA_ASSERT_ACC(ezt2); using Hist = cms::alpakatools::HistoContainer; auto& hist = alpaka::declareSharedVar(acc); @@ -72,16 +72,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); } - ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); + ALPAKA_ASSERT_ACC(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 iz = std::clamp(iz, INT8_MIN, INT8_MAX); izt[i] = iz - INT8_MIN; - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256); hist.count(acc, izt[i]); iv[i] = i; nn[i] = 0; @@ -92,7 +92,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + ALPAKA_ASSERT_ACC(hist.size() == nt); for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { 
hist.fill(acc, izt[i], uint32_t(i)); } @@ -143,7 +143,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } alpaka::syncBlockThreads(acc); #endif @@ -162,7 +162,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // mini verification for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (iv[i] != int(i)) - ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i)); } alpaka::syncBlockThreads(acc); #endif @@ -172,7 +172,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { if (nn[i] < minT) continue; // DBSCAN core rule - ALPAKA_ASSERT_OFFLOAD(zt[iv[i]] <= zt[i]); + ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]); auto loop = [&](uint32_t j) { if (nn[j] < minT) return; // DBSCAN core rule @@ -186,7 +186,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]); ; } - ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[j]); + ALPAKA_ASSERT_ACC(iv[i] == iv[j]); }; cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); } @@ -231,7 +231,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. 
for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h index 100b4b6d42d84..38e8429c0d28f 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h @@ -52,10 +52,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(nn); - ALPAKA_ASSERT_OFFLOAD(iv); - ALPAKA_ASSERT_OFFLOAD(ezt2); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(nn); + ALPAKA_ASSERT_ACC(iv); + ALPAKA_ASSERT_ACC(ezt2); using Hist = cms::alpakatools::HistoContainer; auto& hist = alpaka::declareSharedVar(acc); @@ -71,16 +71,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); } - ALPAKA_ASSERT_OFFLOAD(static_cast(nt) <= hist.capacity()); + ALPAKA_ASSERT_ACC(static_cast(nt) <= hist.capacity()); // fill hist (bin shall be wider than "eps") for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS); int iz = int(zt[i] * 10.); // valid if eps<=0.1 iz = std::clamp(iz, INT8_MIN, INT8_MAX); izt[i] = iz - INT8_MIN; - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); - ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256); hist.count(acc, izt[i]); iv[i] = i; nn[i] = 0; @@ -94,7 +94,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hist.finalize(acc, hws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + ALPAKA_ASSERT_ACC(hist.size() == nt); for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { hist.fill(acc, izt[i], uint16_t(i)); } @@ -142,7 +142,7 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE { if (nn[i] < minT) continue; // DBSCAN core rule auto loop = [&](uint32_t j) { - ALPAKA_ASSERT_OFFLOAD(i != j); + ALPAKA_ASSERT_ACC(i != j); if (nn[j] < minT) return; // DBSCAN core rule auto dist = std::abs(zt[i] - zt[j]); @@ -204,7 +204,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX); // propagate the negative id to all the tracks in the cluster. for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h index caba60c826823..a8c428e2f5a00 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h @@ -37,7 +37,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(nvFinal <= nvIntermediate); + ALPAKA_ASSERT_ACC(nvFinal <= nvIntermediate); nvFinal = nvIntermediate; auto foundClusters = nvFinal; @@ -64,8 +64,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::atomicAdd(acc, &noise, 1, alpaka::hierarchy::Threads{}); continue; } - ALPAKA_ASSERT_OFFLOAD(iv[i] >= 0); - ALPAKA_ASSERT_OFFLOAD(iv[i] < int(foundClusters)); + ALPAKA_ASSERT_ACC(iv[i] >= 0); + ALPAKA_ASSERT_ACC(iv[i] < int(foundClusters)); auto w = 1.f / ezt2[i]; alpaka::atomicAdd(acc, &zv[iv[i]], zt[i] * w, alpaka::hierarchy::Threads{}); alpaka::atomicAdd(acc, &wv[iv[i]], w, alpaka::hierarchy::Threads{}); @@ -74,7 +74,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { alpaka::syncBlockThreads(acc); // reuse nn for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) { - ALPAKA_ASSERT_OFFLOAD(wv[i] > 0.f); + ALPAKA_ASSERT_ACC(wv[i] > 0.f); zv[i] /= wv[i]; nn[i] = -1; // ndof 
} diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h index 7ba0f905e260b..e2ba0b46b8be4 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h @@ -38,10 +38,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { int32_t const* __restrict__ nn = data.ndof(); int32_t* __restrict__ iv = ws.iv(); - ALPAKA_ASSERT_OFFLOAD(zt); - ALPAKA_ASSERT_OFFLOAD(wv); - ALPAKA_ASSERT_OFFLOAD(chi2); - ALPAKA_ASSERT_OFFLOAD(nn); + ALPAKA_ASSERT_ACC(zt); + ALPAKA_ASSERT_ACC(wv); + ALPAKA_ASSERT_ACC(chi2); + ALPAKA_ASSERT_ACC(nn); constexpr uint32_t MAXTK = 512; @@ -61,7 +61,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { if (chi2[kv] < maxChi2 * float(nn[kv])) continue; - ALPAKA_ASSERT_OFFLOAD(nn[kv] < int32_t(MAXTK)); + ALPAKA_ASSERT_ACC(nn[kv] < int32_t(MAXTK)); if ((uint32_t)nn[kv] >= MAXTK) continue; // too bad FIXME @@ -85,7 +85,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto& wnew = alpaka::declareSharedVar(acc); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(int(nq) == nn[kv] + 1); + ALPAKA_ASSERT_ACC(int(nq) == nn[kv] + 1); int maxiter = 20; // kt-min.... diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index 2d33fee32752c..b41e07aff56d5 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -41,7 +41,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.nTracks())) { [[maybe_unused]] auto nHits = helper::nHits(tracks_view, idx); - ALPAKA_ASSERT_OFFLOAD(nHits >= 3); + ALPAKA_ASSERT_ACC(nHits >= 3); // initialize soa... soa[idx].idv() = -1;