From 9f6d61262e352d4c05b71824c68ba5849ca22dec Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Fri, 16 Aug 2024 12:55:26 -0700 Subject: [PATCH 1/2] Moved LST and Event classes to ALPAKA_ACCELERATOR_NAMESPACE --- .../plugins/alpaka/LSTModulesDevESProducer.cc | 4 +- RecoTracker/LST/plugins/alpaka/LSTProducer.cc | 4 +- RecoTracker/LSTCore/interface/LST.h | 184 +++--- .../LSTCore/interface/alpaka/Constants.h | 4 +- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 581 +++++++++--------- RecoTracker/LSTCore/src/alpaka/Event.h | 372 ++++++----- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 117 ++-- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 4 +- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 4 +- RecoTracker/LSTCore/src/alpaka/Segment.h | 4 +- .../LSTCore/src/alpaka/TrackCandidate.h | 4 +- RecoTracker/LSTCore/src/alpaka/Triplet.h | 4 +- RecoTracker/LSTCore/standalone/bin/lst.cc | 18 +- .../standalone/code/core/AccessHelper.cc | 114 ++-- .../standalone/code/core/AccessHelper.h | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 24 +- .../LSTCore/standalone/code/core/trkCore.h | 2 +- .../standalone/code/core/write_lst_ntuple.cc | 54 +- .../standalone/code/core/write_lst_ntuple.h | 2 +- 19 files changed, 734 insertions(+), 768 deletions(-) diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc index 908eb796e581b..c1d815210bd53 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -22,8 +22,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { descriptions.addWithDefaultLabel(desc); } - std::unique_ptr> produce(TrackerRecoGeometryRecord const& iRecord) { - return lst::loadAndFillESHost(); + std::unique_ptr<::lst::LSTESData> produce(TrackerRecoGeometryRecord const& iRecord) { + return ::lst::loadAndFillESHost(); } }; diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc index 18bd7c25a9aec..6365eb9822483 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -87,11 +87,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: edm::EDGetTokenT lstPixelSeedInputToken_; edm::EDGetTokenT lstPhase2OTHitsInputToken_; - device::ESGetToken, TrackerRecoGeometryRecord> lstESToken_; + device::ESGetToken<::lst::LSTESData, TrackerRecoGeometryRecord> lstESToken_; const bool verbose_, nopLSDupClean_, tcpLSTriplets_; edm::EDPutTokenT lstOutputToken_; - lst::LST lst_; + lst::LST lst_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/LSTCore/interface/LST.h b/RecoTracker/LSTCore/interface/LST.h index ac23bd09a7ecf..a83399cbd8356 100644 --- a/RecoTracker/LSTCore/interface/LST.h +++ b/RecoTracker/LSTCore/interface/LST.h @@ -8,105 +8,101 @@ #include #include -namespace lst { - template - class Event; +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace lst { + class Event; - template - class LST; + class LST { + public: + LST() = default; - template - class LST { - public: - LST() = default; + void run(Queue& queue, + bool verbose, + ::lst::LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets); + std::vector> const& hits() const { return out_tc_hitIdxs_; } + std::vector const& len() const { return out_tc_len_; } + std::vector const& seedIdx() const { return out_tc_seedIdx_; } + std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } - template - void run(TQueue& queue, - bool verbose, - LSTESData> const* deviceESData, - std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z, - bool no_pls_dupclean, - bool tc_pls_triplets); - std::vector> const& hits() const { return out_tc_hitIdxs_; } - std::vector const& len() const { return out_tc_len_; } - std::vector const& seedIdx() const { return out_tc_seedIdx_; } - std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } + private: + void prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z); - private: - void prepareInput(std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z); + void getOutput(Event& event); + std::vector getHitIdxs(short trackCandidateType, + unsigned int TCIdx, + unsigned int const* TCHitIndices, + unsigned int const* hitIndices); - void getOutput(lst::Event& event); - std::vector getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices); + // Input and output vectors + std::vector in_trkX_; + std::vector in_trkY_; + std::vector in_trkZ_; + std::vector in_hitId_; + std::vector in_hitIdxs_; + std::vector in_hitIndices_vec0_; + std::vector in_hitIndices_vec1_; + std::vector in_hitIndices_vec2_; + std::vector in_hitIndices_vec3_; + std::vector in_deltaPhi_vec_; + std::vector in_ptIn_vec_; + std::vector in_ptErr_vec_; + std::vector in_px_vec_; + std::vector in_py_vec_; + std::vector in_pz_vec_; + std::vector in_eta_vec_; + std::vector in_etaErr_vec_; + std::vector in_phi_vec_; + std::vector in_charge_vec_; + std::vector in_seedIdx_vec_; + std::vector in_superbin_vec_; + std::vector in_pixelType_vec_; + std::vector in_isQuad_vec_; + std::vector> out_tc_hitIdxs_; + std::vector out_tc_len_; + std::vector out_tc_seedIdx_; + std::vector out_tc_trackCandidateType_; + }; - // Input and output vectors - std::vector in_trkX_; - std::vector in_trkY_; - std::vector in_trkZ_; - std::vector in_hitId_; - std::vector in_hitIdxs_; - std::vector in_hitIndices_vec0_; - std::vector in_hitIndices_vec1_; - std::vector in_hitIndices_vec2_; - std::vector in_hitIndices_vec3_; - std::vector in_deltaPhi_vec_; - std::vector in_ptIn_vec_; - std::vector in_ptErr_vec_; - std::vector in_px_vec_; - std::vector in_py_vec_; - std::vector in_pz_vec_; - std::vector in_eta_vec_; - std::vector in_etaErr_vec_; - std::vector in_phi_vec_; - std::vector in_charge_vec_; - std::vector in_seedIdx_vec_; - std::vector in_superbin_vec_; - std::vector in_pixelType_vec_; - std::vector in_isQuad_vec_; - std::vector> out_tc_hitIdxs_; - std::vector out_tc_len_; - std::vector out_tc_seedIdx_; - std::vector out_tc_trackCandidateType_; - }; - -} // namespace lst + } // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 14ab5d8efe7f8..459989670ccdd 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -11,7 +11,7 @@ namespace lst { - using namespace ALPAKA_ACCELERATOR_NAMESPACE; + using namespace alpaka_common; // Half precision wrapper functions. #if defined(FP16_Base) @@ -46,7 +46,7 @@ namespace lst { Vec adjustedThreads = threadsPerBlock; // special overrides for CPU/host cases - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { adjustedBlocks = Vec::all(static_cast(1)); if constexpr (alpaka::accMatchesTags) { diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index f9757b0659691..97c4300456c8c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -2,9 +2,12 @@ #include "Event.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; +using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; +using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; +using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; +using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; -void lst::Event::initSync(bool verbose) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::initSync(bool verbose) { alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; @@ -47,7 +50,7 @@ void lst::Event::initSync(bool verbose) { } } -void lst::Event::resetEventSync() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::resetEventSync() { alpaka::wait(queue); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { @@ -154,24 +157,24 @@ void lst::Event::resetEventSync() { } } -void lst::Event::addHitToEvent(std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& detId, - std::vector const& idxInNtuple) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { // Use the actual number of hits instead of a max. unsigned int nHits = x.size(); // Initialize space on device/host for next event. if (hitsInGPU == nullptr) { - hitsInGPU = new lst::Hits(); - hitsBuffers = new lst::HitsBuffer(nModules_, nHits, devAcc, queue); + hitsInGPU = new ::lst::Hits(); + hitsBuffers = new ::lst::HitsBuffer(nModules_, nHits, devAcc, queue); hitsInGPU->setData(*hitsBuffers); } if (rangesInGPU == nullptr) { - rangesInGPU = new lst::ObjectRanges(); - rangesBuffers = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); + rangesInGPU = new ::lst::ObjectRanges(); + rangesBuffers = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); rangesInGPU->setData(*rangesBuffers); } @@ -188,15 +191,14 @@ void lst::Event::addHitToEvent(std::vector const& x, alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; - Vec3D const blocksPerGrid1{1, 1, max_blocks}; - WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); + Vec3D const blocksPerGrid1{1, 1, ::lst::max_blocks}; + WorkDiv3D const hit_loop_workdiv = ::lst::createWorkDiv(blocksPerGrid1, threadsPerBlock1, ::lst::elementsPerThread); - hitLoopKernel hit_loop_kernel; alpaka::exec(queue, hit_loop_workdiv, - hit_loop_kernel, - Endcap, - TwoS, + ::lst::hitLoopKernel{}, + ::lst::Endcap, + ::lst::TwoS, nModules_, nEndCapMap_, endcapGeometryBuffers_.geoMapDetId_buf.data(), @@ -206,41 +208,41 @@ void lst::Event::addHitToEvent(std::vector const& x, nHits); Vec3D const threadsPerBlock2{1, 1, 256}; - Vec3D const blocksPerGrid2{1, 1, max_blocks}; - WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); + Vec3D const blocksPerGrid2{1, 1, ::lst::max_blocks}; + WorkDiv3D const module_ranges_workdiv = + ::lst::createWorkDiv(blocksPerGrid2, threadsPerBlock2, ::lst::elementsPerThread); - moduleRangesKernel module_ranges_kernel; alpaka::exec( - queue, module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); + queue, module_ranges_workdiv, ::lst::moduleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } -void lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, - std::vector const& hitIndices1, - std::vector const& hitIndices2, - std::vector const& hitIndices3, - std::vector const& dPhiChange, - std::vector const& ptIn, - std::vector const& ptErr, - std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& eta, - std::vector const& etaErr, - std::vector const& phi, - std::vector const& charge, - std::vector const& seedIdx, - std::vector const& superbin, - std::vector const& pixelType, - std::vector const& isQuad) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad) { unsigned int size = ptIn.size(); - if (size > n_max_pixel_segments_per_module) { + if (size > ::lst::n_max_pixel_segments_per_module) { printf( "*********************************************************\n" "* Warning: Pixel line segments will be truncated. *\n" "* You need to increase n_max_pixel_segments_per_module. *\n" "*********************************************************\n"); - size = n_max_pixel_segments_per_module; + size = ::lst::n_max_pixel_segments_per_module; } unsigned int mdSize = 2 * size; @@ -253,25 +255,24 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, ::lst::createMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - mdsInGPU = new lst::MiniDoublets(); - miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new ::lst::MiniDoublets(); + miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); @@ -280,12 +281,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them - WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createSegmentArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; alpaka::exec(queue, createSegmentArrayRanges_workDiv, - createSegmentArrayRanges_kernel, + ::lst::createSegmentArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU); @@ -295,21 +295,21 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments_ += n_max_pixel_segments_per_module; + nTotalSegments_ += ::lst::n_max_pixel_segments_per_module; - segmentsInGPU = new lst::Segments(); - segmentsBuffers = new lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new ::lst::Segments(); + segmentsBuffers = new ::lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); } - auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices1_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices2_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices3_dev = allocBufWrapper(devAcc, size, queue); - auto dPhiChange_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices0_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices1_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices2_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices3_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto dPhiChange_dev = ::lst::allocBufWrapper(devAcc, size, queue); alpaka::memcpy(queue, hitIndices0_dev, hitIndices0, size); alpaka::memcpy(queue, hitIndices1_dev, hitIndices1, size); @@ -352,13 +352,13 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; - Vec3D const blocksPerGrid{1, 1, max_blocks}; - WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + Vec3D const blocksPerGrid{1, 1, ::lst::max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = + ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); - addPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; alpaka::exec(queue, addPixelSegmentToEvent_workdiv, - addPixelSegmentToEvent_kernel, + ::lst::addPixelSegmentToEventKernel{}, *modulesBuffers_.data(), *rangesInGPU, *hitsInGPU, @@ -373,56 +373,53 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& size); } -void lst::Event::createMiniDoublets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createMiniDoublets() { // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, ::lst::createMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { - mdsInGPU = new lst::MiniDoublets(); - miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new ::lst::MiniDoublets(); + miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); } Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; Vec3D const blocksPerGridCreateMDInGPU{1, nLowerModules_ / threadsPerBlockCreateMDInGPU[1], 1}; WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, ::lst::elementsPerThread); - lst::createMiniDoubletsInGPUv2 createMiniDoubletsInGPUv2_kernel; alpaka::exec(queue, createMiniDoubletsInGPUv2_workDiv, - createMiniDoubletsInGPUv2_kernel, + ::lst::createMiniDoubletsInGPUv2{}, *modulesBuffers_.data(), *hitsInGPU, *mdsInGPU, *rangesInGPU); - WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, - addMiniDoubletRangesToEventExplicit_kernel, + ::lst::addMiniDoubletRangesToEventExplicit{}, *modulesBuffers_.data(), *mdsInGPU, *rangesInGPU, @@ -433,34 +430,32 @@ void lst::Event::createMiniDoublets() { } } -void lst::Event::createSegmentsWithModuleMap() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { - segmentsInGPU = new lst::Segments(); - segmentsBuffers = new lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new ::lst::Segments(); + segmentsBuffers = new ::lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; WorkDiv3D const createSegmentsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, ::lst::elementsPerThread); - lst::createSegmentsInGPUv2 createSegmentsInGPUv2_kernel; alpaka::exec(queue, createSegmentsInGPUv2_workDiv, - createSegmentsInGPUv2_kernel, + ::lst::createSegmentsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *rangesInGPU); - WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, - addSegmentRangesToEventExplicit_kernel, + ::lst::addSegmentRangesToEventExplicit{}, *modulesBuffers_.data(), *segmentsInGPU, *rangesInGPU); @@ -470,14 +465,13 @@ void lst::Event::createSegmentsWithModuleMap() { } } -void lst::Event::createTriplets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { if (tripletsInGPU == nullptr) { - WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createTripletArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createTripletArrayRanges createTripletArrayRanges_kernel; alpaka::exec(queue, createTripletArrayRanges_workDiv, - createTripletArrayRanges_kernel, + ::lst::createTripletArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *segmentsInGPU); @@ -488,8 +482,8 @@ void lst::Event::createTriplets() { alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); alpaka::wait(queue); // wait to get the value before using it - tripletsInGPU = new lst::Triplets(); - tripletsBuffers = new lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); + tripletsInGPU = new ::lst::Triplets(); + tripletsBuffers = new ::lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); @@ -527,18 +521,17 @@ void lst::Event::createTriplets() { } // Allocate and copy to device index - auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + auto index_gpu_buf = ::lst::allocBufWrapper(devAcc, nLowerModules_, queue); alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; - Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; + Vec3D const blocksPerGridCreateTrip{::lst::max_blocks, 1, 1}; WorkDiv3D const createTripletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, ::lst::elementsPerThread); - lst::createTripletsInGPUv2 createTripletsInGPUv2_kernel; alpaka::exec(queue, createTripletsInGPUv2_workDiv, - createTripletsInGPUv2_kernel, + ::lst::createTripletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -547,12 +540,11 @@ void lst::Event::createTriplets() { index_gpu_buf.data(), nonZeroModules); - WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, - addTripletRangesToEventExplicit_kernel, + ::lst::addTripletRangesToEventExplicit{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); @@ -562,35 +554,33 @@ void lst::Event::createTriplets() { } } -void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new lst::TrackCandidates(); - trackCandidatesBuffers = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new ::lst::TrackCandidates(); + trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const crossCleanpT3_workDiv = - createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, ::lst::elementsPerThread); - lst::crossCleanpT3 crossCleanpT3_kernel; alpaka::exec(queue, crossCleanpT3_workDiv, - crossCleanpT3_kernel, + ::lst::crossCleanpT3{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, *segmentsInGPU, *pixelQuintupletsInGPU); - WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = ::lst::createWorkDiv({1}, {512}, {1}); - lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, - addpT3asTrackCandidatesInGPU_kernel, + ::lst::addpT3asTrackCandidatesInGPU{}, nLowerModules_, *pixelTripletsInGPU, *trackCandidatesInGPU, @@ -606,24 +596,22 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = - createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, ::lst::elementsPerThread); - lst::removeDupQuintupletsInGPUBeforeTC removeDupQuintupletsInGPUBeforeTC_kernel; alpaka::exec(queue, removeDupQuintupletsInGPUBeforeTC_workDiv, - removeDupQuintupletsInGPUBeforeTC_kernel, + ::lst::removeDupQuintupletsInGPUBeforeTC{}, *quintupletsInGPU, *rangesInGPU); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; - Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, ::lst::max_blocks}; WorkDiv3D const crossCleanT5_workDiv = - createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, ::lst::elementsPerThread); - lst::crossCleanT5 crossCleanT5_kernel; alpaka::exec(queue, crossCleanT5_workDiv, - crossCleanT5_kernel, + ::lst::crossCleanT5{}, *modulesBuffers_.data(), *quintupletsInGPU, *pixelQuintupletsInGPU, @@ -632,13 +620,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; - WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( - blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); + WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( + blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, ::lst::elementsPerThread); - lst::addT5asTrackCandidateInGPU addT5asTrackCandidateInGPU_kernel; alpaka::exec(queue, addT5asTrackCandidateInGPU_workDiv, - addT5asTrackCandidateInGPU_kernel, + ::lst::addT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU, *trackCandidatesInGPU, @@ -646,24 +633,22 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); - lst::checkHitspLS checkHitspLS_kernel; alpaka::exec( - queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true); + queue, checkHitspLS_workDiv, ::lst::checkHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; WorkDiv3D const crossCleanpLS_workDiv = - createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, ::lst::elementsPerThread); - lst::crossCleanpLS crossCleanpLS_kernel; alpaka::exec(queue, crossCleanpLS_workDiv, - crossCleanpLS_kernel, + ::lst::crossCleanpLS{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, @@ -674,24 +659,23 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ *quintupletsInGPU); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; - Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; - WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( - blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); + Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, ::lst::max_blocks}; + WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( + blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, ::lst::elementsPerThread); - lst::addpLSasTrackCandidateInGPU addpLSasTrackCandidateInGPU_kernel; alpaka::exec(queue, addpLSasTrackCandidateInGPU_workDiv, - addpLSasTrackCandidateInGPU_kernel, + ::lst::addpLSasTrackCandidateInGPU{}, nLowerModules_, *trackCandidatesInGPU, *segmentsInGPU, tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached - auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanpT3Host_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanpLSHost_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanT5Host_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT3Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanpLSHost_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nTrackCanpT5Host_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -702,26 +686,27 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); - if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || - (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == ::lst::n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == ::lst::n_max_nonpixel_track_candidates)) { printf( "****************************************************************************************************\n" "* Warning: Track candidates were possibly truncated. *\n" - "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" + "* You may need to increase either ::lst::n_max_pixel_track_candidates or " + "::lst::n_max_nonpixel_track_candidates. *\n" "* Run the code with the WARNINGS flag activated for more details. *\n" "****************************************************************************************************\n"); } } -void lst::Event::createPixelTriplets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { if (pixelTripletsInGPU == nullptr) { - pixelTripletsInGPU = new lst::PixelTriplets(); - pixelTripletsBuffers = new lst::PixelTripletsBuffer(n_max_pixel_triplets, devAcc, queue); + pixelTripletsInGPU = new ::lst::PixelTriplets(); + pixelTripletsBuffers = new ::lst::PixelTripletsBuffer(::lst::n_max_pixel_triplets, devAcc, queue); pixelTripletsInGPU->setData(*pixelTripletsBuffers); } - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -737,25 +722,25 @@ void lst::Event::createPixelTriplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = - pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel // the current selection still leaves a significant fraction of unmatchable pLSs for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelSize_host[i] = 0; connectedPixelIndex_host[i] = 0; continue; @@ -787,12 +772,11 @@ void lst::Event::createPixelTriplets() { Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = - createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); - lst::createPixelTripletsInGPUFromMapv2 createPixelTripletsInGPUFromMapv2_kernel; alpaka::exec(queue, createPixelTripletsInGPUFromMapv2_workDiv, - createPixelTripletsInGPUFromMapv2_kernel, + ::lst::createPixelTripletsInGPUFromMapv2{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU, @@ -804,7 +788,7 @@ void lst::Event::createPixelTriplets() { nInnerSegments); #ifdef WARNINGS - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -817,28 +801,26 @@ void lst::Event::createPixelTriplets() { //seems like more blocks lead to conflicting writes Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = - createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, ::lst::elementsPerThread); - lst::removeDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel; alpaka::exec(queue, removeDupPixelTripletsInGPUFromMap_workDiv, - removeDupPixelTripletsInGPUFromMap_kernel, + ::lst::removeDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU); } -void lst::Event::createQuintuplets() { - WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { + WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, - createEligibleModulesListForQuintupletsGPU_kernel, + ::lst::createEligibleModulesListForQuintupletsGPU{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); - auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); - auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); + auto nEligibleT5Modules_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTotalQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); @@ -848,8 +830,8 @@ void lst::Event::createQuintuplets() { auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (quintupletsInGPU == nullptr) { - quintupletsInGPU = new lst::Quintuplets(); - quintupletsBuffers = new lst::QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); + quintupletsInGPU = new ::lst::Quintuplets(); + quintupletsBuffers = new ::lst::QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); @@ -858,12 +840,11 @@ void lst::Event::createQuintuplets() { Vec3D const threadsPerBlockQuints{1, 8, 32}; Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; WorkDiv3D const createQuintupletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, ::lst::elementsPerThread); - lst::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; alpaka::exec(queue, createQuintupletsInGPUv2_workDiv, - createQuintupletsInGPUv2_kernel, + ::lst::createQuintupletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -873,24 +854,22 @@ void lst::Event::createQuintuplets() { nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; - Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; + Vec3D const blocksPerGridDupQuint{::lst::max_blocks, 1, 1}; WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = - createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, ::lst::elementsPerThread); - lst::removeDupQuintupletsInGPUAfterBuild removeDupQuintupletsInGPUAfterBuild_kernel; alpaka::exec(queue, removeDupQuintupletsInGPUAfterBuild_workDiv, - removeDupQuintupletsInGPUAfterBuild_kernel, + ::lst::removeDupQuintupletsInGPUAfterBuild{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); - WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, - addQuintupletRangesToEventExplicit_kernel, + ::lst::addQuintupletRangesToEventExplicit{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); @@ -900,34 +879,33 @@ void lst::Event::createQuintuplets() { } } -void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); - lst::checkHitspLS checkHitspLS_kernel; alpaka::exec( - queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false); + queue, checkHitspLS_workDiv, ::lst::checkHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, false); } } -void lst::Event::createPixelQuintuplets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { if (pixelQuintupletsInGPU == nullptr) { - pixelQuintupletsInGPU = new lst::PixelQuintuplets(); - pixelQuintupletsBuffers = new lst::PixelQuintupletsBuffer(n_max_pixel_quintuplets, devAcc, queue); + pixelQuintupletsInGPU = new ::lst::PixelQuintuplets(); + pixelQuintupletsBuffers = new ::lst::PixelQuintupletsBuffer(::lst::n_max_pixel_quintuplets, devAcc, queue); pixelQuintupletsInGPU->setData(*pixelQuintupletsBuffers); } if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new lst::TrackCandidates(); - trackCandidatesBuffers = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new ::lst::TrackCandidates(); + trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -943,24 +921,24 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = - pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; // Loop over # pLS for (unsigned int i = 0; i < nInnerSegments; i++) { int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelIndex_host[i] = 0; connectedPixelSize_host[i] = 0; continue; @@ -986,14 +964,13 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; - Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; + Vec3D const blocksPerGridCreatePixQuints{16, ::lst::max_blocks, 1}; WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = - createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, ::lst::elementsPerThread); - lst::createPixelQuintupletsInGPUFromMapv2 createPixelQuintupletsInGPUFromMapv2_kernel; alpaka::exec(queue, createPixelQuintupletsInGPUFromMapv2_workDiv, - createPixelQuintupletsInGPUFromMapv2_kernel, + ::lst::createPixelQuintupletsInGPUFromMapv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -1006,22 +983,20 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU); Vec3D const threadsPerBlockDupPix{1, 16, 16}; - Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; + Vec3D const blocksPerGridDupPix{1, ::lst::max_blocks, 1}; WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = - createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, ::lst::elementsPerThread); - lst::removeDupPixelQuintupletsInGPUFromMap removeDupPixelQuintupletsInGPUFromMap_kernel; alpaka::exec(queue, removeDupPixelQuintupletsInGPUFromMap_workDiv, - removeDupPixelQuintupletsInGPUFromMap_kernel, + ::lst::removeDupPixelQuintupletsInGPUFromMap{}, *pixelQuintupletsInGPU); - WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); + WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv({1}, {256}, {1}); - lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, - addpT5asTrackCandidateInGPU_kernel, + ::lst::addpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU, *trackCandidatesInGPU, @@ -1029,7 +1004,7 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU); #ifdef WARNINGS - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -1038,18 +1013,18 @@ void lst::Event::createPixelQuintuplets() { #endif } -void lst::Event::addMiniDoubletsToEventExplicit() { - auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addMiniDoubletsToEventExplicit() { + auto nMDsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); + auto module_hitRanges_buf = ::lst::allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); alpaka::wait(queue); // wait for inputs before using them @@ -1061,7 +1036,7 @@ void lst::Event::addMiniDoubletsToEventExplicit() { for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_minidoublets_by_layer_barrel_[module_layers[i] - 1] += nMDsCPU[i]; } else { n_minidoublets_by_layer_endcap_[module_layers[i] - 1] += nMDsCPU[i]; @@ -1070,15 +1045,15 @@ void lst::Event::addMiniDoubletsToEventExplicit() { } } -void lst::Event::addSegmentsToEventExplicit() { - auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addSegmentsToEventExplicit() { + auto nSegmentsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1089,7 +1064,7 @@ void lst::Event::addSegmentsToEventExplicit() { for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nSegmentsCPU[i] == 0)) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_segments_by_layer_barrel_[module_layers[i] - 1] += nSegmentsCPU[i]; } else { n_segments_by_layer_endcap_[module_layers[i] - 1] += nSegmentsCPU[i]; @@ -1098,18 +1073,18 @@ void lst::Event::addSegmentsToEventExplicit() { } } -void lst::Event::addQuintupletsToEventExplicit() { - auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addQuintupletsToEventExplicit() { + auto nQuintupletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_quintupletModuleIndices_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); alpaka::wait(queue); // wait for inputs before using them @@ -1121,7 +1096,7 @@ void lst::Event::addQuintupletsToEventExplicit() { for (uint16_t i = 0; i < nLowerModules_; i++) { if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_quintuplets_by_layer_barrel_[module_layers[i] - 1] += nQuintupletsCPU[i]; } else { n_quintuplets_by_layer_endcap_[module_layers[i] - 1] += nQuintupletsCPU[i]; @@ -1130,15 +1105,15 @@ void lst::Event::addQuintupletsToEventExplicit() { } } -void lst::Event::addTripletsToEventExplicit() { - auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addTripletsToEventExplicit() { + auto nTripletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1149,7 +1124,7 @@ void lst::Event::addTripletsToEventExplicit() { for (uint16_t i = 0; i < nLowerModules_; i++) { if (nTripletsCPU[i] != 0) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_triplets_by_layer_barrel_[module_layers[i] - 1] += nTripletsCPU[i]; } else { n_triplets_by_layer_endcap_[module_layers[i] - 1] += nTripletsCPU[i]; @@ -1158,7 +1133,7 @@ void lst::Event::addTripletsToEventExplicit() { } } -unsigned int lst::Event::getNumberOfHits() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHits() { unsigned int hits = 0; for (auto& it : n_hits_by_layer_barrel_) { hits += it; @@ -1170,22 +1145,22 @@ unsigned int lst::Event::getNumberOfHits() { return hits; } -unsigned int lst::Event::getNumberOfHitsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayer(unsigned int layer) { if (layer == 6) return n_hits_by_layer_barrel_[layer]; else return n_hits_by_layer_barrel_[layer] + n_hits_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { return n_hits_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { return n_hits_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfMiniDoublets() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoublets() { unsigned int miniDoublets = 0; for (auto& it : n_minidoublets_by_layer_barrel_) { miniDoublets += it; @@ -1197,22 +1172,22 @@ unsigned int lst::Event::getNumberOfMiniDoublets() { return miniDoublets; } -unsigned int lst::Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { if (layer == 6) return n_minidoublets_by_layer_barrel_[layer]; else return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { return n_minidoublets_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { return n_minidoublets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfSegments() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegments() { unsigned int segments = 0; for (auto& it : n_segments_by_layer_barrel_) { segments += it; @@ -1224,22 +1199,22 @@ unsigned int lst::Event::getNumberOfSegments() { return segments; } -unsigned int lst::Event::getNumberOfSegmentsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayer(unsigned int layer) { if (layer == 6) return n_segments_by_layer_barrel_[layer]; else return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { return n_segments_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { return n_segments_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfTriplets() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTriplets() { unsigned int triplets = 0; for (auto& it : n_triplets_by_layer_barrel_) { triplets += it; @@ -1251,22 +1226,22 @@ unsigned int lst::Event::getNumberOfTriplets() { return triplets; } -unsigned int lst::Event::getNumberOfTripletsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayer(unsigned int layer) { if (layer == 6) return n_triplets_by_layer_barrel_[layer]; else return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { return n_triplets_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { return n_triplets_by_layer_endcap_[layer]; } -int lst::Event::getNumberOfPixelTriplets() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTriplets() { auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); @@ -1274,7 +1249,7 @@ int lst::Event::getNumberOfPixelTriplets() { return *nPixelTriplets_buf_h.data(); } -int lst::Event::getNumberOfPixelQuintuplets() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelQuintuplets() { auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); @@ -1282,7 +1257,7 @@ int lst::Event::getNumberOfPixelQuintuplets() { return *nPixelQuintuplets_buf_h.data(); } -unsigned int lst::Event::getNumberOfQuintuplets() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintuplets() { unsigned int quintuplets = 0; for (auto& it : n_quintuplets_by_layer_barrel_) { quintuplets += it; @@ -1294,22 +1269,22 @@ unsigned int lst::Event::getNumberOfQuintuplets() { return quintuplets; } -unsigned int lst::Event::getNumberOfQuintupletsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayer(unsigned int layer) { if (layer == 6) return n_quintuplets_by_layer_barrel_[layer]; else return n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { return n_quintuplets_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { return n_quintuplets_by_layer_endcap_[layer]; } -int lst::Event::getNumberOfTrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); @@ -1317,7 +1292,7 @@ int lst::Event::getNumberOfTrackCandidates() { return *nTrackCandidates_buf_h.data(); } -int lst::Event::getNumberOfPT5TrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); @@ -1326,7 +1301,7 @@ int lst::Event::getNumberOfPT5TrackCandidates() { return *nTrackCandidatesPT5_buf_h.data(); } -int lst::Event::getNumberOfPT3TrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); @@ -1334,7 +1309,7 @@ int lst::Event::getNumberOfPT3TrackCandidates() { return *nTrackCandidatesPT3_buf_h.data(); } -int lst::Event::getNumberOfPLSTrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -1342,7 +1317,7 @@ int lst::Event::getNumberOfPLSTrackCandidates() { return *nTrackCandidatesPLS_buf_h.data(); } -int lst::Event::getNumberOfPixelTrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1352,7 +1327,7 @@ int lst::Event::getNumberOfPixelTrackCandidates() { return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } -int lst::Event::getNumberOfT5TrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); @@ -1360,7 +1335,8 @@ int lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection +lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHits( + bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1368,7 +1344,7 @@ lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_p alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1384,14 +1360,14 @@ lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_p return hitsInCPU; } -lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { +lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1402,9 +1378,9 @@ lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { return hitsInCPU; } -lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { +lst::ObjectRangesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { - rangesInCPU = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); + rangesInCPU = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); alpaka::memcpy(queue, rangesInCPU->hitRanges_buf, rangesBuffers->hitRanges_buf); @@ -1418,7 +1394,7 @@ lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { return rangesInCPU; } -lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) { +lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1426,7 +1402,7 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - mdsInCPU = new lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); + mdsInCPU = new ::lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1441,7 +1417,7 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) return mdsInCPU; } -lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { +lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1449,8 +1425,8 @@ lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU = - new lst::SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); + segmentsInCPU = new ::lst::SegmentsBuffer( + nMemHost, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1478,7 +1454,7 @@ lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { return segmentsInCPU; } -lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { +lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1486,7 +1462,7 @@ lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU = new lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); + tripletsInCPU = new ::lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1501,9 +1477,12 @@ lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { alpaka::memcpy(queue, tripletsInCPU->rtLo_buf, tripletsBuffers->rtLo_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->rtHi_buf, tripletsBuffers->rtHi_buf, nMemHost); #endif - alpaka::memcpy(queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, Params_T3::kHits * nMemHost); alpaka::memcpy( - queue, tripletsInCPU->logicalLayers_buf, tripletsBuffers->logicalLayers_buf, Params_T3::kLayers * nMemHost); + queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, ::lst::Params_T3::kHits * nMemHost); + alpaka::memcpy(queue, + tripletsInCPU->logicalLayers_buf, + tripletsBuffers->logicalLayers_buf, + ::lst::Params_T3::kLayers * nMemHost); alpaka::memcpy(queue, tripletsInCPU->segmentIndices_buf, tripletsBuffers->segmentIndices_buf, 2 * nMemHost); alpaka::memcpy(queue, tripletsInCPU->betaIn_buf, tripletsBuffers->betaIn_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); @@ -1515,7 +1494,7 @@ lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { return tripletsInCPU; } -lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { +lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1523,7 +1502,7 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU = new lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); + quintupletsInCPU = new ::lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1534,7 +1513,7 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { alpaka::memcpy(queue, quintupletsInCPU->lowerModuleIndices_buf, quintupletsBuffers->lowerModuleIndices_buf, - Params_T5::kLayers * nMemHost); + ::lst::Params_T5::kLayers * nMemHost); alpaka::memcpy(queue, quintupletsInCPU->innerRadius_buf, quintupletsBuffers->innerRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->bridgeRadius_buf, quintupletsBuffers->bridgeRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->outerRadius_buf, quintupletsBuffers->outerRadius_buf, nMemHost); @@ -1552,7 +1531,7 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { return quintupletsInCPU; } -lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync) { +lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1560,7 +1539,7 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync alpaka::wait(queue); // wait for the value before using auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU = new lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); + pixelTripletsInCPU = new ::lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); *pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; @@ -1592,7 +1571,8 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(bool sync) { +lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelQuintuplets( + bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1600,7 +1580,7 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(boo alpaka::wait(queue); // wait for the value before using auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU = new lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); + pixelQuintupletsInCPU = new ::lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; @@ -1629,7 +1609,8 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(boo return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool sync) { +lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidates( + bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1637,21 +1618,21 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool alpaka::wait(queue); auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + ::lst::Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->logicalLayers_buf, trackCandidatesBuffers->logicalLayers_buf, - Params_pT5::kLayers * nTrackCanHost); + ::lst::Params_pT5::kLayers * nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->directObjectIndices_buf, trackCandidatesBuffers->directObjectIndices_buf, @@ -1668,7 +1649,8 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW(bool sync) { +lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidatesInCMSSW( + bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1676,15 +1658,15 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS alpaka::wait(queue); // wait for the value before using auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + ::lst::Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, @@ -1697,10 +1679,11 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS return trackCandidatesInCPU; } -lst::ModulesBuffer* lst::Event::getModules(bool isFull, bool sync) { +lst::ModulesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getModules(bool isFull, + bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. - modulesInCPU = new lst::ModulesBuffer(devHost, nModules_, nPixels_); + modulesInCPU = new ::lst::ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); if (sync) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 64365bb58bfa8..3c3549f96d41e 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -17,192 +17,188 @@ #include "HeterogeneousCore/AlpakaInterface/interface/host.h" -namespace lst { - - using namespace ALPAKA_ACCELERATOR_NAMESPACE; - - template - class Event; - - template <> - class Event { - private: - Queue queue; - Device devAcc; - DevHost devHost; - bool addObjects; - - std::array n_hits_by_layer_barrel_; - std::array n_hits_by_layer_endcap_; - std::array n_minidoublets_by_layer_barrel_; - std::array n_minidoublets_by_layer_endcap_; - std::array n_segments_by_layer_barrel_; - std::array n_segments_by_layer_endcap_; - std::array n_triplets_by_layer_barrel_; - std::array n_triplets_by_layer_endcap_; - std::array n_trackCandidates_by_layer_barrel_; - std::array n_trackCandidates_by_layer_endcap_; - std::array n_quintuplets_by_layer_barrel_; - std::array n_quintuplets_by_layer_endcap_; - unsigned int nTotalSegments_; - - //Device stuff - ObjectRanges* rangesInGPU; - ObjectRangesBuffer* rangesBuffers; - Hits* hitsInGPU; - HitsBuffer* hitsBuffers; - MiniDoublets* mdsInGPU; - MiniDoubletsBuffer* miniDoubletsBuffers; - Segments* segmentsInGPU; - SegmentsBuffer* segmentsBuffers; - Triplets* tripletsInGPU; - TripletsBuffer* tripletsBuffers; - Quintuplets* quintupletsInGPU; - QuintupletsBuffer* quintupletsBuffers; - TrackCandidates* trackCandidatesInGPU; - TrackCandidatesBuffer* trackCandidatesBuffers; - PixelTriplets* pixelTripletsInGPU; - PixelTripletsBuffer* pixelTripletsBuffers; - PixelQuintuplets* pixelQuintupletsInGPU; - PixelQuintupletsBuffer* pixelQuintupletsBuffers; - - //CPU interface stuff - ObjectRangesBuffer* rangesInCPU; - HitsBuffer* hitsInCPU; - MiniDoubletsBuffer* mdsInCPU; - SegmentsBuffer* segmentsInCPU; - TripletsBuffer* tripletsInCPU; - TrackCandidatesBuffer* trackCandidatesInCPU; - ModulesBuffer* modulesInCPU; - QuintupletsBuffer* quintupletsInCPU; - PixelTripletsBuffer* pixelTripletsInCPU; - PixelQuintupletsBuffer* pixelQuintupletsInCPU; - - void initSync(bool verbose); - - int* superbinCPU; - int8_t* pixelTypeCPU; - - const uint16_t nModules_; - const uint16_t nLowerModules_; - const unsigned int nPixels_; - const unsigned int nEndCapMap_; - ModulesBuffer const& modulesBuffers_; - PixelMap const& pixelMapping_; - EndcapGeometryBuffer const& endcapGeometryBuffers_; - - public: - // Constructor used for CMSSW integration. Uses an external queue. - template - Event(bool verbose, TQueue const& q, const LSTESData* deviceESData) - : queue(q), - devAcc(alpaka::getDev(q)), - devHost(cms::alpakatools::host()), - nModules_(deviceESData->nModules), - nLowerModules_(deviceESData->nLowerModules), - nPixels_(deviceESData->nPixels), - nEndCapMap_(deviceESData->nEndCapMap), - modulesBuffers_(deviceESData->modulesBuffers), - pixelMapping_(*deviceESData->pixelMapping), - endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { - initSync(verbose); - } - void resetEventSync(); // synchronizes - void wait() const { alpaka::wait(queue); } - - // Calls the appropriate hit function, then increments the counter - void addHitToEvent(std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& detId, - std::vector const& idxInNtuple); - void addPixelSegmentToEvent(std::vector const& hitIndices0, - std::vector const& hitIndices1, - std::vector const& hitIndices2, - std::vector const& hitIndices3, - std::vector const& dPhiChange, - std::vector const& ptIn, - std::vector const& ptErr, - std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& eta, - std::vector const& etaErr, - std::vector const& phi, - std::vector const& charge, - std::vector const& seedIdx, - std::vector const& superbin, - std::vector const& pixelType, - std::vector const& isQuad); - - void createMiniDoublets(); - void createSegmentsWithModuleMap(); - void createTriplets(); - void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); - void createPixelTriplets(); - void createQuintuplets(); - void pixelLineSegmentCleaning(bool no_pls_dupclean); - void createPixelQuintuplets(); - - // functions that map the objects to the appropriate modules - void addMiniDoubletsToEventExplicit(); - void addSegmentsToEventExplicit(); - void addQuintupletsToEventExplicit(); - void addTripletsToEventExplicit(); - void resetObjectsInModule(); - - unsigned int getNumberOfHits(); - unsigned int getNumberOfHitsByLayer(unsigned int layer); - unsigned int getNumberOfHitsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfHitsByLayerEndcap(unsigned int layer); - - unsigned int getNumberOfMiniDoublets(); - unsigned int getNumberOfMiniDoubletsByLayer(unsigned int layer); - unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); - - unsigned int getNumberOfSegments(); - unsigned int getNumberOfSegmentsByLayer(unsigned int layer); - unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); - - unsigned int getNumberOfTriplets(); - unsigned int getNumberOfTripletsByLayer(unsigned int layer); - unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); - - int getNumberOfPixelTriplets(); - int getNumberOfPixelQuintuplets(); - - unsigned int getNumberOfQuintuplets(); - unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); - unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); - - int getNumberOfTrackCandidates(); - int getNumberOfPT5TrackCandidates(); - int getNumberOfPT3TrackCandidates(); - int getNumberOfPLSTrackCandidates(); - int getNumberOfPixelTrackCandidates(); - int getNumberOfT5TrackCandidates(); - - // sync adds alpaka::wait at the end of filling a buffer during lazy fill - // (has no effect on repeated calls) - // set to false may allow faster operation with concurrent calls of get* - // HANDLE WITH CARE - HitsBuffer* getHits(bool sync = true); - HitsBuffer* getHitsInCMSSW(bool sync = true); - ObjectRangesBuffer* getRanges(bool sync = true); - MiniDoubletsBuffer* getMiniDoublets(bool sync = true); - SegmentsBuffer* getSegments(bool sync = true); - TripletsBuffer* getTriplets(bool sync = true); - QuintupletsBuffer* getQuintuplets(bool sync = true); - PixelTripletsBuffer* getPixelTriplets(bool sync = true); - PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); - TrackCandidatesBuffer* getTrackCandidates(bool sync = true); - TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); - ModulesBuffer* getModules(bool isFull = false, bool sync = true); - }; - -} // namespace lst +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace lst { + + class Event { + private: + Queue queue; + Device devAcc; + DevHost devHost; + bool addObjects; + + std::array n_hits_by_layer_barrel_; + std::array n_hits_by_layer_endcap_; + std::array n_minidoublets_by_layer_barrel_; + std::array n_minidoublets_by_layer_endcap_; + std::array n_segments_by_layer_barrel_; + std::array n_segments_by_layer_endcap_; + std::array n_triplets_by_layer_barrel_; + std::array n_triplets_by_layer_endcap_; + std::array n_trackCandidates_by_layer_barrel_; + std::array n_trackCandidates_by_layer_endcap_; + std::array n_quintuplets_by_layer_barrel_; + std::array n_quintuplets_by_layer_endcap_; + unsigned int nTotalSegments_; + + //Device stuff + ::lst::ObjectRanges* rangesInGPU; + ::lst::ObjectRangesBuffer* rangesBuffers; + ::lst::Hits* hitsInGPU; + ::lst::HitsBuffer* hitsBuffers; + ::lst::MiniDoublets* mdsInGPU; + ::lst::MiniDoubletsBuffer* miniDoubletsBuffers; + ::lst::Segments* segmentsInGPU; + ::lst::SegmentsBuffer* segmentsBuffers; + ::lst::Triplets* tripletsInGPU; + ::lst::TripletsBuffer* tripletsBuffers; + ::lst::Quintuplets* quintupletsInGPU; + ::lst::QuintupletsBuffer* quintupletsBuffers; + ::lst::TrackCandidates* trackCandidatesInGPU; + ::lst::TrackCandidatesBuffer* trackCandidatesBuffers; + ::lst::PixelTriplets* pixelTripletsInGPU; + ::lst::PixelTripletsBuffer* pixelTripletsBuffers; + ::lst::PixelQuintuplets* pixelQuintupletsInGPU; + ::lst::PixelQuintupletsBuffer* pixelQuintupletsBuffers; + + //CPU interface stuff + ::lst::ObjectRangesBuffer* rangesInCPU; + ::lst::HitsBuffer* hitsInCPU; + ::lst::MiniDoubletsBuffer* mdsInCPU; + ::lst::SegmentsBuffer* segmentsInCPU; + ::lst::TripletsBuffer* tripletsInCPU; + ::lst::TrackCandidatesBuffer* trackCandidatesInCPU; + ::lst::ModulesBuffer* modulesInCPU; + ::lst::QuintupletsBuffer* quintupletsInCPU; + ::lst::PixelTripletsBuffer* pixelTripletsInCPU; + ::lst::PixelQuintupletsBuffer* pixelQuintupletsInCPU; + + void initSync(bool verbose); + + int* superbinCPU; + int8_t* pixelTypeCPU; + + const uint16_t nModules_; + const uint16_t nLowerModules_; + const unsigned int nPixels_; + const unsigned int nEndCapMap_; + ::lst::ModulesBuffer const& modulesBuffers_; + ::lst::PixelMap const& pixelMapping_; + ::lst::EndcapGeometryBuffer const& endcapGeometryBuffers_; + + public: + // Constructor used for CMSSW integration. Uses an external queue. + Event(bool verbose, Queue const& q, const ::lst::LSTESData* deviceESData) + : queue(q), + devAcc(alpaka::getDev(q)), + devHost(cms::alpakatools::host()), + nModules_(deviceESData->nModules), + nLowerModules_(deviceESData->nLowerModules), + nPixels_(deviceESData->nPixels), + nEndCapMap_(deviceESData->nEndCapMap), + modulesBuffers_(deviceESData->modulesBuffers), + pixelMapping_(*deviceESData->pixelMapping), + endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { + initSync(verbose); + } + void resetEventSync(); // synchronizes + void wait() const { alpaka::wait(queue); } + + // Calls the appropriate hit function, then increments the counter + void addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple); + void addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad); + + void createMiniDoublets(); + void createSegmentsWithModuleMap(); + void createTriplets(); + void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); + void createPixelTriplets(); + void createQuintuplets(); + void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); + + unsigned int getNumberOfHits(); + unsigned int getNumberOfHitsByLayer(unsigned int layer); + unsigned int getNumberOfHitsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfHitsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfMiniDoublets(); + unsigned int getNumberOfMiniDoubletsByLayer(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfSegments(); + unsigned int getNumberOfSegmentsByLayer(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfTriplets(); + unsigned int getNumberOfTripletsByLayer(unsigned int layer); + unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); + + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); + + unsigned int getNumberOfQuintuplets(); + unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); + + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); + + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + ::lst::HitsBuffer* getHits(bool sync = true); + ::lst::HitsBuffer* getHitsInCMSSW(bool sync = true); + ::lst::ObjectRangesBuffer* getRanges(bool sync = true); + ::lst::MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + ::lst::SegmentsBuffer* getSegments(bool sync = true); + ::lst::TripletsBuffer* getTriplets(bool sync = true); + ::lst::QuintupletsBuffer* getQuintuplets(bool sync = true); + ::lst::PixelTripletsBuffer* getPixelTriplets(bool sync = true); + ::lst::PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + ::lst::TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + ::lst::TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ::lst::ModulesBuffer* getModules(bool isFull = false, bool sync = true); + }; + + } // namespace lst + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index f5ee7d7f52add..e3e9909045a6d 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -19,26 +19,25 @@ namespace { } } // namespace -template <> -void lst::LST::prepareInput(std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z) { unsigned int count = 0; auto n_see = see_stateTrajGlbPx.size(); std::vector px_vec; @@ -212,25 +211,24 @@ void lst::LST::prepareInput(std::vector const& see_px, in_isQuad_vec_ = isQuad_vec; } -template <> -std::vector lst::LST::getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices) { +std::vector ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getHitIdxs(short trackCandidateType, + unsigned int TCIdx, + unsigned int const* TCHitIndices, + unsigned int const* hitIndices) { std::vector hits; unsigned int maxNHits = 0; if (trackCandidateType == 7) - maxNHits = Params_pT5::kHits; // pT5 + maxNHits = ::lst::Params_pT5::kHits; // pT5 else if (trackCandidateType == 5) - maxNHits = Params_pT3::kHits; // pT3 + maxNHits = ::lst::Params_pT3::kHits; // pT3 else if (trackCandidateType == 4) - maxNHits = Params_T5::kHits; // T5 + maxNHits = ::lst::Params_T5::kHits; // T5 else if (trackCandidateType == 8) - maxNHits = Params_pLS::kHits; // pLS + maxNHits = ::lst::Params_pLS::kHits; // pLS for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; + unsigned int hitIdxInGPU = TCHitIndices[::lst::Params_pT5::kHits * TCIdx + i]; unsigned int hitIdx = (trackCandidateType == 8) ? hitIdxInGPU @@ -248,15 +246,14 @@ std::vector lst::LST::getHitIdxs(short trackCandidateType, return hits; } -template <> -void lst::LST::getOutput(lst::Event& event) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getOutput(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event& event) { std::vector> tc_hitIdxs; std::vector tc_len; std::vector tc_seedIdx; std::vector tc_trackCandidateType; - lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line - lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); + ::lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line + ::lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; @@ -277,33 +274,31 @@ void lst::LST::getOutput(lst::Event& event) { out_tc_trackCandidateType_ = tc_trackCandidateType; } -template <> -template <> -void lst::LST::run(Queue& queue, - bool verbose, - LSTESData const* deviceESData, - std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z, - bool no_pls_dupclean, - bool tc_pls_triplets) { - auto event = lst::Event(verbose, queue, deviceESData); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::run(Queue& queue, + bool verbose, + ::lst::LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets) { + auto event = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event(verbose, queue, deviceESData); prepareInput(see_px, see_py, see_pz, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 60e1a7428edf5..47b46d1b749dc 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -970,7 +970,7 @@ namespace lst { struct lst::Modules modulesInGPU, struct lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -1068,7 +1068,7 @@ namespace lst { struct lst::ObjectRanges rangesInGPU, struct lst::Hits hitsInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 4d957a0a1402e..09290d7ba025f 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -2670,7 +2670,7 @@ namespace lst { lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -2774,7 +2774,7 @@ namespace lst { lst::Quintuplets quintupletsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6b44ddfbe24b7..7f47ff5d9a87b 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -802,7 +802,7 @@ namespace lst { lst::ObjectRanges rangesInGPU, lst::MiniDoublets mdsInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -906,7 +906,7 @@ namespace lst { lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 0439050e100d2..99faae02c286e 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -390,7 +390,7 @@ namespace lst { lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -539,7 +539,7 @@ namespace lst { lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index c5ac432ebf310..b20cf2ebea660 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -932,7 +932,7 @@ namespace lst { lst::ObjectRanges rangesInGPU, lst::Segments segmentsInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -1035,7 +1035,7 @@ namespace lst { lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index 89bb43a3bcd4b..060308e4dabab 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -2,8 +2,6 @@ #include -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - //___________________________________________________________________________________________________________________________________________________________________________________________ int main(int argc, char **argv) { //******************************************************************************** @@ -255,7 +253,7 @@ int main(int argc, char **argv) { // Printing out the option settings overview std::cout << "=========================================================" << std::endl; - std::cout << " Running for Acc = " << alpaka::getAccName() << std::endl; + std::cout << " Running for Acc = " << alpaka::getAccName() << std::endl; std::cout << " Setting of the analysis job based on provided arguments " << std::endl; std::cout << "---------------------------------------------------------" << std::endl; std::cout << " ana.input_file_list_tstring: " << ana.input_file_list_tstring << std::endl; @@ -298,17 +296,18 @@ int main(int argc, char **argv) { //___________________________________________________________________________________________________________________________________________________________________________________________ void run_lst() { - Device devAcc = alpaka::getDevByIdx(ALPAKA_ACCELERATOR_NAMESPACE::Platform{}, 0u); - std::vector queues; + ALPAKA_ACCELERATOR_NAMESPACE::Device devAcc = alpaka::getDevByIdx(ALPAKA_ACCELERATOR_NAMESPACE::Platform{}, 0u); + std::vector queues; for (int s = 0; s < ana.streams; s++) { - queues.push_back(Queue(devAcc)); + queues.push_back(ALPAKA_ACCELERATOR_NAMESPACE::Queue(devAcc)); } // Load various maps used in the lst reconstruction TStopwatch full_timer; full_timer.Start(); auto hostESData = lst::loadAndFillESHost(); - auto deviceESData = cms::alpakatools::CopyToDevice>::copyAsync(queues[0], *hostESData.get()); + auto deviceESData = + cms::alpakatools::CopyToDevice>::copyAsync(queues[0], *hostESData.get()); float timeForMapLoading = full_timer.RealTime() * 1000; if (ana.do_write_ntuple) { @@ -384,9 +383,10 @@ void run_lst() { full_timer.Reset(); full_timer.Start(); - std::vector *> events; + std::vector events; for (int s = 0; s < ana.streams; s++) { - lst::Event *event = new lst::Event(ana.verbose >= 2, queues[s], &deviceESData); + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event = + new ALPAKA_ACCELERATOR_NAMESPACE::lst::Event(ana.verbose >= 2, queues[s], &deviceESData); events.push_back(event); } float timeForEventCreation = full_timer.RealTime() * 1000; diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index 76cfa9760b71a..bf513865ffbed 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -1,14 +1,12 @@ #include "AccessHelper.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - // =============== // ----* Hit *---- // =============== //____________________________________________________________________________________________ std::tuple, std::vector> convertHitsToHitIdxsAndHitTypes( - lst::Event* event, std::vector hits) { + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, std::vector hits) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hitidxs; std::vector hittypes; @@ -27,7 +25,7 @@ std::tuple, std::vector> convertHitsToHi // =============== //____________________________________________________________________________________________ -std::vector getPixelHitsFrompLS(lst::Event* event, unsigned int pLS) { +std::vector getPixelHitsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); @@ -46,7 +44,7 @@ std::vector getPixelHitsFrompLS(lst::Event* event, unsigned } //____________________________________________________________________________________________ -std::vector getPixelHitIdxsFrompLS(lst::Event* event, unsigned int pLS) { +std::vector getPixelHitIdxsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hitidxs; @@ -56,15 +54,15 @@ std::vector getPixelHitIdxsFrompLS(lst::Event* event, unsig } //____________________________________________________________________________________________ -std::vector getPixelHitTypesFrompLS(lst::Event* event, unsigned int pLS) { +std::vector getPixelHitTypesFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hittypes(hits.size(), 0); return hittypes; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS(lst::Event* event, - unsigned pLS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pLS) { return convertHitsToHitIdxsAndHitTypes(event, getPixelHitsFrompLS(event, pLS)); } @@ -73,7 +71,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getHitsFromMD(lst::Event* event, unsigned int MD) { +std::vector getHitsFromMD(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); unsigned int hit_1 = miniDoublets->anchorHitIndices[MD]; unsigned int hit_2 = miniDoublets->outerHitIndices[MD]; @@ -81,8 +79,8 @@ std::vector getHitsFromMD(lst::Event* event, unsigned int M } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromMD(lst::Event* event, - unsigned MD) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromMD( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned MD) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromMD(event, MD)); } @@ -91,7 +89,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getMDsFromLS(lst::Event* event, unsigned int LS) { +std::vector getMDsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { lst::Segments const* segments = event->getSegments()->data(); unsigned int MD_1 = segments->mdIndices[2 * LS]; unsigned int MD_2 = segments->mdIndices[2 * LS + 1]; @@ -99,7 +97,7 @@ std::vector getMDsFromLS(lst::Event* event, unsigned int LS } //____________________________________________________________________________________________ -std::vector getHitsFromLS(lst::Event* event, unsigned int LS) { +std::vector getHitsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { std::vector MDs = getMDsFromLS(event, LS); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -107,8 +105,8 @@ std::vector getHitsFromLS(lst::Event* event, unsigned int L } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromLS(lst::Event* event, - unsigned LS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromLS( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned LS) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromLS(event, LS)); } @@ -117,7 +115,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromT3(lst::Event* event, unsigned int T3) { +std::vector getLSsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { lst::Triplets const* triplets = event->getTriplets()->data(); unsigned int LS_1 = triplets->segmentIndices[2 * T3]; unsigned int LS_2 = triplets->segmentIndices[2 * T3 + 1]; @@ -125,7 +123,7 @@ std::vector getLSsFromT3(lst::Event* event, unsigned int T3 } //____________________________________________________________________________________________ -std::vector getMDsFromT3(lst::Event* event, unsigned int T3) { +std::vector getMDsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { std::vector LSs = getLSsFromT3(event, T3); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -133,7 +131,7 @@ std::vector getMDsFromT3(lst::Event* event, unsigned int T3 } //____________________________________________________________________________________________ -std::vector getHitsFromT3(lst::Event* event, unsigned int T3) { +std::vector getHitsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { std::vector MDs = getMDsFromT3(event, T3); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -142,8 +140,8 @@ std::vector getHitsFromT3(lst::Event* event, unsigned int T } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT3(lst::Event* event, - unsigned T3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT3( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT3(event, T3)); } @@ -152,7 +150,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getT3sFromT5(lst::Event* event, unsigned int T5) { +std::vector getT3sFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T3_1 = quintuplets->tripletIndices[2 * T5]; unsigned int T3_2 = quintuplets->tripletIndices[2 * T5 + 1]; @@ -160,7 +158,7 @@ std::vector getT3sFromT5(lst::Event* event, unsigned int T5 } //____________________________________________________________________________________________ -std::vector getLSsFromT5(lst::Event* event, unsigned int T5) { +std::vector getLSsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector T3s = getT3sFromT5(event, T5); std::vector LSs_0 = getLSsFromT3(event, T3s[0]); std::vector LSs_1 = getLSsFromT3(event, T3s[1]); @@ -168,7 +166,7 @@ std::vector getLSsFromT5(lst::Event* event, unsigned int T5 } //____________________________________________________________________________________________ -std::vector getMDsFromT5(lst::Event* event, unsigned int T5) { +std::vector getMDsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector LSs = getLSsFromT5(event, T5); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -178,7 +176,7 @@ std::vector getMDsFromT5(lst::Event* event, unsigned int T5 } //____________________________________________________________________________________________ -std::vector getHitsFromT5(lst::Event* event, unsigned int T5) { +std::vector getHitsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector MDs = getMDsFromT5(event, T5); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -189,7 +187,7 @@ std::vector getHitsFromT5(lst::Event* event, unsigned int T } //____________________________________________________________________________________________ -std::vector getHitIdxsFromT5(lst::Event* event, unsigned int T5) { +std::vector getHitIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFromT5(event, T5); std::vector hitidxs; @@ -198,7 +196,7 @@ std::vector getHitIdxsFromT5(lst::Event* event, unsigned in return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFromT5(lst::Event* event, unsigned int T5) { +std::vector getModuleIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector hits = getHitsFromT5(event, T5); std::vector module_idxs; lst::Hits const* hitsEvt = event->getHits()->data(); @@ -208,14 +206,14 @@ std::vector getModuleIdxsFromT5(lst::Event* event, unsigned return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFromT5(lst::Event* event, unsigned int T5) { +std::vector getHitTypesFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { return {4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; ; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT5(lst::Event* event, - unsigned T5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT5(event, T5)); } @@ -224,7 +222,7 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT3(lst::Event* event, unsigned int pT3) { +unsigned int getPixelLSFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); lst::Modules const* modulesEvt = event->getModules()->data(); @@ -233,37 +231,38 @@ unsigned int getPixelLSFrompT3(lst::Event* event, unsigned int pT3) { } //____________________________________________________________________________________________ -unsigned int getT3FrompT3(lst::Event* event, unsigned int pT3) { +unsigned int getT3FrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); return pixelTriplets->tripletIndices[pT3]; } //____________________________________________________________________________________________ -std::vector getLSsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getLSsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getLSsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getMDsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getMDsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getMDsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getOuterTrackerHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, + unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getHitsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getPixelHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -273,7 +272,7 @@ std::vector getHitsFrompT3(lst::Event* event, unsigned int } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getHitIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT3(event, pT3); std::vector hitidxs; @@ -282,7 +281,7 @@ std::vector getHitIdxsFrompT3(lst::Event* event, unsigned i return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getModuleIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { std::vector hits = getOuterTrackerHitsFrompT3(event, pT3); std::vector module_idxs; lst::Hits const* hitsEvt = event->getHits()->data(); @@ -292,7 +291,7 @@ std::vector getModuleIdxsFrompT3(lst::Event* event, unsigne return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getHitTypesFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -303,8 +302,8 @@ std::vector getHitTypesFrompT3(lst::Event* event, unsigned } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3(lst::Event* event, - unsigned pT3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT3(event, pT3)); } @@ -313,7 +312,7 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT5(lst::Event* event, unsigned int pT5) { +unsigned int getPixelLSFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); lst::Modules const* modulesEvt = event->getModules()->data(); @@ -322,43 +321,44 @@ unsigned int getPixelLSFrompT5(lst::Event* event, unsigned int pT5) { } //____________________________________________________________________________________________ -unsigned int getT5FrompT5(lst::Event* event, unsigned int pT5) { +unsigned int getT5FrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); return pixelQuintuplets->T5Indices[pT5]; } //____________________________________________________________________________________________ -std::vector getT3sFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getT3sFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getT3sFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getLSsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getLSsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getLSsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getMDsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getMDsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getMDsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getOuterTrackerHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, + unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getHitsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getPixelHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5 = getT5FrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -368,7 +368,7 @@ std::vector getHitsFrompT5(lst::Event* event, unsigned int } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getHitIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT5(event, pT5); std::vector hitidxs; @@ -378,7 +378,7 @@ std::vector getHitIdxsFrompT5(lst::Event* event, unsigned i } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getModuleIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { std::vector hits = getOuterTrackerHitsFrompT5(event, pT5); std::vector module_idxs; lst::Hits const* hitsEvt = event->getHits()->data(); @@ -389,7 +389,7 @@ std::vector getModuleIdxsFrompT5(lst::Event* event, unsigne } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getHitTypesFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -400,8 +400,8 @@ std::vector getHitTypesFrompT5(lst::Event* event, unsigned } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5(lst::Event* event, - unsigned pT5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT5(event, pT5)); } @@ -410,7 +410,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(lst::Event* event, unsigned int TC) { +std::vector getLSsFromTC(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int TC) { // Get the type of the track candidate lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; @@ -432,8 +432,8 @@ std::vector getLSsFromTC(lst::Event* event, unsigned int TC } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(lst::Event* event, - unsigned TC) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromTC( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned TC) { // Get the type of the track candidate lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h index d0924518eeb4d..6c856b22915d4 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h @@ -5,7 +5,7 @@ #include #include "Event.h" -using LSTEvent = lst::Event; +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; enum { kpT5 = 7, kpT3 = 5, kT5 = 4, kpLS = 8 }; diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index 9277b60253a64..73b5daabbfc1a 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -1,7 +1,5 @@ #include "trkCore.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - //___________________________________________________________________________________________________________________________________________________________________________________________ bool goodEvent() { if (ana.specific_event_index >= 0) { @@ -22,7 +20,7 @@ bool goodEvent() { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runMiniDoublet(lst::Event *event, int evt) { +float runMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, int evt) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Mini-Doublet start " << evt << std::endl; @@ -75,7 +73,7 @@ float runMiniDoublet(lst::Event *event, int evt) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runSegment(lst::Event *event) { +float runSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Segment start" << std::endl; @@ -113,7 +111,7 @@ float runSegment(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runT3(lst::Event *event) { +float runT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco T3 start" << std::endl; @@ -155,7 +153,7 @@ float runT3(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runpT3(lst::Event *event) { +float runpT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Triplet pT3 start" << std::endl; @@ -172,7 +170,7 @@ float runpT3(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runQuintuplet(lst::Event *event) { +float runQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Quintuplet start" << std::endl; @@ -218,7 +216,7 @@ float runQuintuplet(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { +float runPixelLineSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment start" << std::endl; @@ -233,7 +231,7 @@ float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelQuintuplet(lst::Event *event) { +float runPixelQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet start" << std::endl; @@ -250,7 +248,7 @@ float runPixelQuintuplet(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runTrackCandidate(lst::Event *event, bool no_pls_dupclean, bool tc_pls_triplets) { +float runTrackCandidate(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean, bool tc_pls_triplets) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco TrackCandidate start" << std::endl; @@ -847,7 +845,7 @@ void addInputsToLineSegmentTrackingPreLoad(std::vector> &out_ } //___________________________________________________________________________________________________________________________________________________________________________________________ -float addInputsToEventPreLoad(lst::Event *event, +float addInputsToEventPreLoad(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool useOMP, std::vector trkX, std::vector trkY, @@ -1152,7 +1150,7 @@ void writeMetaData() { // DEPRECATED FUNCTIONS //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTracking(lst::Event &event, bool useOMP) { +[[deprecated]] float addInputsToLineSegmentTracking(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event, bool useOMP) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Loading Inputs (i.e. outer tracker hits, and pixel line segements) to the Line Segment Tracking.... " @@ -1348,6 +1346,6 @@ void writeMetaData() { } //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(lst::Event &event) { +[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event) { return addInputsToLineSegmentTracking(event, true); } diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.h b/RecoTracker/LSTCore/standalone/code/core/trkCore.h index 0a2fddaba9d5c..66d5c10baf431 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.h +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.h @@ -11,7 +11,7 @@ #include #include -using LSTEvent = lst::Event; +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; // --------------------- ======================== --------------------- diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index 33eaeefc2d796..911a34f519a6d 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -1,7 +1,5 @@ #include "write_lst_ntuple.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - //________________________________________________________________________________________________________________________________ void createOutputBranches() { createRequiredOutputBranches(); @@ -9,7 +7,7 @@ void createOutputBranches() { } //________________________________________________________________________________________________________________________________ -void fillOutputBranches(lst::Event* event) { +void fillOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { setOutputBranches(event); setOptionalOutputBranches(event); if (ana.gnn_ntuple) @@ -183,7 +181,7 @@ void createGnnNtupleBranches() { } //________________________________________________________________________________________________________________________________ -void setOutputBranches(lst::Event* event) { +void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { // ============ Sim tracks ============= int n_accepted_simtrk = 0; for (unsigned int isimtrk = 0; isimtrk < trk.sim_pt().size(); ++isimtrk) { @@ -278,7 +276,7 @@ void setOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setOptionalOutputBranches(lst::Event* event) { +void setOptionalOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { #ifdef CUT_VALUE_DEBUG setPixelQuintupletOutputBranches(event); @@ -289,7 +287,7 @@ void setOptionalOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setPixelQuintupletOutputBranches(lst::Event* event) { +void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { // ============ pT5 ============= lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); @@ -365,7 +363,7 @@ void setPixelQuintupletOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setQuintupletOutputBranches(lst::Event* event) { +void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); lst::ObjectRanges const* ranges = event->getRanges()->data(); lst::Modules const* modules = event->getModules()->data(); @@ -436,7 +434,7 @@ void setQuintupletOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setPixelTripletOutputBranches(lst::Event* event) { +void setPixelTripletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); lst::Modules const* modules = event->getModules()->data(); lst::Segments const* segments = event->getSegments()->data(); @@ -499,7 +497,7 @@ void setPixelTripletOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setGnnNtupleBranches(lst::Event* event) { +void setGnnNtupleBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { // Get relevant information lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); @@ -640,7 +638,7 @@ void setGnnNtupleBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setGnnNtupleMiniDoublet(lst::Event* event, unsigned int MD) { +void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { // Get relevant information lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); @@ -708,8 +706,8 @@ void setGnnNtupleMiniDoublet(lst::Event* event, unsigned int MD) { } //________________________________________________________________________________________________________________________________ -std::tuple> parseTrackCandidate(lst::Event* event, - unsigned int idx) { +std::tuple> parseTrackCandidate( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { // Get the type of the track candidate lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[idx]; @@ -742,8 +740,8 @@ std::tuple> parseTrackCandidate( } //________________________________________________________________________________________________________________________________ -std::tuple, std::vector> parsepT5(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parsepT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { // Get relevant information lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); @@ -854,8 +852,8 @@ std::tuple, std::vector, std::vector> parsepT3(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parsepT3( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { // Get relevant information lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Triplets const* triplets = event->getTriplets()->data(); @@ -889,8 +887,8 @@ std::tuple, std::vector, std::vector> parseT5(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parseT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T5 = trackCandidates->directObjectIndices[idx]; @@ -923,8 +921,8 @@ std::tuple, std::vector, std::vector> parsepLS(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parsepLS( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Segments const* segments = event->getSegments()->data(); @@ -944,7 +942,7 @@ std::tuple, std::vector* event) { +void printHitMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Modules const* modules = event->getModules()->data(); lst::ObjectRanges const* ranges = event->getRanges()->data(); @@ -959,7 +957,7 @@ void printHitMultiplicities(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMiniDoubletMultiplicities(lst::Event* event) { +void printMiniDoubletMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Modules const* modules = event->getModules()->data(); @@ -978,7 +976,7 @@ void printMiniDoubletMultiplicities(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printAllObjects(lst::Event* event) { +void printAllObjects(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { printMDs(event); printLSs(event); printpLSs(event); @@ -986,7 +984,7 @@ void printAllObjects(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMDs(lst::Event* event) { +void printMDs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); lst::Modules const* modules = event->getModules()->data(); @@ -1008,7 +1006,7 @@ void printMDs(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printLSs(lst::Event* event) { +void printLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); @@ -1040,7 +1038,7 @@ void printLSs(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printpLSs(lst::Event* event) { +void printpLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); @@ -1070,7 +1068,7 @@ void printpLSs(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printT3s(lst::Event* event) { +void printT3s(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Triplets const* triplets = event->getTriplets()->data(); lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); @@ -1112,7 +1110,7 @@ void printT3s(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(lst::Event* event) { +void debugPrintOutlierMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Triplets const* triplets = event->getTriplets()->data(); lst::Segments const* segments = event->getSegments()->data(); diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h index cd20553772b9a..7a25c0d3cbcc6 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h @@ -11,7 +11,7 @@ #include "trkCore.h" #include "AccessHelper.h" -using LSTEvent = lst::Event; +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; // Common void createOutputBranches(); From 79ea879d884fba79175bb0748c5e92debf1af55a Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Sat, 17 Aug 2024 07:31:46 -0700 Subject: [PATCH 2/2] Moved all alpaka code to ALPAKA_ACCELERATOR_NAMESPACE --- .../plugins/alpaka/LSTModulesDevESProducer.cc | 2 +- RecoTracker/LST/plugins/alpaka/LSTProducer.cc | 2 +- .../LSTCore/interface/alpaka/Constants.h | 171 ++--- .../LSTCore/interface/{ => alpaka}/LST.h | 8 +- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 553 ++++++++-------- RecoTracker/LSTCore/src/alpaka/Event.h | 95 +-- RecoTracker/LSTCore/src/alpaka/Hit.h | 17 +- RecoTracker/LSTCore/src/alpaka/Kernels.h | 43 +- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 18 +- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 302 ++++----- .../LSTCore/src/alpaka/NeuralNetwork.h | 273 ++++---- .../LSTCore/src/alpaka/NeuralNetworkWeights.h | 610 +++++++++--------- RecoTracker/LSTCore/src/alpaka/ObjectRanges.h | 4 +- .../LSTCore/src/alpaka/PixelQuintuplet.h | 246 +++---- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 347 +++++----- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 570 ++++++++-------- RecoTracker/LSTCore/src/alpaka/Segment.h | 136 ++-- .../LSTCore/src/alpaka/TrackCandidate.h | 86 +-- RecoTracker/LSTCore/src/alpaka/Triplet.h | 179 +++-- .../standalone/code/core/AccessHelper.cc | 166 ++--- .../standalone/code/core/AnalysisConfig.h | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 22 +- .../standalone/code/core/write_lst_ntuple.cc | 183 +++--- 23 files changed, 2001 insertions(+), 2034 deletions(-) rename RecoTracker/LSTCore/interface/{ => alpaka}/LST.h (96%) diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc index c1d815210bd53..46c99993c5ed9 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -9,7 +9,7 @@ // LST includes #include "RecoTracker/LSTCore/interface/Module.h" -#include "RecoTracker/LSTCore/interface/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc index 6365eb9822483..e92ff549dffd1 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -19,7 +19,7 @@ #include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" -#include "RecoTracker/LSTCore/interface/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 459989670ccdd..9fed7760c721a 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -9,99 +9,118 @@ #include #endif -namespace lst { - - using namespace alpaka_common; +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace lst { + + // Re-export some useful things from the main namespace + using ::lst::allocBufWrapper; + using ::lst::Buf; + using ::lst::max_blocks; + using ::lst::max_connected_modules; + using ::lst::n_max_nonpixel_track_candidates; + using ::lst::n_max_pixel_md_per_modules; + using ::lst::n_max_pixel_quintuplets; + using ::lst::n_max_pixel_segments_per_module; + using ::lst::n_max_pixel_track_candidates; + using ::lst::n_max_pixel_triplets; + using ::lst::Params_LS; + using ::lst::Params_pLS; + using ::lst::Params_pT3; + using ::lst::Params_pT5; + using ::lst::Params_T3; + using ::lst::Params_T5; + using ::lst::size_superbins; // Half precision wrapper functions. #if defined(FP16_Base) #define __F2H __float2half #define __H2F __half2float - typedef __half float FPX; + typedef __half float FPX; #else #define __F2H #define __H2F - typedef float FPX; + typedef float FPX; #endif - Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); + Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); // Needed for files that are compiled by g++ to not throw an error. // uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. #if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) - struct uint4 { - unsigned int x; - unsigned int y; - unsigned int z; - unsigned int w; - }; + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; #endif - // Adjust grid and block sizes based on backend configuration - template > - ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, - const Vec& threadsPerBlock, - const Vec& elementsPerThreadArg) { - Vec adjustedBlocks = blocksPerGrid; - Vec adjustedThreads = threadsPerBlock; - - // special overrides for CPU/host cases - if constexpr (std::is_same_v) { - adjustedBlocks = Vec::all(static_cast(1)); - - if constexpr (alpaka::accMatchesTags) { - // Serial execution, set threads to 1 as well - adjustedThreads = Vec::all(static_cast(1)); // probably redundant + // Adjust grid and block sizes based on backend configuration + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { + Vec adjustedBlocks = blocksPerGrid; + Vec adjustedThreads = threadsPerBlock; + + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { + adjustedBlocks = Vec::all(static_cast(1)); + + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } } + + return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); } - return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); - } - - // The constants below are usually used in functions like alpaka::math::min(), - // expecting a reference (T const&) in the arguments. Hence, - // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. - - // 15 MeV constant from the approximate Bethe-Bloch formula - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { - 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { - 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { - 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. / (2.99792458e-3 * 3.8); - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; - // Since C++ can't represent infinity, lst_INF = 123456789 was used to represent infinity in the data table - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float lst_INF = 123456789.0; - - namespace t5dnn { - - // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR - // Other working points - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR - - } // namespace t5dnn - -} //namespace lst + // The constants below are usually used in functions like alpaka::math::min(), + // expecting a reference (T const&) in the arguments. Hence, + // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. + + // 15 MeV constant from the approximate Bethe-Bloch formula + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { + 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { + 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { + 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. / (2.99792458e-3 * 3.8); + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; + // Since C++ can't represent infinity, lst_INF = 123456789 was used to represent infinity in the data table + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float lst_INF = 123456789.0; + + namespace t5dnn { + + // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR + // Other working points + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR + + } // namespace t5dnn + + } //namespace lst +} //namespace ALPAKA_ACCELERATOR_NAMESPACE #endif diff --git a/RecoTracker/LSTCore/interface/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h similarity index 96% rename from RecoTracker/LSTCore/interface/LST.h rename to RecoTracker/LSTCore/interface/alpaka/LST.h index a83399cbd8356..0e4c64d2535df 100644 --- a/RecoTracker/LSTCore/interface/LST.h +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -1,5 +1,5 @@ -#ifndef RecoTracker_LSTCore_interface_LST_h -#define RecoTracker_LSTCore_interface_LST_h +#ifndef RecoTracker_LSTCore_interface_alpaka_LST_h +#define RecoTracker_LSTCore_interface_alpaka_LST_h #include "RecoTracker/LSTCore/interface/Constants.h" #include "RecoTracker/LSTCore/interface/LSTESData.h" @@ -8,6 +8,8 @@ #include #include +using ::lst::LSTESData; + namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace lst { class Event; @@ -18,7 +20,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void run(Queue& queue, bool verbose, - ::lst::LSTESData const* deviceESData, + LSTESData const* deviceESData, std::vector const& see_px, std::vector const& see_py, std::vector const& see_pz, diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 1526acd8e676d..62629bb08fc52 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -7,7 +7,9 @@ using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::initSync(bool verbose) { +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + +void Event::initSync(bool verbose) { alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; @@ -50,7 +52,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::initSync(bool verbose) { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::resetEventSync() { +void Event::resetEventSync() { alpaka::wait(queue); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { @@ -157,24 +159,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::resetEventSync() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& detId, - std::vector const& idxInNtuple) { +void Event::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { // Use the actual number of hits instead of a max. unsigned int nHits = x.size(); // Initialize space on device/host for next event. if (hitsInGPU == nullptr) { - hitsInGPU = new ::lst::Hits(); - hitsBuffers = new ::lst::HitsBuffer(nModules_, nHits, devAcc, queue); + hitsInGPU = new Hits(); + hitsBuffers = new HitsBuffer(nModules_, nHits, devAcc, queue); hitsInGPU->setData(*hitsBuffers); } if (rangesInGPU == nullptr) { - rangesInGPU = new ::lst::ObjectRanges(); - rangesBuffers = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); + rangesInGPU = new ObjectRanges(); + rangesBuffers = new ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); rangesInGPU->setData(*rangesBuffers); } @@ -191,12 +193,12 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; - Vec3D const blocksPerGrid1{1, 1, ::lst::max_blocks}; - WorkDiv3D const hit_loop_workdiv = ::lst::createWorkDiv(blocksPerGrid1, threadsPerBlock1, ::lst::elementsPerThread); + Vec3D const blocksPerGrid1{1, 1, max_blocks}; + WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); alpaka::exec(queue, hit_loop_workdiv, - ::lst::HitLoopKernel{}, + HitLoopKernel{}, ::lst::Endcap, ::lst::TwoS, nModules_, @@ -208,41 +210,40 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector nHits); Vec3D const threadsPerBlock2{1, 1, 256}; - Vec3D const blocksPerGrid2{1, 1, ::lst::max_blocks}; - WorkDiv3D const module_ranges_workdiv = - ::lst::createWorkDiv(blocksPerGrid2, threadsPerBlock2, ::lst::elementsPerThread); + Vec3D const blocksPerGrid2{1, 1, max_blocks}; + WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); alpaka::exec( - queue, module_ranges_workdiv, ::lst::ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); + queue, module_ranges_workdiv, ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, - std::vector const& hitIndices1, - std::vector const& hitIndices2, - std::vector const& hitIndices3, - std::vector const& dPhiChange, - std::vector const& ptIn, - std::vector const& ptErr, - std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& eta, - std::vector const& etaErr, - std::vector const& phi, - std::vector const& charge, - std::vector const& seedIdx, - std::vector const& superbin, - std::vector const& pixelType, - std::vector const& isQuad) { +void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad) { unsigned int size = ptIn.size(); - if (size > ::lst::n_max_pixel_segments_per_module) { + if (size > n_max_pixel_segments_per_module) { printf( "*********************************************************\n" "* Warning: Pixel line segments will be truncated. *\n" "* You need to increase n_max_pixel_segments_per_module. *\n" "*********************************************************\n"); - size = ::lst::n_max_pixel_segments_per_module; + size = n_max_pixel_segments_per_module; } unsigned int mdSize = 2 * size; @@ -255,24 +256,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, ::lst::CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - mdsInGPU = new ::lst::MiniDoublets(); - miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new MiniDoublets(); + miniDoubletsBuffers = new MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); @@ -281,11 +282,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them - WorkDiv1D const createSegmentArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, createSegmentArrayRanges_workDiv, - ::lst::CreateSegmentArrayRanges{}, + CreateSegmentArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU); @@ -295,21 +296,21 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments_ += ::lst::n_max_pixel_segments_per_module; + nTotalSegments_ += n_max_pixel_segments_per_module; - segmentsInGPU = new ::lst::Segments(); - segmentsBuffers = new ::lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new Segments(); + segmentsBuffers = + new SegmentsBuffer(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); } - auto hitIndices0_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto hitIndices1_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto hitIndices2_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto hitIndices3_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto dPhiChange_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices1_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices2_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices3_dev = allocBufWrapper(devAcc, size, queue); + auto dPhiChange_dev = allocBufWrapper(devAcc, size, queue); alpaka::memcpy(queue, hitIndices0_dev, hitIndices0, size); alpaka::memcpy(queue, hitIndices1_dev, hitIndices1, size); @@ -352,13 +353,12 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; - Vec3D const blocksPerGrid{1, 1, ::lst::max_blocks}; - WorkDiv3D const addPixelSegmentToEvent_workdiv = - ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); + Vec3D const blocksPerGrid{1, 1, max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); alpaka::exec(queue, addPixelSegmentToEvent_workdiv, - ::lst::AddPixelSegmentToEventKernel{}, + AddPixelSegmentToEventKernel{}, *modulesBuffers_.data(), *rangesInGPU, *hitsInGPU, @@ -373,53 +373,53 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto size); } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createMiniDoublets() { +void Event::createMiniDoublets() { // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, ::lst::CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { - mdsInGPU = new ::lst::MiniDoublets(); - miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new MiniDoublets(); + miniDoubletsBuffers = new MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); } Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; Vec3D const blocksPerGridCreateMDInGPU{1, nLowerModules_ / threadsPerBlockCreateMDInGPU[1], 1}; WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); alpaka::exec(queue, createMiniDoubletsInGPUv2_workDiv, - ::lst::CreateMiniDoubletsInGPUv2{}, + CreateMiniDoubletsInGPUv2{}, *modulesBuffers_.data(), *hitsInGPU, *mdsInGPU, *rangesInGPU); - WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, - ::lst::AddMiniDoubletRangesToEventExplicit{}, + AddMiniDoubletRangesToEventExplicit{}, *modulesBuffers_.data(), *mdsInGPU, *rangesInGPU, @@ -430,32 +430,32 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createMiniDoublets() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createSegmentsWithModuleMap() { +void Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { - segmentsInGPU = new ::lst::Segments(); - segmentsBuffers = new ::lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new Segments(); + segmentsBuffers = + new SegmentsBuffer(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; WorkDiv3D const createSegmentsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); alpaka::exec(queue, createSegmentsInGPUv2_workDiv, - ::lst::CreateSegmentsInGPUv2{}, + CreateSegmentsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *rangesInGPU); - WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, - ::lst::AddSegmentRangesToEventExplicit{}, + AddSegmentRangesToEventExplicit{}, *modulesBuffers_.data(), *segmentsInGPU, *rangesInGPU); @@ -465,13 +465,13 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createSegmentsWithModuleMap() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { +void Event::createTriplets() { if (tripletsInGPU == nullptr) { - WorkDiv1D const createTripletArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, createTripletArrayRanges_workDiv, - ::lst::CreateTripletArrayRanges{}, + CreateTripletArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *segmentsInGPU); @@ -482,8 +482,8 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); alpaka::wait(queue); // wait to get the value before using it - tripletsInGPU = new ::lst::Triplets(); - tripletsBuffers = new ::lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); + tripletsInGPU = new Triplets(); + tripletsBuffers = new TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); @@ -521,17 +521,17 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { } // Allocate and copy to device index - auto index_gpu_buf = ::lst::allocBufWrapper(devAcc, nLowerModules_, queue); + auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; - Vec3D const blocksPerGridCreateTrip{::lst::max_blocks, 1, 1}; + Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; WorkDiv3D const createTripletsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); alpaka::exec(queue, createTripletsInGPUv2_workDiv, - ::lst::CreateTripletsInGPUv2{}, + CreateTripletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -540,11 +540,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { index_gpu_buf.data(), nonZeroModules); - WorkDiv1D const addTripletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, - ::lst::AddTripletRangesToEventExplicit{}, + AddTripletRangesToEventExplicit{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); @@ -554,33 +554,33 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { +void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new ::lst::TrackCandidates(); - trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new TrackCandidates(); + trackCandidatesBuffers = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const crossCleanpT3_workDiv = - ::lst::createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); alpaka::exec(queue, crossCleanpT3_workDiv, - ::lst::CrossCleanpT3{}, + CrossCleanpT3{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, *segmentsInGPU, *pixelQuintupletsInGPU); - WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = ::lst::createWorkDiv({1}, {512}, {1}); + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, - ::lst::AddpT3asTrackCandidatesInGPU{}, + AddpT3asTrackCandidatesInGPU{}, nLowerModules_, *pixelTripletsInGPU, *trackCandidatesInGPU, @@ -596,22 +596,22 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = - ::lst::createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); alpaka::exec(queue, removeDupQuintupletsInGPUBeforeTC_workDiv, - ::lst::RemoveDupQuintupletsInGPUBeforeTC{}, + RemoveDupQuintupletsInGPUBeforeTC{}, *quintupletsInGPU, *rangesInGPU); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; - Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, ::lst::max_blocks}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; WorkDiv3D const crossCleanT5_workDiv = - ::lst::createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); alpaka::exec(queue, crossCleanT5_workDiv, - ::lst::CrossCleanT5{}, + CrossCleanT5{}, *modulesBuffers_.data(), *quintupletsInGPU, *pixelQuintupletsInGPU, @@ -620,12 +620,12 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; - WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( - blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, ::lst::elementsPerThread); + WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( + blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); alpaka::exec(queue, addT5asTrackCandidateInGPU_workDiv, - ::lst::AddT5asTrackCandidateInGPU{}, + AddT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU, *trackCandidatesInGPU, @@ -633,22 +633,21 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec( - queue, checkHitspLS_workDiv, ::lst::CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); + alpaka::exec(queue, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; WorkDiv3D const crossCleanpLS_workDiv = - ::lst::createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); alpaka::exec(queue, crossCleanpLS_workDiv, - ::lst::CrossCleanpLS{}, + CrossCleanpLS{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, @@ -659,23 +658,23 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls *quintupletsInGPU); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; - Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, ::lst::max_blocks}; - WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( - blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, ::lst::elementsPerThread); + Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; + WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( + blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); alpaka::exec(queue, addpLSasTrackCandidateInGPU_workDiv, - ::lst::AddpLSasTrackCandidateInGPU{}, + AddpLSasTrackCandidateInGPU{}, nLowerModules_, *trackCandidatesInGPU, *segmentsInGPU, tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached - auto nTrackCanpT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTrackCanpT3Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTrackCanpLSHost_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTrackCanT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT3Host_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanpLSHost_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanT5Host_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nTrackCanpT5Host_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -686,27 +685,26 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); - if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == ::lst::n_max_pixel_track_candidates) || - (nTrackCandidatesT5 == ::lst::n_max_nonpixel_track_candidates)) { + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { printf( "****************************************************************************************************\n" "* Warning: Track candidates were possibly truncated. *\n" - "* You may need to increase either ::lst::n_max_pixel_track_candidates or " - "::lst::n_max_nonpixel_track_candidates. *\n" + "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" "* Run the code with the WARNINGS flag activated for more details. *\n" "****************************************************************************************************\n"); } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { +void Event::createPixelTriplets() { if (pixelTripletsInGPU == nullptr) { - pixelTripletsInGPU = new ::lst::PixelTriplets(); - pixelTripletsBuffers = new ::lst::PixelTripletsBuffer(::lst::n_max_pixel_triplets, devAcc, queue); + pixelTripletsInGPU = new PixelTriplets(); + pixelTripletsBuffers = new PixelTripletsBuffer(n_max_pixel_triplets, devAcc, queue); pixelTripletsInGPU->setData(*pixelTripletsBuffers); } - auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -722,25 +720,25 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel // the current selection still leaves a significant fraction of unmatchable pLSs for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelSize_host[i] = 0; connectedPixelIndex_host[i] = 0; continue; @@ -772,11 +770,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = - ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); alpaka::exec(queue, createPixelTripletsInGPUFromMapv2_workDiv, - ::lst::CreatePixelTripletsInGPUFromMapv2{}, + CreatePixelTripletsInGPUFromMapv2{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU, @@ -788,7 +786,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { nInnerSegments); #ifdef WARNINGS - auto nPixelTriplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -801,26 +799,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { //seems like more blocks lead to conflicting writes Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = - ::lst::createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); - alpaka::exec(queue, - removeDupPixelTripletsInGPUFromMap_workDiv, - ::lst::RemoveDupPixelTripletsInGPUFromMap{}, - *pixelTripletsInGPU); + alpaka::exec( + queue, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU); } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { - WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); +void Event::createQuintuplets() { + WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, - ::lst::CreateEligibleModulesListForQuintupletsGPU{}, + CreateEligibleModulesListForQuintupletsGPU{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); - auto nEligibleT5Modules_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTotalQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); + auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); @@ -830,8 +826,8 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (quintupletsInGPU == nullptr) { - quintupletsInGPU = new ::lst::Quintuplets(); - quintupletsBuffers = new ::lst::QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); + quintupletsInGPU = new Quintuplets(); + quintupletsBuffers = new QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); @@ -840,11 +836,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { Vec3D const threadsPerBlockQuints{1, 8, 32}; Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; WorkDiv3D const createQuintupletsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); alpaka::exec(queue, createQuintupletsInGPUv2_workDiv, - ::lst::CreateQuintupletsInGPUv2{}, + CreateQuintupletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -854,22 +850,22 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; - Vec3D const blocksPerGridDupQuint{::lst::max_blocks, 1, 1}; + Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = - ::lst::createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); alpaka::exec(queue, removeDupQuintupletsInGPUAfterBuild_workDiv, - ::lst::RemoveDupQuintupletsInGPUAfterBuild{}, + RemoveDupQuintupletsInGPUAfterBuild{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); - WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, - ::lst::AddQuintupletRangesToEventExplicit{}, + AddQuintupletRangesToEventExplicit{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); @@ -879,33 +875,32 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { +void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec( - queue, checkHitspLS_workDiv, ::lst::CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, false); + alpaka::exec(queue, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, false); } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { +void Event::createPixelQuintuplets() { if (pixelQuintupletsInGPU == nullptr) { - pixelQuintupletsInGPU = new ::lst::PixelQuintuplets(); - pixelQuintupletsBuffers = new ::lst::PixelQuintupletsBuffer(::lst::n_max_pixel_quintuplets, devAcc, queue); + pixelQuintupletsInGPU = new PixelQuintuplets(); + pixelQuintupletsBuffers = new PixelQuintupletsBuffer(n_max_pixel_quintuplets, devAcc, queue); pixelQuintupletsInGPU->setData(*pixelQuintupletsBuffers); } if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new ::lst::TrackCandidates(); - trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new TrackCandidates(); + trackCandidatesBuffers = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -921,24 +916,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::size_superbins - 1] + pixelIndexOffsetPos; // Loop over # pLS for (unsigned int i = 0; i < nInnerSegments; i++) { int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)::size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelIndex_host[i] = 0; connectedPixelSize_host[i] = 0; continue; @@ -964,13 +959,13 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; - Vec3D const blocksPerGridCreatePixQuints{16, ::lst::max_blocks, 1}; + Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); alpaka::exec(queue, createPixelQuintupletsInGPUFromMapv2_workDiv, - ::lst::CreatePixelQuintupletsInGPUFromMapv2{}, + CreatePixelQuintupletsInGPUFromMapv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -983,20 +978,20 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { *rangesInGPU); Vec3D const threadsPerBlockDupPix{1, 16, 16}; - Vec3D const blocksPerGridDupPix{1, ::lst::max_blocks, 1}; + Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = - ::lst::createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); alpaka::exec(queue, removeDupPixelQuintupletsInGPUFromMap_workDiv, - ::lst::RemoveDupPixelQuintupletsInGPUFromMap{}, + RemoveDupPixelQuintupletsInGPUFromMap{}, *pixelQuintupletsInGPU); - WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv({1}, {256}, {1}); + WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, - ::lst::AddpT5asTrackCandidateInGPU{}, + AddpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU, *trackCandidatesInGPU, @@ -1004,7 +999,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { *rangesInGPU); #ifdef WARNINGS - auto nPixelQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -1013,18 +1008,18 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { #endif } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addMiniDoubletsToEventExplicit() { - auto nMDsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addMiniDoubletsToEventExplicit() { + auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_hitRanges_buf = ::lst::allocBufWrapper(devHost, nLowerModules_ * 2, queue); + auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); alpaka::wait(queue); // wait for inputs before using them @@ -1045,15 +1040,15 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addMiniDoubletsToEventExplicit() } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addSegmentsToEventExplicit() { - auto nSegmentsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addSegmentsToEventExplicit() { + auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1073,18 +1068,18 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addSegmentsToEventExplicit() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addQuintupletsToEventExplicit() { - auto nQuintupletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addQuintupletsToEventExplicit() { + auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_quintupletModuleIndices_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); alpaka::wait(queue); // wait for inputs before using them @@ -1105,15 +1100,15 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addQuintupletsToEventExplicit() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addTripletsToEventExplicit() { - auto nTripletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addTripletsToEventExplicit() { + auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1133,7 +1128,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addTripletsToEventExplicit() { } } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHits() { +unsigned int Event::getNumberOfHits() { unsigned int hits = 0; for (auto& it : n_hits_by_layer_barrel_) { hits += it; @@ -1145,22 +1140,18 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHits() { return hits; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfHitsByLayer(unsigned int layer) { if (layer == 6) return n_hits_by_layer_barrel_[layer]; else return n_hits_by_layer_barrel_[layer] + n_hits_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { - return n_hits_by_layer_barrel_[layer]; -} +unsigned int Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { return n_hits_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { - return n_hits_by_layer_endcap_[layer]; -} +unsigned int Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { return n_hits_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoublets() { +unsigned int Event::getNumberOfMiniDoublets() { unsigned int miniDoublets = 0; for (auto& it : n_minidoublets_by_layer_barrel_) { miniDoublets += it; @@ -1172,22 +1163,22 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoublets() return miniDoublets; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { if (layer == 6) return n_minidoublets_by_layer_barrel_[layer]; else return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { +unsigned int Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { return n_minidoublets_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { +unsigned int Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { return n_minidoublets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegments() { +unsigned int Event::getNumberOfSegments() { unsigned int segments = 0; for (auto& it : n_segments_by_layer_barrel_) { segments += it; @@ -1199,22 +1190,18 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegments() { return segments; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfSegmentsByLayer(unsigned int layer) { if (layer == 6) return n_segments_by_layer_barrel_[layer]; else return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { - return n_segments_by_layer_barrel_[layer]; -} +unsigned int Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { return n_segments_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { - return n_segments_by_layer_endcap_[layer]; -} +unsigned int Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { return n_segments_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTriplets() { +unsigned int Event::getNumberOfTriplets() { unsigned int triplets = 0; for (auto& it : n_triplets_by_layer_barrel_) { triplets += it; @@ -1226,22 +1213,18 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTriplets() { return triplets; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfTripletsByLayer(unsigned int layer) { if (layer == 6) return n_triplets_by_layer_barrel_[layer]; else return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { - return n_triplets_by_layer_barrel_[layer]; -} +unsigned int Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { return n_triplets_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { - return n_triplets_by_layer_endcap_[layer]; -} +unsigned int Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { return n_triplets_by_layer_endcap_[layer]; } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTriplets() { +int Event::getNumberOfPixelTriplets() { auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); @@ -1249,7 +1232,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTriplets() { return *nPixelTriplets_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelQuintuplets() { +int Event::getNumberOfPixelQuintuplets() { auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); @@ -1257,7 +1240,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelQuintuplets() { return *nPixelQuintuplets_buf_h.data(); } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintuplets() { +unsigned int Event::getNumberOfQuintuplets() { unsigned int quintuplets = 0; for (auto& it : n_quintuplets_by_layer_barrel_) { quintuplets += it; @@ -1269,22 +1252,22 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintuplets() return quintuplets; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfQuintupletsByLayer(unsigned int layer) { if (layer == 6) return n_quintuplets_by_layer_barrel_[layer]; else return n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { +unsigned int Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { return n_quintuplets_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { +unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { return n_quintuplets_by_layer_endcap_[layer]; } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTrackCandidates() { +int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); @@ -1292,7 +1275,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTrackCandidates() { return *nTrackCandidates_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT5TrackCandidates() { +int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); @@ -1301,7 +1284,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT5TrackCandidates() { return *nTrackCandidatesPT5_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT3TrackCandidates() { +int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); @@ -1309,7 +1292,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT3TrackCandidates() { return *nTrackCandidatesPT3_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPLSTrackCandidates() { +int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -1317,7 +1300,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPLSTrackCandidates() { return *nTrackCandidatesPLS_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTrackCandidates() { +int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1327,7 +1310,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTrackCandidates() return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfT5TrackCandidates() { +int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); @@ -1335,8 +1318,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHits( - bool sync) //std::shared_ptr should take care of garbage collection +HitsBuffer* Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1344,7 +1326,7 @@ lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Even alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1360,14 +1342,14 @@ lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Even return hitsInCPU; } -lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHitsInCMSSW(bool sync) { +HitsBuffer* Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1378,9 +1360,9 @@ lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Even return hitsInCPU; } -lst::ObjectRangesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getRanges(bool sync) { +ObjectRangesBuffer* Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { - rangesInCPU = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); + rangesInCPU = new ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); alpaka::memcpy(queue, rangesInCPU->hitRanges_buf, rangesBuffers->hitRanges_buf); @@ -1394,7 +1376,7 @@ lst::ObjectRangesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::l return rangesInCPU; } -lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getMiniDoublets(bool sync) { +MiniDoubletsBuffer* Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1402,7 +1384,7 @@ lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::l alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - mdsInCPU = new ::lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); + mdsInCPU = new MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1417,7 +1399,7 @@ lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::l return mdsInCPU; } -lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getSegments(bool sync) { +SegmentsBuffer* Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1425,8 +1407,8 @@ lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU = new ::lst::SegmentsBuffer( - nMemHost, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devHost, queue); + segmentsInCPU = + new SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1454,7 +1436,7 @@ lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: return segmentsInCPU; } -lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTriplets(bool sync) { +TripletsBuffer* Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1462,7 +1444,7 @@ lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU = new ::lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); + tripletsInCPU = new TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1477,12 +1459,9 @@ lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: alpaka::memcpy(queue, tripletsInCPU->rtLo_buf, tripletsBuffers->rtLo_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->rtHi_buf, tripletsBuffers->rtHi_buf, nMemHost); #endif + alpaka::memcpy(queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, Params_T3::kHits * nMemHost); alpaka::memcpy( - queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, ::lst::Params_T3::kHits * nMemHost); - alpaka::memcpy(queue, - tripletsInCPU->logicalLayers_buf, - tripletsBuffers->logicalLayers_buf, - ::lst::Params_T3::kLayers * nMemHost); + queue, tripletsInCPU->logicalLayers_buf, tripletsBuffers->logicalLayers_buf, Params_T3::kLayers * nMemHost); alpaka::memcpy(queue, tripletsInCPU->segmentIndices_buf, tripletsBuffers->segmentIndices_buf, 2 * nMemHost); alpaka::memcpy(queue, tripletsInCPU->betaIn_buf, tripletsBuffers->betaIn_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); @@ -1494,7 +1473,7 @@ lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: return tripletsInCPU; } -lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getQuintuplets(bool sync) { +QuintupletsBuffer* Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1502,7 +1481,7 @@ lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::ls alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU = new ::lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); + quintupletsInCPU = new QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1513,7 +1492,7 @@ lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::ls alpaka::memcpy(queue, quintupletsInCPU->lowerModuleIndices_buf, quintupletsBuffers->lowerModuleIndices_buf, - ::lst::Params_T5::kLayers * nMemHost); + Params_T5::kLayers * nMemHost); alpaka::memcpy(queue, quintupletsInCPU->innerRadius_buf, quintupletsBuffers->innerRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->bridgeRadius_buf, quintupletsBuffers->bridgeRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->outerRadius_buf, quintupletsBuffers->outerRadius_buf, nMemHost); @@ -1531,7 +1510,7 @@ lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::ls return quintupletsInCPU; } -lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelTriplets(bool sync) { +PixelTripletsBuffer* Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1539,7 +1518,7 @@ lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE:: alpaka::wait(queue); // wait for the value before using auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU = new ::lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); + pixelTripletsInCPU = new PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); *pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; @@ -1571,8 +1550,7 @@ lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE:: return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelQuintuplets( - bool sync) { +PixelQuintupletsBuffer* Event::getPixelQuintuplets(bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1580,7 +1558,7 @@ lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPAC alpaka::wait(queue); // wait for the value before using auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU = new ::lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); + pixelQuintupletsInCPU = new PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; @@ -1609,8 +1587,7 @@ lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPAC return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidates( - bool sync) { +TrackCandidatesBuffer* Event::getTrackCandidates(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1618,21 +1595,21 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE alpaka::wait(queue); auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - ::lst::Params_pT5::kHits * nTrackCanHost); + Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->logicalLayers_buf, trackCandidatesBuffers->logicalLayers_buf, - ::lst::Params_pT5::kLayers * nTrackCanHost); + Params_pT5::kLayers * nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->directObjectIndices_buf, trackCandidatesBuffers->directObjectIndices_buf, @@ -1649,8 +1626,7 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidatesInCMSSW( - bool sync) { +TrackCandidatesBuffer* Event::getTrackCandidatesInCMSSW(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1658,15 +1634,15 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE alpaka::wait(queue); // wait for the value before using auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - ::lst::Params_pT5::kHits * nTrackCanHost); + Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, @@ -1679,11 +1655,10 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE return trackCandidatesInCPU; } -lst::ModulesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getModules(bool isFull, - bool sync) { +ModulesBuffer* Event::getModules(bool isFull, bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. - modulesInCPU = new ::lst::ModulesBuffer(devHost, nModules_, nPixels_); + modulesInCPU = new ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); if (sync) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 3c3549f96d41e..2ad8e150ece88 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -2,8 +2,8 @@ #define RecoTracker_LSTCore_src_alpaka_Event_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "RecoTracker/LSTCore/interface/Module.h" -#include "RecoTracker/LSTCore/interface/LST.h" #include "Hit.h" #include "Segment.h" @@ -17,6 +17,11 @@ #include "HeterogeneousCore/AlpakaInterface/interface/host.h" +using ::lst::EndcapGeometryBuffer; +using ::lst::LSTESData; +using ::lst::ModulesBuffer; +using ::lst::PixelMap; + namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace lst { @@ -42,36 +47,36 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { unsigned int nTotalSegments_; //Device stuff - ::lst::ObjectRanges* rangesInGPU; - ::lst::ObjectRangesBuffer* rangesBuffers; - ::lst::Hits* hitsInGPU; - ::lst::HitsBuffer* hitsBuffers; - ::lst::MiniDoublets* mdsInGPU; - ::lst::MiniDoubletsBuffer* miniDoubletsBuffers; - ::lst::Segments* segmentsInGPU; - ::lst::SegmentsBuffer* segmentsBuffers; - ::lst::Triplets* tripletsInGPU; - ::lst::TripletsBuffer* tripletsBuffers; - ::lst::Quintuplets* quintupletsInGPU; - ::lst::QuintupletsBuffer* quintupletsBuffers; - ::lst::TrackCandidates* trackCandidatesInGPU; - ::lst::TrackCandidatesBuffer* trackCandidatesBuffers; - ::lst::PixelTriplets* pixelTripletsInGPU; - ::lst::PixelTripletsBuffer* pixelTripletsBuffers; - ::lst::PixelQuintuplets* pixelQuintupletsInGPU; - ::lst::PixelQuintupletsBuffer* pixelQuintupletsBuffers; + ObjectRanges* rangesInGPU; + ObjectRangesBuffer* rangesBuffers; + Hits* hitsInGPU; + HitsBuffer* hitsBuffers; + MiniDoublets* mdsInGPU; + MiniDoubletsBuffer* miniDoubletsBuffers; + Segments* segmentsInGPU; + SegmentsBuffer* segmentsBuffers; + Triplets* tripletsInGPU; + TripletsBuffer* tripletsBuffers; + Quintuplets* quintupletsInGPU; + QuintupletsBuffer* quintupletsBuffers; + TrackCandidates* trackCandidatesInGPU; + TrackCandidatesBuffer* trackCandidatesBuffers; + PixelTriplets* pixelTripletsInGPU; + PixelTripletsBuffer* pixelTripletsBuffers; + PixelQuintuplets* pixelQuintupletsInGPU; + PixelQuintupletsBuffer* pixelQuintupletsBuffers; //CPU interface stuff - ::lst::ObjectRangesBuffer* rangesInCPU; - ::lst::HitsBuffer* hitsInCPU; - ::lst::MiniDoubletsBuffer* mdsInCPU; - ::lst::SegmentsBuffer* segmentsInCPU; - ::lst::TripletsBuffer* tripletsInCPU; - ::lst::TrackCandidatesBuffer* trackCandidatesInCPU; - ::lst::ModulesBuffer* modulesInCPU; - ::lst::QuintupletsBuffer* quintupletsInCPU; - ::lst::PixelTripletsBuffer* pixelTripletsInCPU; - ::lst::PixelQuintupletsBuffer* pixelQuintupletsInCPU; + ObjectRangesBuffer* rangesInCPU; + HitsBuffer* hitsInCPU; + MiniDoubletsBuffer* mdsInCPU; + SegmentsBuffer* segmentsInCPU; + TripletsBuffer* tripletsInCPU; + TrackCandidatesBuffer* trackCandidatesInCPU; + ModulesBuffer* modulesInCPU; + QuintupletsBuffer* quintupletsInCPU; + PixelTripletsBuffer* pixelTripletsInCPU; + PixelQuintupletsBuffer* pixelQuintupletsInCPU; void initSync(bool verbose); @@ -82,13 +87,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const uint16_t nLowerModules_; const unsigned int nPixels_; const unsigned int nEndCapMap_; - ::lst::ModulesBuffer const& modulesBuffers_; - ::lst::PixelMap const& pixelMapping_; - ::lst::EndcapGeometryBuffer const& endcapGeometryBuffers_; + ModulesBuffer const& modulesBuffers_; + PixelMap const& pixelMapping_; + EndcapGeometryBuffer const& endcapGeometryBuffers_; public: // Constructor used for CMSSW integration. Uses an external queue. - Event(bool verbose, Queue const& q, const ::lst::LSTESData* deviceESData) + Event(bool verbose, Queue const& q, const LSTESData* deviceESData) : queue(q), devAcc(alpaka::getDev(q)), devHost(cms::alpakatools::host()), @@ -184,18 +189,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // (has no effect on repeated calls) // set to false may allow faster operation with concurrent calls of get* // HANDLE WITH CARE - ::lst::HitsBuffer* getHits(bool sync = true); - ::lst::HitsBuffer* getHitsInCMSSW(bool sync = true); - ::lst::ObjectRangesBuffer* getRanges(bool sync = true); - ::lst::MiniDoubletsBuffer* getMiniDoublets(bool sync = true); - ::lst::SegmentsBuffer* getSegments(bool sync = true); - ::lst::TripletsBuffer* getTriplets(bool sync = true); - ::lst::QuintupletsBuffer* getQuintuplets(bool sync = true); - ::lst::PixelTripletsBuffer* getPixelTriplets(bool sync = true); - ::lst::PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); - ::lst::TrackCandidatesBuffer* getTrackCandidates(bool sync = true); - ::lst::TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); - ::lst::ModulesBuffer* getModules(bool isFull = false, bool sync = true); + HitsBuffer* getHits(bool sync = true); + HitsBuffer* getHitsInCMSSW(bool sync = true); + ObjectRangesBuffer* getRanges(bool sync = true); + MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + SegmentsBuffer* getSegments(bool sync = true); + TripletsBuffer* getTriplets(bool sync = true); + QuintupletsBuffer* getQuintuplets(bool sync = true); + PixelTripletsBuffer* getPixelTriplets(bool sync = true); + PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ModulesBuffer* getModules(bool isFull = false, bool sync = true); }; } // namespace lst diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index cb95aa14538f3..1a54008d4331c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -4,7 +4,9 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" -namespace lst { +using ::lst::Modules; + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Hits { unsigned int* nHits; float* xs; @@ -178,10 +180,7 @@ namespace lst { struct ModuleRangesKernel { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Hits hitsInGPU, - int nLowerModules) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, int nLowerModules) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -208,8 +207,8 @@ namespace lst { unsigned int nEndCapMap, // Number of elements in endcap map const unsigned int* geoMapDetId, // DetId's from endcap map const float* geoMapPhi, // Phi values from endcap map - lst::Modules modulesInGPU, - lst::Hits hitsInGPU, + Modules modulesInGPU, + Hits hitsInGPU, unsigned int nHits) const // Total number of hits in event { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -221,7 +220,7 @@ namespace lst { int iDetId = hitsInGPU.detid[ihit]; hitsInGPU.rts[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); - hitsInGPU.phis[ihit] = lst::phi(acc, ihit_x, ihit_y); + hitsInGPU.phis[ihit] = phi(acc, ihit_x, ihit_y); hitsInGPU.etas[ihit] = ((ihit_z > 0) - (ihit_z < 0)) * alpaka::math::acosh( @@ -255,5 +254,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 31f057017a766..bc284d052cc05 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -13,24 +13,24 @@ #include "PixelQuintuplet.h" #include "PixelTriplet.h" -namespace lst { - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(lst::Quintuplets& quintupletsInGPU, +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets& quintupletsInGPU, unsigned int quintupletIndex, bool secondpass = false) { quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(lst::PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets& pixelTripletsInGPU, unsigned int pixelTripletIndex) { pixelTripletsInGPU.isDup[pixelTripletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(lst::PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelQuintupletIndex) { pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(lst::Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(Segments& segmentsInGPU, unsigned int pixelSegmentArrayIndex, bool secondpass = false) { segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; @@ -38,7 +38,7 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, - lst::Quintuplets const& quintupletsInGPU) { + Quintuplets const& quintupletsInGPU) { unsigned int hits1[Params_T5::kHits]; unsigned int hits2[Params_T5::kHits]; @@ -65,7 +65,7 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, unsigned int jx, - lst::PixelQuintuplets const& pixelQuintupletsInGPU) { + PixelQuintuplets const& pixelQuintupletsInGPU) { unsigned int hits1[Params_pT5::kHits]; unsigned int hits2[Params_pT5::kHits]; @@ -92,7 +92,7 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, unsigned int jx, - lst::PixelTriplets const& pixelTripletsInGPU, + PixelTriplets const& pixelTripletsInGPU, int* matched) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; @@ -145,9 +145,9 @@ namespace lst { struct RemoveDupQuintupletsInGPUAfterBuild { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -168,7 +168,7 @@ namespace lst { float eta2 = __H2F(quintupletsInGPU.eta[jx]); float phi2 = __H2F(quintupletsInGPU.phi[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); if (dEta > 0.1f) @@ -194,9 +194,7 @@ namespace lst { struct RemoveDupQuintupletsInGPUBeforeTC { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -240,7 +238,7 @@ namespace lst { float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); if (dEta > 0.1f) continue; @@ -269,7 +267,7 @@ namespace lst { struct RemoveDupPixelTripletsInGPUFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::PixelTriplets pixelTripletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTripletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -306,7 +304,7 @@ namespace lst { struct RemoveDupPixelQuintupletsInGPUFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::PixelQuintuplets pixelQuintupletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -333,10 +331,7 @@ namespace lst { struct CheckHitspLS { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Segments segmentsInGPU, - bool secondpass) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Segments segmentsInGPU, bool secondpass) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -411,7 +406,7 @@ namespace lst { } if (secondpass) { float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); - float dPhi = lst::calculate_dPhi(phi_pix1, phi_pix2); + float dPhi = calculate_dPhi(phi_pix1, phi_pix2); float dR2 = dEta * dEta + dPhi * dPhi; if ((npMatched >= 1) || (dR2 < 1e-5f)) { @@ -422,5 +417,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index e3e9909045a6d..e847eb892af8c 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -1,4 +1,4 @@ -#include "RecoTracker/LSTCore/interface/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "Event.h" @@ -219,16 +219,16 @@ std::vector ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getHitIdxs(sho unsigned int maxNHits = 0; if (trackCandidateType == 7) - maxNHits = ::lst::Params_pT5::kHits; // pT5 + maxNHits = Params_pT5::kHits; // pT5 else if (trackCandidateType == 5) - maxNHits = ::lst::Params_pT3::kHits; // pT3 + maxNHits = Params_pT3::kHits; // pT3 else if (trackCandidateType == 4) - maxNHits = ::lst::Params_T5::kHits; // T5 + maxNHits = Params_T5::kHits; // T5 else if (trackCandidateType == 8) - maxNHits = ::lst::Params_pLS::kHits; // pLS + maxNHits = Params_pLS::kHits; // pLS for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[::lst::Params_pT5::kHits * TCIdx + i]; + unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; unsigned int hitIdx = (trackCandidateType == 8) ? hitIdxInGPU @@ -252,8 +252,8 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getOutput(ALPAKA_ACCELERATOR_NAMESP std::vector tc_seedIdx; std::vector tc_trackCandidateType; - ::lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line - ::lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); + HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line + TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; @@ -276,7 +276,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getOutput(ALPAKA_ACCELERATOR_NAMESP void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::run(Queue& queue, bool verbose, - ::lst::LSTESData const* deviceESData, + LSTESData const* deviceESData, std::vector const& see_px, std::vector const& see_py, std::vector const& see_pz, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 469c79233f9e0..335ceeea2ab79 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -10,7 +10,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct MiniDoublets { unsigned int* nMemoryLocations; @@ -189,9 +189,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, - lst::MiniDoublets& mdsInGPU, - lst::Hits const& hitsInGPU, - lst::Modules const& modulesInGPU, + MiniDoublets& mdsInGPU, + Hits const& hitsInGPU, + Modules const& modulesInGPU, unsigned int lowerHitIdx, unsigned int upperHitIdx, uint16_t lowerModuleIdx, @@ -209,7 +209,8 @@ namespace lst { mdsInGPU.moduleIndices[idx] = lowerModuleIdx; unsigned int anchorHitIndex, outerHitIndex; - if (modulesInGPU.moduleType[lowerModuleIdx] == PS and modulesInGPU.moduleLayerType[lowerModuleIdx] == Strip) { + if (modulesInGPU.moduleType[lowerModuleIdx] == ::lst::PS and + modulesInGPU.moduleLayerType[lowerModuleIdx] == ::lst::Strip) { mdsInGPU.anchorHitIndices[idx] = upperHitIdx; mdsInGPU.outerHitIndices[idx] = lowerHitIdx; @@ -261,7 +262,7 @@ namespace lst { mdsInGPU.outerLowEdgeY[idx] = hitsInGPU.lowEdgeYs[outerHitIndex]; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(Modules const& modulesInGPU, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf @@ -270,10 +271,10 @@ namespace lst { short side = modulesInGPU.sides[moduleIndex]; short rod = modulesInGPU.rods[moduleIndex]; - if (subdet == Barrel) { - if ((side != Center and layer == 3) or (side == NegZ and layer == 2 and rod > 5) or - (side == PosZ and layer == 2 and rod < 8) or (side == NegZ and layer == 1 and rod > 9) or - (side == PosZ and layer == 1 and rod < 4)) + if (subdet == ::lst::Barrel) { + if ((side != ::lst::Center and layer == 3) or (side == ::lst::NegZ and layer == 2 and rod > 5) or + (side == ::lst::PosZ and layer == 2 and rod < 8) or (side == ::lst::NegZ and layer == 1 and rod > 9) or + (side == ::lst::PosZ and layer == 1 and rod < 4)) return true; else return false; @@ -281,7 +282,7 @@ namespace lst { return false; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(Modules const& modulesInGPU, uint16_t moduleIndex) { float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -318,11 +319,11 @@ namespace lst { float moduleSeparation = 0; - if (subdet == Barrel and side == Center) { + if (subdet == ::lst::Barrel and side == ::lst::Center) { moduleSeparation = miniDeltaFlat[iL]; } else if (isTighterTiltedModules(modulesInGPU, moduleIndex)) { moduleSeparation = miniDeltaTilted[iL]; - } else if (subdet == Endcap) { + } else if (subdet == ::lst::Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; } else //Loose tilted modules { @@ -334,7 +335,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( - TAcc const& acc, float rt, lst::Modules const& modulesInGPU, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { + TAcc const& acc, float rt, Modules const& modulesInGPU, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { // ================================================================= // Various constants // ================================================================= @@ -347,16 +348,19 @@ namespace lst { unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); const float rLayNominal = - ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); + ((modulesInGPU.subdets[moduleIndex] == ::lst::Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); const float miniPVoff = 0.1f / rLayNominal; - const float miniMuls = ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); - const bool isTilted = modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] != Center; + const float miniMuls = + ((modulesInGPU.subdets[moduleIndex] == ::lst::Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); + const bool isTilted = + modulesInGPU.subdets[moduleIndex] == ::lst::Barrel and modulesInGPU.sides[moduleIndex] != ::lst::Center; //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly float drdz; if (isTilted) { - if (modulesInGPU.moduleType[moduleIndex] == PS and modulesInGPU.moduleLayerType[moduleIndex] == Strip) { + if (modulesInGPU.moduleType[moduleIndex] == ::lst::PS and + modulesInGPU.moduleLayerType[moduleIndex] == ::lst::Strip) { drdz = modulesInGPU.drdzs[moduleIndex]; } else { drdz = modulesInGPU.drdzs[modulesInGPU.partnerModuleIndices[moduleIndex]]; @@ -375,12 +379,12 @@ namespace lst { // Return the threshold value // ================================================================= // Following condition is met if the module is central and flatly lying - if (modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] == Center) { + if (modulesInGPU.subdets[moduleIndex] == ::lst::Barrel and modulesInGPU.sides[moduleIndex] == ::lst::Center) { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); } // Following condition is met if the module is central and tilted - else if (modulesInGPU.subdets[moduleIndex] == Barrel and - modulesInGPU.sides[moduleIndex] != Center) //all types of tilted modules + else if (modulesInGPU.subdets[moduleIndex] == ::lst::Barrel and + modulesInGPU.sides[moduleIndex] != ::lst::Center) //all types of tilted modules { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); @@ -393,7 +397,7 @@ namespace lst { template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -419,8 +423,8 @@ namespace lst { // lowerModule // lowerHit // upperHit - // lst::endcapGeometry - // lst::tiltedGeometry + // endcapGeometry + // tiltedGeometry // Some variables relevant to the function float xp; // pixel x (pixel hit x) @@ -449,10 +453,11 @@ namespace lst { float absdzprime; // The distance between the two points after shifting const float& drdz_ = modulesInGPU.drdzs[lowerModuleIndex]; // Assign hit pointers based on their hit type - if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS) { // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modulesInGPU.subdets[lowerModuleIndex] == Barrel ? modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel - : modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modulesInGPU.subdets[lowerModuleIndex] == ::lst::Barrel + ? modulesInGPU.moduleLayerType[lowerModuleIndex] != ::lst::Pixel + : modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { xo = xUpper; yo = yUpper; xp = xLower; @@ -477,7 +482,7 @@ namespace lst { } // If it is endcap some of the math gets simplified (and also computers don't like infinities) - isEndcap = modulesInGPU.subdets[lowerModuleIndex] == Endcap; + isEndcap = modulesInGPU.subdets[lowerModuleIndex] == ::lst::Endcap; // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time @@ -492,14 +497,15 @@ namespace lst { moduleSeparation = moduleGapSize(modulesInGPU, lowerModuleIndex); // Sign flips if the pixel is later layer - if (modulesInGPU.moduleType[lowerModuleIndex] == PS and modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel) { + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS and + modulesInGPU.moduleLayerType[lowerModuleIndex] != ::lst::Pixel) { moduleSeparation *= -1; } drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); // Compute arctan of the slope and take care of the slope = infinity case - absArctanSlope = ((slope != lst::lst_INF) ? fabs(alpaka::math::atan(acc, slope)) : float(M_PI) / 2.f); + absArctanSlope = ((slope != lst_INF) ? fabs(alpaka::math::atan(acc, slope)) : float(M_PI) / 2.f); // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently if (xp > 0 and yp > 0) { @@ -523,7 +529,7 @@ namespace lst { // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) if (slope == - lst::lst_INF) // Designated for tilted module when the slope is exactly infinity (module lying along y-axis) + lst_INF) // Designated for tilted module when the slope is exactly infinity (module lying along y-axis) { xn = xa; // New x point is simply where the anchor is yn = yo; // No shift in y @@ -544,7 +550,7 @@ namespace lst { angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { abszn = alpaka::math::abs(acc, zp) + absdzprime; } else { abszn = alpaka::math::abs(acc, zp) - absdzprime; @@ -557,81 +563,9 @@ namespace lst { shiftedCoords[2] = zn; } - template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& dz, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphiChange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper) { - if (modulesInGPU.subdets[lowerModuleIndex] == lst::Barrel) { - return runMiniDoubletDefaultAlgoBarrel(acc, - modulesInGPU, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - dz, - dPhi, - dPhiChange, - shiftedX, - shiftedY, - shiftedZ, - noShiftedDphi, - noShiftedDphiChange, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); - } else { - return runMiniDoubletDefaultAlgoEndcap(acc, - modulesInGPU, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - dz, - dPhi, - dPhiChange, - shiftedX, - shiftedY, - shiftedZ, - noShiftedDphi, - noShiftedDphiChange, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); - } - } - template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -653,7 +587,7 @@ namespace lst { float zUpper, float rtUpper) { dz = zLower - zUpper; - const float dzCut = modulesInGPU.moduleType[lowerModuleIndex] == lst::PS ? 2.f : 10.f; + const float dzCut = modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS ? 2.f : 10.f; const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; @@ -662,7 +596,7 @@ namespace lst { float miniCut = 0; - miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel + miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel ? dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex) : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex); @@ -670,7 +604,7 @@ namespace lst { // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 float xn = 0.f, yn = 0.f; // , zn = 0; float shiftedRt2; - if (modulesInGPU.sides[lowerModuleIndex] != Center) // If barrel and not center it is tilted + if (modulesInGPU.sides[lowerModuleIndex] != ::lst::Center) // If barrel and not center it is tilted { // Shift the hits and calculate new xn, yn position float shiftedCoords[3]; @@ -693,27 +627,27 @@ namespace lst { yn = shiftedCoords[1]; // Lower or the upper hit needs to be modified depending on which one was actually shifted - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; shiftedRt2 = xn * xn + yn * yn; - dPhi = lst::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; shiftedRt2 = xn * xn + yn * yn; - dPhi = lst::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } } else { shiftedX = 0; shiftedY = 0; shiftedZ = 0; - dPhi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); noShiftedDphi = dPhi; } @@ -722,34 +656,34 @@ namespace lst { // Cut #3: The dphi change going from lower Hit to upper Hit // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 - if (modulesInGPU.sides[lowerModuleIndex] != Center) { + if (modulesInGPU.sides[lowerModuleIndex] != ::lst::Center) { // When it is tilted, use the new shifted positions // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modulesInGPU.moduleLayerType[lowerModuleIndex] != lst::Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] != ::lst::Pixel) { // dPhi Change should be calculated so that the upper hit has higher rt. // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); - dPhiChange = (rtLower * rtLower < shiftedRt2) ? lst::deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) - : lst::deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); - noShiftedDphiChange = rtLower < rtUpper ? lst::deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : lst::deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) + : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); } else { // dPhi Change should be calculated so that the upper hit has higher rt. // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) - dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? lst::deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) - : lst::deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); - noShiftedDphiChange = rtLower < rtUpper ? lst::deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : lst::deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); } } else { // When it is flat lying module, whichever is the lowerSide will always have rt lower - dPhiChange = lst::deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); + dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); noShiftedDphiChange = dPhiChange; } @@ -758,7 +692,7 @@ namespace lst { template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -792,7 +726,7 @@ namespace lst { return false; // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 - const float drtCut = modulesInGPU.moduleType[lowerModuleIndex] == lst::PS ? 2.f : 10.f; + const float drtCut = modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS ? 2.f : 10.f; drt = rtLower - rtUpper; if (alpaka::math::abs(acc, drt) >= drtCut) return false; @@ -820,37 +754,37 @@ namespace lst { yn = shiftedCoords[1]; zn = shiftedCoords[2]; - if (modulesInGPU.moduleType[lowerModuleIndex] == lst::PS) { + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS) { // Appropriate lower or upper hit is modified after checking which one was actually shifted - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = lst::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; - dPhi = lst::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } } else { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = lst::deltaPhi(acc, xLower, yLower, xn, yn); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, xn, yn); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" // if it was an endcap it will have zero effect - if (modulesInGPU.moduleType[lowerModuleIndex] == lst::PS) { - dz = modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel ? zLower - zn : zUpper - zn; + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS) { + dz = modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel ? zLower - zn : zUpper - zn; } float miniCut = 0; - miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel + miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel ? dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex, dPhi, dz) : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex, dPhi, dz); @@ -867,13 +801,82 @@ namespace lst { return alpaka::math::abs(acc, dPhiChange) < miniCut; } + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + if (modulesInGPU.subdets[lowerModuleIndex] == ::lst::Barrel) { + return runMiniDoubletDefaultAlgoBarrel(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } else { + return runMiniDoubletDefaultAlgoEndcap(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } + } + struct CreateMiniDoubletsInGPUv2 { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Hits hitsInGPU, - lst::MiniDoublets mdsInGPU, - lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()( + TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -966,7 +969,7 @@ namespace lst { struct CreateMDArrayRangesGPU { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1060,11 +1063,8 @@ namespace lst { struct AddMiniDoubletRangesToEventExplicit { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::ObjectRanges rangesInGPU, - lst::Hits hitsInGPU) const { + ALPAKA_FN_ACC void operator()( + TAcc const& acc, Modules modulesInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU, Hits hitsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1083,5 +1083,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index b337b5f83f8ba..85b7b08dc075b 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -10,153 +10,156 @@ #include "Hit.h" #include "Triplet.h" -namespace lst::t5dnn { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, - const float* xVec, - const float* yVec, - const unsigned int* mdIndices, - const uint16_t* lowerModuleIndices, - unsigned int innerTripletIndex, - unsigned int outerTripletIndex, - float innerRadius, - float outerRadius, - float bridgeRadius) { - // Unpack x-coordinates of hits - float x1 = xVec[0]; - float x2 = xVec[1]; - float x3 = xVec[2]; - float x4 = xVec[3]; - float x5 = xVec[4]; - // Unpack y-coordinates of hits - float y1 = yVec[0]; - float y2 = yVec[1]; - float y3 = yVec[2]; - float y4 = yVec[3]; - float y5 = yVec[4]; - // Unpack module indices - unsigned int mdIndex1 = mdIndices[0]; - unsigned int mdIndex2 = mdIndices[1]; - unsigned int mdIndex3 = mdIndices[2]; - unsigned int mdIndex4 = mdIndices[3]; - unsigned int mdIndex5 = mdIndices[4]; - // Unpack module indices - uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; - uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; - uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; - uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; - uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; - // Compute some convenience variables - short layer2_adjustment = 0; - if (modulesInGPU.layers[lowerModuleIndex1] == 1) { - layer2_adjustment = 1; // get upper segment to be in second layer - } - unsigned int md_idx_for_t5_eta_phi = - segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; - bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap - bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap - bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap - bool is_endcap4 = (modulesInGPU.subdets[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap - bool is_endcap5 = (modulesInGPU.subdets[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap + namespace t5dnn { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + const float* xVec, + const float* yVec, + const unsigned int* mdIndices, + const uint16_t* lowerModuleIndices, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float innerRadius, + float outerRadius, + float bridgeRadius) { + // Unpack x-coordinates of hits + float x1 = xVec[0]; + float x2 = xVec[1]; + float x3 = xVec[2]; + float x4 = xVec[3]; + float x5 = xVec[4]; + // Unpack y-coordinates of hits + float y1 = yVec[0]; + float y2 = yVec[1]; + float y3 = yVec[2]; + float y4 = yVec[3]; + float y5 = yVec[4]; + // Unpack module indices + unsigned int mdIndex1 = mdIndices[0]; + unsigned int mdIndex2 = mdIndices[1]; + unsigned int mdIndex3 = mdIndices[2]; + unsigned int mdIndex4 = mdIndices[3]; + unsigned int mdIndex5 = mdIndices[4]; + // Unpack module indices + uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; + uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; + uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; + uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; + uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; + // Compute some convenience variables + short layer2_adjustment = 0; + if (modulesInGPU.layers[lowerModuleIndex1] == 1) { + layer2_adjustment = 1; // get upper segment to be in second layer + } + unsigned int md_idx_for_t5_eta_phi = + segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; + bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap + bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap + bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap + bool is_endcap4 = (modulesInGPU.subdets[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap + bool is_endcap5 = (modulesInGPU.subdets[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap - // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) - float x[38] = { - alpaka::math::log10(acc, 2 * lst::k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) - mdsInGPU.anchorEta[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) - mdsInGPU.anchorPhi[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) - mdsInGPU.anchorZ[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) - alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) - float(modulesInGPU.layers[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) - mdsInGPU.anchorEta[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) - mdsInGPU.anchorPhi[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) - mdsInGPU.anchorZ[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) - alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) - float(modulesInGPU.layers[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) - mdsInGPU.anchorEta[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) - mdsInGPU.anchorPhi[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) - mdsInGPU.anchorZ[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) - alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) - float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) - alpaka::math::log10(acc, 2 * lst::k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) - mdsInGPU.anchorEta[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) - mdsInGPU.anchorPhi[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) - mdsInGPU.anchorZ[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) - alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) - float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) - mdsInGPU.anchorEta[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) - mdsInGPU.anchorPhi[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) - mdsInGPU.anchorZ[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) - alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) - float(modulesInGPU.layers[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) - mdsInGPU.anchorEta[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) - mdsInGPU.anchorPhi[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) - mdsInGPU.anchorZ[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) - alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) - float(modulesInGPU.layers[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) - alpaka::math::log10(acc, (innerRadius + outerRadius) * lst::k2Rinv1GeVf), // T5 pT (t5_pt) - mdsInGPU.anchorEta[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) - mdsInGPU.anchorPhi[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) - alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) - alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) - alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) - }; + // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) + float x[38] = { + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) + mdsInGPU.anchorEta[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) + mdsInGPU.anchorPhi[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) + mdsInGPU.anchorZ[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) + alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) + float(modulesInGPU.layers[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) + mdsInGPU.anchorEta[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) + mdsInGPU.anchorPhi[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) + mdsInGPU.anchorZ[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) + alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) + float(modulesInGPU.layers[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) + mdsInGPU.anchorEta[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) + mdsInGPU.anchorPhi[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) + mdsInGPU.anchorZ[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) + float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) + mdsInGPU.anchorEta[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) + mdsInGPU.anchorPhi[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) + mdsInGPU.anchorZ[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) + float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) + mdsInGPU.anchorEta[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) + mdsInGPU.anchorPhi[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) + mdsInGPU.anchorZ[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) + alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) + float(modulesInGPU.layers[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) + mdsInGPU.anchorEta[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) + mdsInGPU.anchorPhi[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) + mdsInGPU.anchorZ[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) + alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) + float(modulesInGPU.layers[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) + alpaka::math::log10(acc, (innerRadius + outerRadius) * k2Rinv1GeVf), // T5 pT (t5_pt) + mdsInGPU.anchorEta[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) + mdsInGPU.anchorPhi[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) + alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) + alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) + alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) + }; - // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b - float x_0[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_0[col] = 0; - for (unsigned int inner = 0; inner < 38; ++inner) { - x_0[col] += x[inner] * wgtT_0[inner][col]; + // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b + float x_0[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_0[col] = 0; + for (unsigned int inner = 0; inner < 38; ++inner) { + x_0[col] += x[inner] * wgtT_0[inner][col]; + } + x_0[col] += bias_0[col]; } - x_0[col] += bias_0[col]; - } - // (1): ReLU() - float x_1[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_1[col] = (x_0[col] > 0.f) ? x_0[col] : 0.f; - } + // (1): ReLU() + float x_1[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_1[col] = (x_0[col] > 0.f) ? x_0[col] : 0.f; + } - // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b - float x_2[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_2[col] = 0; - for (unsigned int inner = 0; inner < 32; ++inner) { - x_2[col] += x_1[inner] * wgtT_2[inner][col]; + // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b + float x_2[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_2[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_2[col] += x_1[inner] * wgtT_2[inner][col]; + } + x_2[col] += bias_2[col]; } - x_2[col] += bias_2[col]; - } - // (3): ReLU() - float x_3[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_3[col] = (x_2[col] > 0.f) ? x_2[col] : 0.f; - } + // (3): ReLU() + float x_3[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_3[col] = (x_2[col] > 0.f) ? x_2[col] : 0.f; + } - // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b - float x_4[1]; - for (unsigned int col = 0; col < 1; ++col) { - x_4[col] = 0; - for (unsigned int inner = 0; inner < 32; ++inner) { - x_4[col] += x_3[inner] * wgtT_4[inner][col]; + // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b + float x_4[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_4[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_4[col] += x_3[inner] * wgtT_4[inner][col]; + } + x_4[col] += bias_4[col]; } - x_4[col] += bias_4[col]; - } - // (5): Sigmoid() - float x_5[1]; - for (unsigned int col = 0; col < 1; ++col) { - x_5[col] = alpaka::math::exp(acc, x_4[col]) / (alpaka::math::exp(acc, x_4[col]) + 1); - } + // (5): Sigmoid() + float x_5[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_5[col] = alpaka::math::exp(acc, x_4[col]) / (alpaka::math::exp(acc, x_4[col]) + 1); + } - return x_5[0]; - } + return x_5[0]; + } -} //namespace lst::t5dnn + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h index d7b2f03937bdb..d5321fea07a6e 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h @@ -3,311 +3,313 @@ #include -namespace lst::t5dnn { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + namespace t5dnn { - ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { - -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, - 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, - -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, -0.5780622f, - 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { - {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, - 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, - -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, - -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, - {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, - -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, - -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, - 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, - {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, - 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, - -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, - 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 0.8718957f, -1.1980486f}, - {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, - -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, - -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, - -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, - {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, - 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, -0.1554846f, -0.0831100f, 2.8754771f, - -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, -1.2626950f, 0.9145837f, - -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, - {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, - -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, - -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, - -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, - {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, - 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, - 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, - -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, - {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, - 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, - 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, - -1.3269740f, -3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, - {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, - 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, - 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, - -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, - {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, - 0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, - 1.2458711f, -0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, - 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, - {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, - -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, - -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, - -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, - {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, - 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, - 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, - 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, - {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, - -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, - -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, -0.0709381f, -0.1839961f, -0.0529834f, - 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, - {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, - -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, - 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, - 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, - {0.3995822f, -0.2139665f, 0.3982936f, -0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, - -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, - 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, - 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, - {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, - -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, - -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, - -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, - {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, - 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, - -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, - 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, - {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, - 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, - 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, - 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, - {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, - -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, - -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, - 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 0.6662078f, 2.0991604f}, - {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 0.2300792f, - -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, - 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, - 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, - {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, - -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, - 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, - 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, - {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, - -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, - -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, - -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, - {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, - 0.8555872f, 2.4727983f, -0.2072828f, -1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, - -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, - -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, - {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, - 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, - -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, - 0.6274695f, -1.8314874f, 0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, - {0.0534900f, 0.1787969f, -0.0571320f, -0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, - 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, - -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, - -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, - {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, - 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, - -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, - 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, - {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, - -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, - -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, - -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, - {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 1.2631345f, - 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, - 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, - -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, - {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, - -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, - -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, -0.0525629f, -0.2861001f, - -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, -0.1602017f, -0.0070103f}, - {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, - 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, - 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, - -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, - {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, - -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, - 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, - -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, - {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, - 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, - 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, - 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, - {-4.8917460f, 6.7944999f, -0.2255474f, 0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, - 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, - 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, - -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, - {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, - 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, - 0.6546553f, -2.7179255f, -1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, - 1.3750844f, 0.0596805f, -0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, - {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, - 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, - -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, - 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, - {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, - 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, - -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, - 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, - {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, - 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, - 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, - -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, -2.7904994f}, - {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, - 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, - -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, - 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, - }; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { - 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, - 14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, - -2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, - -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { - {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, - -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, - -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, - 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, - {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, - -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, - 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, - 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, - {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, - -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, - 0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, - 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, - {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, - 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, - 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, - -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, -0.1271127f, 0.1164243f}, - {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 0.0236462f, -0.0704844f, - -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, - -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, - -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, - {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, - 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, - 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, - -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, - {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, - -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, - -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, - 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, - {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, - -0.2628384f, -0.1402464f, -0.6302179f, -0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, - -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, - 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, - {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, - 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, - -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, - -1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, - {0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, - -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, - -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, - -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, - {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, - -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, - 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, - -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, - {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, - 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, - 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, - 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, - {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 0.5166737f, -1.0814301f, - -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, - -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, - 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, - {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, - -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, - -2.0935996f, -0.1845744f, -0.0716080f, -1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, - -0.9677961f, 1.8319578f, -3.1916575f, 3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, - {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, - -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, - 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, - 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, - {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, - -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, - 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, - -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, - {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, - -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, - -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, - -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, - {-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, - 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, - 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, - -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, - {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, - -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, -0.7453306f, -1.9909632f, - -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, -11.0296011f, -1.1292133f, -0.0710276f, - 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, - {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, - -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, - 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, - -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, - {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, - -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, - 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, - 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, - {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, - 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, - -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, - -0.1452804f, -0.0195101f, 0.1234572f, 0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, - {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, - -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, - 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, - -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, - {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, - -0.0119933f, -0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, - -0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, - -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, - {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, - -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, - -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, - -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, - {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, - -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, - -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, - -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, - {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, - -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, - -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, -5.3748040f, -7.4730940f, 0.1019564f, - -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, - {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, - 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, - -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, - 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, - {0.4443443f, 0.6384261f, 1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, - 1.1106120f, -0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, - -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, - -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, - {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, - 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, - 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, - -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, - {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, - 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, - -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, - -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, - {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, - 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, -0.0518054f, - 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, - 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, - }; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { - {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, - {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, - {-0.0601061f}, {0.2086218f}, {-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, - {-0.3354485f}, {0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, - {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, - }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { + -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, + 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, + -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, -0.5780622f, + 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { + {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, + 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, + -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, + -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, + {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, + -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, + -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, + 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, + {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, + 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, + -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, + 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 0.8718957f, -1.1980486f}, + {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, + -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, + -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, + -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, + {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, + 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, -0.1554846f, -0.0831100f, 2.8754771f, + -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, -1.2626950f, 0.9145837f, + -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, + {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, + -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, + -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, + -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, + {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, + 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, + 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, + -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, + {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, + 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, + 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, + -1.3269740f, -3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, + {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, + 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, + 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, + -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, + {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, + 0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, + 1.2458711f, -0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, + 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, + {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, + -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, + -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, + -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, + {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, + 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, + 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, + 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, + {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, + -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, + -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, -0.0709381f, -0.1839961f, -0.0529834f, + 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, + {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, + -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, + 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, + 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, + {0.3995822f, -0.2139665f, 0.3982936f, -0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, + -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, + 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, + 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, + {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, + -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, + -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, + -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, + {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, + 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, + -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, + 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, + {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, + 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, + 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, + 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, + {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, + -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, + -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, + 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 0.6662078f, 2.0991604f}, + {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 0.2300792f, + -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, + 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, + 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, + {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, + -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, + 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, + 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, + {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, + -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, + -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, + -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, + {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, + 0.8555872f, 2.4727983f, -0.2072828f, -1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, + -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, + -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, + {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, + 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, + -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, + 0.6274695f, -1.8314874f, 0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, + {0.0534900f, 0.1787969f, -0.0571320f, -0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, + 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, + -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, + -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, + {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, + 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, + -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, + 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, + {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, + -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, + -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, + -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, + {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 1.2631345f, + 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, + 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, + -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, + {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, + -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, + -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, -0.0525629f, -0.2861001f, + -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, -0.1602017f, -0.0070103f}, + {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, + 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, + 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, + -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, + {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, + -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, + 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, + -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, + {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, + 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, + 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, + 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, + {-4.8917460f, 6.7944999f, -0.2255474f, 0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, + 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, + 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, + -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, + {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, + 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, + 0.6546553f, -2.7179255f, -1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, + 1.3750844f, 0.0596805f, -0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, + {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, + 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, + -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, + 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, + {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, + 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, + -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, + 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, + {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, + 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, + 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, + -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, -2.7904994f}, + {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, + 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, + -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, + 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { + 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, + 14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, + -2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, + -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { + {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, + -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, + -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, + 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, + {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, + -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, + 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, + 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, + {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, + -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, + 0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, + 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, + {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, + 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, + 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, + -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, -0.1271127f, 0.1164243f}, + {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 0.0236462f, -0.0704844f, + -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, + -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, + -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, + {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, + 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, + 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, + -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, + {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, + -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, + -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, + 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, + {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, + -0.2628384f, -0.1402464f, -0.6302179f, -0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, + -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, + 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, + {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, + 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, + -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, + -1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, + {0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, + -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, + -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, + -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, + {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, + -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, + 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, + -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, + {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, + 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, + 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, + 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, + {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 0.5166737f, -1.0814301f, + -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, + -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, + 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, + {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, + -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, + -2.0935996f, -0.1845744f, -0.0716080f, -1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, + -0.9677961f, 1.8319578f, -3.1916575f, 3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, + {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, + -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, + 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, + 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, + {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, + -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, + 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, + -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, + {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, + -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, + -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, + -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, + {-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, + 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, + 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, + -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, + {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, + -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, -0.7453306f, -1.9909632f, + -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, -11.0296011f, -1.1292133f, -0.0710276f, + 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, + {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, + -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, + 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, + -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, + {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, + -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, + 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, + 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, + {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, + 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, + -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, + -0.1452804f, -0.0195101f, 0.1234572f, 0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, + {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, + -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, + 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, + -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, + {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, + -0.0119933f, -0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, + -0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, + -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, + {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, + -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, + -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, + -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, + {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, + -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, + -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, + -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, + {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, + -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, + -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, -5.3748040f, -7.4730940f, 0.1019564f, + -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, + {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, + 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, + -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, + 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, + {0.4443443f, 0.6384261f, 1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, + 1.1106120f, -0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, + -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, + -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, + {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, + 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, + 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, + -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, + {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, + 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, + -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, + -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, + {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, + 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, -0.0518054f, + 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, + 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { + {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, + {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, + {-0.0601061f}, {0.2086218f}, {-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, + {-0.3354485f}, {0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, + {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, + }; -} //namespace lst::t5dnn + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h index 0e17185104c74..81e4358ab30d6 100644 --- a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -3,7 +3,7 @@ #include "RecoTracker/LSTCore/interface/Constants.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct ObjectRanges { int* hitRanges; @@ -150,5 +150,5 @@ namespace lst { void setData(ObjectRangesBuffer& buf) { data_.setData(buf); } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index 12161acc08de0..1ecc256887c77 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -11,7 +11,7 @@ #include "Quintuplet.h" #include "PixelTriplet.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct PixelQuintuplets { unsigned int* pixelIndices; unsigned int* T5Indices; @@ -106,11 +106,11 @@ namespace lst { inline void setData(PixelQuintupletsBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Quintuplets const& quintupletsInGPU, - lst::PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Quintuplets const& quintupletsInGPU, + PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelIndex, unsigned int T5Index, unsigned int pixelQuintupletIndex, @@ -202,7 +202,7 @@ namespace lst { pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -210,25 +210,25 @@ namespace lst { uint16_t lowerModuleIndex5, float rzChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + - 6 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS); const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + - 6 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -292,7 +292,7 @@ namespace lst { return true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -300,25 +300,25 @@ namespace lst { uint16_t lowerModuleIndex5, float rPhiChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + - 6 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS); const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + - 6 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -401,8 +401,8 @@ namespace lst { float chiSquared = 0.f; float absArctanSlope, angleM, xPrime, yPrime, sigma2; for (size_t i = 0; i < nPoints; i++) { - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; } else if (xs[i] < 0 and ys[i] > 0) { @@ -430,7 +430,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, @@ -446,7 +446,7 @@ namespace lst { need not always be a PS strip module, but all non-anchor hits sit on strip modules. */ - ModuleType moduleType; + ::lst::ModuleType moduleType; short moduleSubdet, moduleSide; float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; @@ -458,21 +458,21 @@ namespace lst { const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; //category 1 - barrel PS flat - if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide == ::lst::Center) { delta1[i] = inv1; delta2[i] = inv1; slopes[i] = -999.f; isFlat[i] = true; } //category 2 - barrel 2S - else if (moduleSubdet == Barrel and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 1.f; slopes[i] = -999.f; isFlat[i] = true; } //category 3 - barrel PS tilted - else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide != ::lst::Center) { delta1[i] = inv1; isFlat[i] = false; @@ -483,7 +483,7 @@ namespace lst { } } //category 4 - endcap PS - else if (moduleSubdet == Endcap and moduleType == PS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::PS) { delta1[i] = inv1; isFlat[i] = false; /* @@ -498,7 +498,7 @@ namespace lst { } } //category 5 - endcap 2S - else if (moduleSubdet == Endcap and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 500.f * inv1; isFlat[i] = false; @@ -516,7 +516,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t* lowerModuleIndices, float g, float f, @@ -552,7 +552,7 @@ namespace lst { return chiSquared; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -560,25 +560,25 @@ namespace lst { uint16_t lowerModuleIndex5, float rPhiChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + - 6 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS); const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + - 6 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -642,14 +642,58 @@ namespace lst { return true; } + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t* lowerModuleIndices, + float* rtPix, + float* zPix, + float* rts, + float* zs) { + //use the two anchor hits of the pixel segment to compute the slope + //then compute the pseudo chi squared of the five outer hits + + float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]); + float residual = 0; + float error2 = 0; + //hardcoded array indices!!! + float RMSE = 0; + for (size_t i = 0; i < Params_T5::kLayers; i++) { + uint16_t& lowerModuleIndex = lowerModuleIndices[i]; + const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; + const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; + const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; + + residual = (moduleSubdet == ::lst::Barrel) ? (zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) + : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; + //PS Modules + if (moduleType == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //special dispensation to tilted PS modules! + if (moduleType == 0 and moduleSubdet == ::lst::Barrel and moduleSide != ::lst::Center) { + error2 /= (1.f + drdz * drdz); + } + RMSE += (residual * residual) / error2; + } + + RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. + return RMSE; + } + template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, - lst::Quintuplets const& quintupletsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + Quintuplets const& quintupletsInGPU, unsigned int pixelSegmentIndex, unsigned int quintupletIndex, float& rzChiSquared, @@ -788,63 +832,19 @@ namespace lst { return true; } - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, - uint16_t* lowerModuleIndices, - float* rtPix, - float* zPix, - float* rts, - float* zs) { - //use the two anchor hits of the pixel segment to compute the slope - //then compute the pseudo chi squared of the five outer hits - - float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]); - float residual = 0; - float error2 = 0; - //hardcoded array indices!!! - float RMSE = 0; - for (size_t i = 0; i < Params_T5::kLayers; i++) { - uint16_t& lowerModuleIndex = lowerModuleIndices[i]; - const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; - const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; - const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; - - residual = (moduleSubdet == lst::Barrel) ? (zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) - : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; - const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; - //PS Modules - if (moduleType == 0) { - error2 = kPixelPSZpitch * kPixelPSZpitch; - } else //2S modules - { - error2 = kStrip2SZpitch * kStrip2SZpitch; - } - - //special dispensation to tilted PS modules! - if (moduleType == 0 and moduleSubdet == lst::Barrel and moduleSide != Center) { - error2 /= (1.f + drdz * drdz); - } - RMSE += (residual * residual) / error2; - } - - RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. - return RMSE; - } - struct CreatePixelQuintupletsInGPUFromMapv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::Quintuplets quintupletsInGPU, - lst::PixelQuintuplets pixelQuintupletsInGPU, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + Quintuplets quintupletsInGPU, + PixelQuintuplets pixelQuintupletsInGPU, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments, - lst::ObjectRanges rangesInGPU) const { + ObjectRanges rangesInGPU) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridBlockExtent = alpaka::getWorkDiv(acc); @@ -858,7 +858,7 @@ namespace lst { uint16_t quintupletLowerModuleIndex = modulesInGPU.connectedPixels[iLSModule]; if (quintupletLowerModuleIndex >= *modulesInGPU.nLowerModules) continue; - if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == lst::TwoS) + if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == ::lst::TwoS) continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; if (segmentsInGPU.isDup[i_pLS]) @@ -942,5 +942,5 @@ namespace lst { } // end i_pLS } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 0c78efcafc87f..710c760fb809f 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -11,7 +11,7 @@ #include "ObjectRanges.h" #include "Quintuplet.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // One pixel segment, one outer tracker triplet! struct PixelTriplets { unsigned int* pixelSegmentIndices; @@ -129,10 +129,10 @@ namespace lst { inline void setData(PixelTripletsBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, - lst::PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + PixelTriplets& pixelTripletsInGPU, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float pixelRadius, @@ -210,10 +210,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t pixelLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -228,8 +228,8 @@ namespace lst { unsigned int thirdMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex]; unsigned int fourthMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex + 1]; - if (outerInnerLowerModuleSubdet == lst::Barrel and - (outerOuterLowerModuleSubdet == lst::Barrel or outerOuterLowerModuleSubdet == lst::Endcap)) { + if (outerInnerLowerModuleSubdet == ::lst::Barrel and + (outerOuterLowerModuleSubdet == ::lst::Barrel or outerOuterLowerModuleSubdet == ::lst::Endcap)) { return runTripletDefaultAlgoPPBB(acc, modulesInGPU, rangesInGPU, @@ -244,7 +244,7 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runTripletDefaultAlgoPPEE(acc, modulesInGPU, rangesInGPU, @@ -263,23 +263,23 @@ namespace lst { return false; }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float rzChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); if (layer1 == 8 and layer2 == 9 and layer3 == 10) { return rzChiSquared < 13.6067f; @@ -335,8 +335,8 @@ namespace lst { float chiSquared = 0.f; float absArctanSlope, angleM, xPrime, yPrime, sigma2; for (size_t i = 0; i < nPoints; i++) { - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; } else if (xs[i] < 0 and ys[i] > 0) { @@ -366,7 +366,7 @@ namespace lst { //TODO: merge this one and the pT5 function later into a single function template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t* lowerModuleIndices, float g, float f, @@ -379,33 +379,33 @@ namespace lst { float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; for (size_t i = 0; i < 3; i++) { - ModuleType moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; + ::lst::ModuleType moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; short moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; short moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; float drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; //category 1 - barrel PS flat - if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide == ::lst::Center) { delta1[i] = inv1; delta2[i] = inv1; slopes[i] = -999; isFlat[i] = true; } //category 2 - barrel 2S - else if (moduleSubdet == Barrel and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::TwoS) { delta1[i] = 1; delta2[i] = 1; slopes[i] = -999; isFlat[i] = true; } //category 3 - barrel PS tilted - else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide != ::lst::Center) { delta1[i] = inv1; isFlat[i] = false; delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); } //category 4 - endcap PS - else if (moduleSubdet == Endcap and moduleType == PS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::PS) { delta1[i] = inv1; isFlat[i] = false; @@ -416,7 +416,7 @@ namespace lst { delta2[i] = inv2; } //category 5 - endcap 2S - else if (moduleSubdet == Endcap and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::TwoS) { delta1[i] = 1; delta2[i] = 500 * inv1; isFlat[i] = false; @@ -447,23 +447,23 @@ namespace lst { }; //90pc threshold - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float chiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); if (layer1 == 8 and layer2 == 9 and layer3 == 10) { return chiSquared < 7.003f; @@ -494,23 +494,23 @@ namespace lst { return true; }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float chiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); if (layer1 == 7 and layer2 == 8 and layer3 == 9) // endcap layer 1,2,3, ps { @@ -663,18 +663,18 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterion(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, float pixelRadius, float pixelRadiusError, float tripletRadius, int16_t lowerModuleIndex, uint16_t middleModuleIndex, uint16_t upperModuleIndex) { - if (modulesInGPU.subdets[lowerModuleIndex] == lst::Endcap) { + if (modulesInGPU.subdets[lowerModuleIndex] == ::lst::Endcap) { return passRadiusCriterionEEE(acc, pixelRadius, pixelRadiusError, tripletRadius); - } else if (modulesInGPU.subdets[middleModuleIndex] == lst::Endcap) { + } else if (modulesInGPU.subdets[middleModuleIndex] == ::lst::Endcap) { return passRadiusCriterionBEE(acc, pixelRadius, pixelRadiusError, tripletRadius); - } else if (modulesInGPU.subdets[upperModuleIndex] == lst::Endcap) { + } else if (modulesInGPU.subdets[upperModuleIndex] == ::lst::Endcap) { return passRadiusCriterionBBE(acc, pixelRadius, pixelRadiusError, tripletRadius); } else { return passRadiusCriterionBBB(acc, pixelRadius, pixelRadiusError, tripletRadius); @@ -683,7 +683,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RZChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, const uint16_t* lowerModuleIndices, const float* rtPix, const float* xPix, @@ -724,14 +724,14 @@ namespace lst { float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); float rou = a / p; - if (moduleSubdet == lst::Endcap) { + if (moduleSubdet == ::lst::Endcap) { float s = (zsi - z1) * p / Pz; float x = x1 + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s)); float y = y1 + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s)); diffr = alpaka::math::abs(acc, rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100; } - if (moduleSubdet == lst::Barrel) { + if (moduleSubdet == ::lst::Barrel) { float paraA = r1 * r1 + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y1 * Px - x1 * Py) / a - rtsi * rtsi; float paraB = 2 * (x1 * Px + y1 * Py) / a; float paraC = 2 * (y1 * Px - x1 * Py) / a + 2 * (Px * Px + Py * Py) / (a * a); @@ -747,7 +747,7 @@ namespace lst { diffz = alpaka::math::min(acc, diffz1, diffz2); } - residual = moduleSubdet == lst::Barrel ? diffz : diffr; + residual = moduleSubdet == ::lst::Barrel ? diffz : diffr; //PS Modules if (moduleType == 0) { @@ -758,7 +758,7 @@ namespace lst { } //special dispensation to tilted PS modules! - if (moduleType == 0 and moduleSubdet == lst::Barrel and moduleSide != Center) { + if (moduleType == 0 and moduleSubdet == ::lst::Barrel and moduleSide != ::lst::Center) { float drdz = modulesInGPU.drdzs[lowerModuleIndex]; error2 /= (1 + drdz * drdz); } @@ -772,11 +772,11 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float& pixelRadius, @@ -928,12 +928,12 @@ namespace lst { struct CreatePixelTripletsInGPUFromMapv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::PixelTriplets pixelTripletsInGPU, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + PixelTriplets pixelTripletsInGPU, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments) const { @@ -959,7 +959,7 @@ namespace lst { } #endif //Removes 2S-2S :FIXME: filter these out in the pixel map - if (modulesInGPU.moduleType[tripletLowerModuleIndex] == lst::TwoS) + if (modulesInGPU.moduleType[tripletLowerModuleIndex] == ::lst::TwoS) continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; @@ -990,7 +990,7 @@ namespace lst { outerTripletArrayIndex += gridThreadExtent[2]) { unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[tripletLowerModuleIndex] + outerTripletArrayIndex; - if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]] == lst::TwoS) + if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]] == ::lst::TwoS) continue; //REMOVES PS-2S if (tripletsInGPU.partOfPT5[outerTripletIndex]) @@ -1076,33 +1076,30 @@ namespace lst { betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaOut); return; } if (betaIn * betaOut > 0.f and - (alpaka::math::abs(acc, pt_beta) < 4.f * lst::kPt_betaMax or + (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < - 8.f * lst::kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap { const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = - betaOut + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaOut); //FIXME: need a faster version + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); //FIXME: need a faster version betaAv = 0.5f * (betaInUpd + betaOutUpd); //1st update @@ -1111,69 +1108,65 @@ namespace lst { betaIn += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaOut); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && - alpaka::math::abs(acc, pt_beta) < 12.f * lst::kPt_betaMax) //use betaIn sign as ref + alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax) //use betaIn sign as ref { const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = betaOut + alpaka::math::copysign( acc, alpaka::math::asin( acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) ? (0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; //1st update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate betaIn += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1185,7 +1178,7 @@ namespace lst { unsigned int fourthMDIndex) { float dPhi, betaIn, betaOut, pt_beta, zLo, zHi, zLoPointed, zHiPointed, dPhiCut, betaOutCut; - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; float rt_InUp = mdsInGPU.anchorRt[secondMDIndex]; @@ -1207,7 +1200,7 @@ namespace lst { float rt_InOut = rt_InUp; - if (alpaka::math::abs(acc, lst::deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > 0.5f * float(M_PI)) + if (alpaka::math::abs(acc, deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > 0.5f * float(M_PI)) return false; unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; @@ -1281,7 +1274,7 @@ namespace lst { float diffX = x_OutLo - x_InLo; float diffY = y_OutLo - y_InLo; - dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; @@ -1291,11 +1284,11 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == lst::Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS; + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == ::lst::Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = lst::deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -1309,42 +1302,42 @@ namespace lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - betaIn = -lst::deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + lst::deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); - alpha_OutUp_lowEdge = lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_highEdge = deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; - betaOutRHmin = -alpha_OutUp_highEdge + lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - tl_axis_highEdge_x, - tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - tl_axis_lowEdge_x, - tl_axis_lowEdge_y); + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); } //beta computation @@ -1378,7 +1371,7 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_ptBetaMax = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); const float alphaInAbsReg = alpaka::math::max(acc, @@ -1428,10 +1421,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1443,7 +1436,7 @@ namespace lst { unsigned int fourthMDIndex) { float dPhi, betaIn, betaOut, pt_beta, rtLo, rtHi, dPhiCut, betaOutCut; - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float z_InUp = mdsInGPU.anchorZ[secondMDIndex]; float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; @@ -1487,7 +1480,7 @@ namespace lst { const float dzDrtScale = alpaka::math::tan(acc, slope) / slope; //FIXME: need approximate value const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS; + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS; const float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch @@ -1544,7 +1537,7 @@ namespace lst { float diffX = x_OutLo - x_InLo; float diffY = y_OutLo - y_InLo; - dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) @@ -1553,12 +1546,12 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == lst::Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS; + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == ::lst::Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = lst::deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -1571,41 +1564,41 @@ namespace lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - betaIn = -lst::deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + lst::deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); - alpha_OutUp_lowEdge = lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_highEdge = deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; - betaOutRHmin = -alpha_OutUp_highEdge + lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - tl_axis_highEdge_x, - tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - tl_axis_lowEdge_x, - tl_axis_lowEdge_y); + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); } //beta computation @@ -1637,7 +1630,7 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_ptBetaMax = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); const float alphaInAbsReg = @@ -1690,5 +1683,5 @@ namespace lst { return dBeta * dBeta <= dBetaCut2; } -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 6b602a426a889..4ff67d66d2844 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -14,7 +14,7 @@ #include "ObjectRanges.h" #include "Triplet.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Quintuplets { unsigned int* tripletIndices; uint16_t* lowerModuleIndices; @@ -149,8 +149,8 @@ namespace lst { return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(lst::Triplets const& tripletsInGPU, - lst::Quintuplets& quintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(Triplets const& tripletsInGPU, + Quintuplets& quintupletsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t lowerModule1, @@ -232,7 +232,7 @@ namespace lst { } //90% constraint - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -317,8 +317,8 @@ namespace lst { //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, unsigned int firstMDIndex, unsigned int secondMDIndex, unsigned int thirdMDIndex, @@ -528,7 +528,7 @@ namespace lst { continue; } - // calculation is copied from PixelTriplet.cc lst::computePT3RZChiSquared + // calculation is copied from PixelTriplet.cc computePT3RZChiSquared float diffr = 0, diffz = 0; float rou = a / p; @@ -586,14 +586,14 @@ namespace lst { subdets = modulesInGPU.subdets[lowerModuleIndex3]; } if (i == 2 || i == 3) { - residual = (layeri <= 6 && ((side == lst::Center) or (drdz < 1))) ? diffz : diffr; + residual = (layeri <= 6 && ((side == ::lst::Center) or (drdz < 1))) ? diffz : diffr; float projection_missing2 = 1.f; if (drdz < 1) - projection_missing2 = ((subdets == lst::Endcap) or (side == lst::Center)) + projection_missing2 = ((subdets == ::lst::Endcap) or (side == ::lst::Center)) ? 1.f : 1.f / (1 + drdz * drdz); // cos(atan(drdz)), if dr/dz<1 if (drdz > 1) - projection_missing2 = ((subdets == lst::Endcap) or (side == lst::Center)) + projection_missing2 = ((subdets == ::lst::Endcap) or (side == ::lst::Center)) ? 1.f : (drdz * drdz) / (1 + drdz * drdz); //sin(atan(drdz)), if dr/dz>1 error2 = error2 * projection_missing2; @@ -751,8 +751,8 @@ namespace lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(lst::Triplets const& tripletsInGPU, - lst::Segments const& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(Triplets const& tripletsInGPU, + Segments const& segmentsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex) { unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; @@ -778,7 +778,7 @@ namespace lst { //brute force float candidateRadius; float g, f; - minimumRadius = lst::lst_INF; + minimumRadius = lst_INF; maximumRadius = 0.f; for (size_t i = 0; i < 3; i++) { float x1 = x1Vec[i]; @@ -1012,7 +1012,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, @@ -1029,7 +1029,7 @@ namespace lst { modules. */ - ModuleType moduleType; + ::lst::ModuleType moduleType; short moduleSubdet, moduleSide; float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; @@ -1041,21 +1041,21 @@ namespace lst { const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; //category 1 - barrel PS flat - if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide == ::lst::Center) { delta1[i] = inv1; delta2[i] = inv1; slopes[i] = -999.f; isFlat[i] = true; } //category 2 - barrel 2S - else if (moduleSubdet == Barrel and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 1.f; slopes[i] = -999.f; isFlat[i] = true; } //category 3 - barrel PS tilted - else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide != ::lst::Center) { delta1[i] = inv1; isFlat[i] = false; @@ -1066,7 +1066,7 @@ namespace lst { } } //category 4 - endcap PS - else if (moduleSubdet == Endcap and moduleType == PS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::PS) { delta1[i] = inv1; isFlat[i] = false; @@ -1082,7 +1082,7 @@ namespace lst { } } //category 5 - endcap 2S - else if (moduleSubdet == Endcap and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 500.f * inv1; isFlat[i] = false; @@ -1130,8 +1130,8 @@ namespace lst { // Computing sigmas is a very tricky affair // if the module is tilted or endcap, we need to use the slopes properly! - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; @@ -1213,8 +1213,8 @@ namespace lst { float chiSquared = 0.f; float absArctanSlope, angleM, xPrime, yPrime, sigma2; for (size_t i = 0; i < nPoints; i++) { - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; } else if (xs[i] < 0 and ys[i] > 0) { @@ -1255,33 +1255,30 @@ namespace lst { betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaOut); return; } if (betaIn * betaOut > 0.f and - (alpaka::math::abs(acc, pt_beta) < 4.f * lst::kPt_betaMax or + (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < - 8.f * lst::kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap { const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = - betaOut + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaOut); //FIXME: need a faster version + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); //FIXME: need a faster version betaAv = 0.5f * (betaInUpd + betaOutUpd); //1st update @@ -1290,68 +1287,64 @@ namespace lst { betaIn += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaOut); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && - alpaka::math::abs(acc, pt_beta) < 12.f * lst::kPt_betaMax) //use betaIn sign as ref + alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax) //use betaIn sign as ref { const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = betaOut + alpaka::math::copysign( acc, alpaka::math::asin( acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) ? (0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; //1st update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate betaIn += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1362,8 +1355,8 @@ namespace lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; @@ -1374,17 +1367,17 @@ namespace lst { float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); - float zHi = z_InLo + (z_InLo + lst::kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + + float zHi = z_InLo + (z_InLo + kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + (zpitch_InLo + zpitch_OutLo); - float zLo = z_InLo + (z_InLo - lst::kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - + float zLo = z_InLo + (z_InLo - kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - (zpitch_InLo + zpitch_OutLo); //Cut 1 - z compatibility @@ -1402,7 +1395,7 @@ namespace lst { float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (r3_InLo / rt_InLo); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; dzErr += muls2 * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; dzErr = alpaka::math::sqrt(acc, dzErr); @@ -1410,7 +1403,7 @@ namespace lst { const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; const float zWindow = dzErr / drt_InSeg * drt_OutLo_InLo + - (zpitch_InLo + zpitch_OutLo); //FIXME for lst::ptCut lower than ~0.8 need to add curv path correction + (zpitch_InLo + zpitch_OutLo); //FIXME for ptCut lower than ~0.8 need to add curv path correction float zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; float zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ? 1.f : dzDrtScale) + zWindow; @@ -1421,7 +1414,7 @@ namespace lst { float pvOffset = 0.1f / rt_OutLo; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); // Cut #3: FIXME:deltaPhiPos can be tighter if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; @@ -1431,7 +1424,7 @@ namespace lst { float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #4: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) @@ -1442,16 +1435,16 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == lst::Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS; + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == ::lst::Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorPhi[fourthMDIndex]); + alpha_OutUp = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorPhi[fourthMDIndex]); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -1463,42 +1456,38 @@ namespace lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - float betaIn = - alpha_InLo - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - float betaOut = - -alpha_OutUp + lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = - lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); - alpha_OutUp_lowEdge = - lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + alpha_OutUp_highEdge = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + alpha_OutUp_lowEdge = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaOutRHmin = -alpha_OutUp_highEdge + lst::phi_mpi_pi(acc, - lst::phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); - betaOutRHmax = -alpha_OutUp_lowEdge + lst::phi_mpi_pi(acc, - lst::phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + betaOutRHmin = + -alpha_OutUp_highEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + betaOutRHmax = + -alpha_OutUp_lowEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); } //beta computation @@ -1514,9 +1503,7 @@ namespace lst { (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); float betaInCut = alpaka::math::asin( - acc, - alpaka::math::min( - acc, (-rt_InSeg * corrF + drt_tl_axis) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / drt_InSeg); //Cut #5: first beta cut @@ -1524,7 +1511,7 @@ namespace lst { return false; float betaAv = 0.5f * (betaIn + betaOut); - float pt_beta = drt_tl_axis * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + float pt_beta = drt_tl_axis * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); int lIn = 5; int lOut = isEC_lastLayer ? 11 : 5; float sdOut_dr = alpaka::math::sqrt(acc, @@ -1534,7 +1521,7 @@ namespace lst { (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - lst::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) @@ -1548,19 +1535,19 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_maxPtBeta = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confimm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confimm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, alpha_InLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float alphaOutAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, alpha_OutLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * lst::kDeltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * lst::kDeltaZLum / z_OutLo); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); const float sinDPhi = alpaka::math::sin(acc, dPhi); @@ -1580,8 +1567,7 @@ namespace lst { const float dBetaROut2 = dBetaROut * dBetaROut; float betaOutCut = - alpaka::math::asin(acc, - alpaka::math::min(acc, drt_tl_axis * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut @@ -1601,9 +1587,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1614,8 +1600,8 @@ namespace lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; @@ -1626,21 +1612,21 @@ namespace lst { float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); float zGeom = zpitch_InLo + zpitch_OutLo; // Cut #0: Preliminary (Only here in endcap case) if (z_InLo * z_OutLo <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS; - float rtGeom1 = isOutSgInnerMDPS ? lst::kPixelPSZpitch : lst::kStrip2SZpitch; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS; + float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch : kStrip2SZpitch; float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InLo); float rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - rtGeom1; //slope correction only on the lower end @@ -1669,12 +1655,12 @@ namespace lst { const float coshEta = dr3SDIn / drtSDIn; //direction estimate const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); - const float zGeom1_another = lst::kPixelPSZpitch; + const float zGeom1_another = kPixelPSZpitch; float kZ = (z_OutLo - z_InLo) / dzSDIn; float drtErr = zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (rIn / rt_InLo); - const float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; drtErr = alpaka::math::sqrt(acc, drtErr); @@ -1685,7 +1671,7 @@ namespace lst { const float pvOffset = 0.1f / rt_OutLo; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); //Cut #4: deltaPhiPos can be tighter if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) @@ -1696,7 +1682,7 @@ namespace lst { float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; @@ -1706,33 +1692,32 @@ namespace lst { float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; - float sdOut_alphaOut = lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorPhi[fourthMDIndex]); + float sdOut_alphaOut = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorPhi[fourthMDIndex]); - float sdOut_alphaOut_min = lst::phi_mpi_pi( + float sdOut_alphaOut_min = phi_mpi_pi( acc, __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex])); - float sdOut_alphaOut_max = lst::phi_mpi_pi( + float sdOut_alphaOut_max = phi_mpi_pi( acc, __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex])); float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float betaIn = - sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; float betaOut = - -sdOut_alphaOut + lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; - bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == lst::Endcap) and - (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == lst::TwoS); + bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == ::lst::Endcap) and + (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == ::lst::TwoS); if (isEC_secondLayer) { betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; @@ -1765,8 +1750,7 @@ namespace lst { float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); const float corrF = 1.f; float betaInCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #6: first beta cut @@ -1774,7 +1758,7 @@ namespace lst { return false; float betaAv = 0.5f * (betaIn + betaOut); - float pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); float lIn = 5; float lOut = 11; @@ -1786,7 +1770,7 @@ namespace lst { (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - lst::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) @@ -1800,25 +1784,25 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_maxPtBeta = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdIn_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float alphaOutAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdOut_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * lst::kDeltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * lst::kDeltaZLum / z_OutLo); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); const float sinDPhi = alpaka::math::sin(acc, dPhi); const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; - if (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS) { + if (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS) { dBetaROut = (alpaka::math::sqrt(acc, mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + @@ -1830,9 +1814,8 @@ namespace lst { } const float dBetaROut2 = dBetaROut * dBetaROut; - float betaOutCut = - alpaka::math::asin(acc, alpaka::math::min(acc, dr * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + - (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) @@ -1851,9 +1834,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1873,7 +1856,7 @@ namespace lst { float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly @@ -1882,13 +1865,13 @@ namespace lst { if ((z_InLo * z_OutLo) <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS; + bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS; - float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * lst::kPixelPSZpitch - : (isInSgInnerMDPS or isOutSgInnerMDPS) ? lst::kPixelPSZpitch + lst::kStrip2SZpitch - : 2.f * lst::kStrip2SZpitch; + float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgInnerMDPS) ? kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; float dz = z_OutLo - z_InLo; float rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end @@ -1902,7 +1885,7 @@ namespace lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == lst::PS; + bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == ::lst::PS; const float drtSDIn = rt_InOut - rt_InLo; const float dzSDIn = z_InOut - z_InLo; @@ -1915,12 +1898,12 @@ namespace lst { float kZ = (z_OutLo - z_InLo) / dzSDIn; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; - float drtErr = alpaka::math::sqrt( - acc, - lst::kPixelPSZpitch * lst::kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + - muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); float rtWindow = drtErr + rtGeom; @@ -1939,7 +1922,7 @@ namespace lst { float pvOffset = 0.1f / rtOut; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; @@ -1949,7 +1932,7 @@ namespace lst { float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) @@ -1957,21 +1940,20 @@ namespace lst { float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; //weird - float sdOut_dPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[thirdMDIndex]); + float sdOut_dPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[thirdMDIndex]); float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]); float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]); - float sdOut_alphaOutRHmin = lst::phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); - float sdOut_alphaOutRHmax = lst::phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); - float sdOut_alphaOut = lst::phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); + float sdOut_alphaOutRHmin = phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); + float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); + float sdOut_alphaOut = phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float betaIn = - sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); @@ -1979,7 +1961,7 @@ namespace lst { float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; float betaOut = - -sdOut_alphaOut + lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; @@ -2006,8 +1988,7 @@ namespace lst { float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); const float corrF = 1.f; float betaInCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #6: first beta cut @@ -2015,7 +1996,7 @@ namespace lst { return false; float betaAv = 0.5f * (betaIn + betaOut); - float pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); int lIn = 11; //endcap int lOut = 13; //endcap @@ -2027,7 +2008,7 @@ namespace lst { (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - lst::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) @@ -2041,27 +2022,26 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_maxPtBeta = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdIn_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float alphaOutAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdOut_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * lst::kDeltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * lst::kDeltaZLum / z_OutLo); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); const float dBetaRIn2 = 0; // TODO-RH float dBetaROut2 = 0; //TODO-RH - float betaOutCut = - alpaka::math::asin(acc, alpaka::math::min(acc, dr * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + - (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) @@ -2080,9 +2060,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -2098,8 +2078,8 @@ namespace lst { short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; - if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Barrel and - outerInnerLowerModuleSubdet == lst::Barrel and outerOuterLowerModuleSubdet == lst::Barrel) { + if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Barrel and + outerInnerLowerModuleSubdet == ::lst::Barrel and outerOuterLowerModuleSubdet == ::lst::Barrel) { return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU, mdsInGPU, @@ -2114,8 +2094,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Barrel and - outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Barrel and + outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU, mdsInGPU, @@ -2130,8 +2110,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Barrel and - outerInnerLowerModuleSubdet == lst::Barrel and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Barrel and + outerInnerLowerModuleSubdet == ::lst::Barrel and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU, mdsInGPU, @@ -2146,8 +2126,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Endcap and - outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Endcap and + outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU, mdsInGPU, @@ -2162,8 +2142,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Endcap and innerOuterLowerModuleSubdet == lst::Endcap and - outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Endcap and innerOuterLowerModuleSubdet == ::lst::Endcap and + outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoEEEE(acc, modulesInGPU, mdsInGPU, @@ -2185,10 +2165,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, - lst::Modules& modulesInGPU, - lst::MiniDoublets& mdsInGPU, - lst::Segments& segmentsInGPU, - lst::Triplets& tripletsInGPU, + Modules& modulesInGPU, + MiniDoublets& mdsInGPU, + Segments& segmentsInGPU, + Triplets& tripletsInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -2278,24 +2258,24 @@ namespace lst { float x3Vec[] = {x3, x3, x3}; float y3Vec[] = {y3, y3, y3}; - if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS) { x1Vec[1] = mdsInGPU.anchorLowEdgeX[firstMDIndex]; x1Vec[2] = mdsInGPU.anchorHighEdgeX[firstMDIndex]; y1Vec[1] = mdsInGPU.anchorLowEdgeY[firstMDIndex]; y1Vec[2] = mdsInGPU.anchorHighEdgeY[firstMDIndex]; } - if (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS) { x2Vec[1] = mdsInGPU.anchorLowEdgeX[secondMDIndex]; x2Vec[2] = mdsInGPU.anchorHighEdgeX[secondMDIndex]; y2Vec[1] = mdsInGPU.anchorLowEdgeY[secondMDIndex]; y2Vec[2] = mdsInGPU.anchorHighEdgeY[secondMDIndex]; } - if (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS) { x3Vec[1] = mdsInGPU.anchorLowEdgeX[thirdMDIndex]; x3Vec[2] = mdsInGPU.anchorHighEdgeX[thirdMDIndex]; @@ -2310,8 +2290,8 @@ namespace lst { x1Vec[i] = x4; y1Vec[i] = y4; } - if (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS) { x1Vec[1] = mdsInGPU.anchorLowEdgeX[fourthMDIndex]; x1Vec[2] = mdsInGPU.anchorHighEdgeX[fourthMDIndex]; @@ -2326,8 +2306,8 @@ namespace lst { x2Vec[i] = x5; y2Vec[i] = y5; } - if (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS) { x2Vec[1] = mdsInGPU.anchorLowEdgeX[fifthMDIndex]; x2Vec[2] = mdsInGPU.anchorHighEdgeX[fifthMDIndex]; @@ -2376,23 +2356,23 @@ namespace lst { //split by category bool matchedRadii; - if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Barrel) { + if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Barrel) { matchedRadii = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + } else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { matchedRadii = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + } else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { if (modulesInGPU.layers[lowerModuleIndex1] == 1) { matchedRadii = matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); @@ -2405,17 +2385,17 @@ namespace lst { } } - else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { matchedRadii = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + } else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { matchedRadii = matchRadiiBEEEE(acc, innerRadius, bridgeRadius, @@ -2464,22 +2444,22 @@ namespace lst { #ifdef USE_T5_DNN unsigned int mdIndices[] = {firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex}; - float inference = lst::t5dnn::runInference(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, - xVec, - yVec, - mdIndices, - lowerModuleIndices, - innerTripletIndex, - outerTripletIndex, - innerRadius, - outerRadius, - bridgeRadius); - TightCutFlag = TightCutFlag and (inference > lst::t5dnn::kLSTWp2); // T5-in-TC cut - if (inference <= lst::t5dnn::kLSTWp2) // T5-building cut + float inference = t5dnn::runInference(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + xVec, + yVec, + mdIndices, + lowerModuleIndices, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius); + TightCutFlag = TightCutFlag and (inference > t5dnn::kLSTWp2); // T5-in-TC cut + if (inference <= t5dnn::kLSTWp2) // T5-building cut return false; #endif @@ -2537,12 +2517,12 @@ namespace lst { struct CreateQuintupletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU, uint16_t nEligibleT5Modules) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -2624,7 +2604,7 @@ namespace lst { float eta = mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]]; - float pt = (innerRadius + outerRadius) * lst::k2Rinv1GeVf; + float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; float scores = chiSquared + nonAnchorChiSquared; addQuintupletToMemory(tripletsInGPU, quintupletsInGPU, @@ -2666,9 +2646,9 @@ namespace lst { struct CreateEligibleModulesListForQuintupletsGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Triplets tripletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -2698,9 +2678,9 @@ namespace lst { if (tripletsInGPU.nTriplets[i] == 0) continue; - if (module_subdets == lst::Barrel and module_layers >= 3) + if (module_subdets == ::lst::Barrel and module_layers >= 3) continue; - if (module_subdets == lst::Endcap and module_layers > 1) + if (module_subdets == ::lst::Endcap and module_layers > 1) continue; int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{}); @@ -2770,9 +2750,9 @@ namespace lst { struct AddQuintupletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -2792,5 +2772,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 2c9634fd34373..b74de58f3c233 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -11,7 +11,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Segments { FPX* dPhis; FPX* dPhiMins; @@ -176,7 +176,7 @@ namespace lst { inline void setData(SegmentsBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" @@ -186,18 +186,20 @@ namespace lst { short side = modulesInGPU.sides[moduleIndex]; short rod = modulesInGPU.rods[moduleIndex]; - return (subdet == Barrel) && (((side != Center) && (layer == 3)) || - ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || - ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + return (subdet == ::lst::Barrel) && + (((side != ::lst::Center) && (layer == 3)) || + ((side == ::lst::NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == ::lst::PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); } ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(short subdet, short layer, short side, short rod) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf - return (subdet == Barrel) && (((side != Center) && (layer == 3)) || - ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || - ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + return (subdet == ::lst::Barrel) && + (((side != ::lst::Center) && (layer == 3)) || + ((side == ::lst::NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == ::lst::PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { @@ -216,11 +218,11 @@ namespace lst { float moduleSeparation = 0; - if (subdet == Barrel and side == Center) { + if (subdet == ::lst::Barrel and side == ::lst::Center) { moduleSeparation = miniDeltaFlat[iL]; } else if (isTighterTiltedModules_seg(subdet, layer, side, rod)) { moduleSeparation = miniDeltaTilted[iL]; - } else if (subdet == Endcap) { + } else if (subdet == ::lst::Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; } else //Loose tilted modules { @@ -230,7 +232,7 @@ namespace lst { return moduleSeparation; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(lst::Modules const& modulesInGPU, unsigned int moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -248,11 +250,11 @@ namespace lst { float moduleSeparation = 0; - if (subdet == Barrel and side == Center) { + if (subdet == ::lst::Barrel and side == ::lst::Center) { moduleSeparation = miniDeltaFlat[iL]; } else if (isTighterTiltedModules_seg(modulesInGPU, moduleIndex)) { moduleSeparation = miniDeltaTilted[iL]; - } else if (subdet == Endcap) { + } else if (subdet == ::lst::Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; } else //Loose tilted modules { @@ -265,8 +267,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, float* dAlphaThresholdValues, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, float xIn, float yIn, float zIn, @@ -279,7 +281,7 @@ namespace lst { uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex) { - float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel) + float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel) ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; @@ -289,15 +291,15 @@ namespace lst { const float dAlpha_Bfield = alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - bool isInnerTilted = modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[innerLowerModuleIndex] != lst::Center; - bool isOuterTilted = modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[outerLowerModuleIndex] != lst::Center; + bool isInnerTilted = modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[innerLowerModuleIndex] != ::lst::Center; + bool isOuterTilted = modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[outerLowerModuleIndex] != ::lst::Center; float drdzInner = modulesInGPU.drdzs[innerLowerModuleIndex]; float drdzOuter = modulesInGPU.drdzs[outerLowerModuleIndex]; - float innerModuleGapSize = lst::moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); - float outerModuleGapSize = lst::moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); + float innerModuleGapSize = moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); + float outerModuleGapSize = moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); const float innerminiTilt2 = isInnerTilted ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) @@ -313,14 +315,14 @@ namespace lst { float sdLumForInnerMini2; float sdLumForOuterMini2; - if (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel) { + if (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel) { sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForInnerMini2 = (mdsInGPU.dphis[innerMDIndex] * mdsInGPU.dphis[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / (mdsInGPU.dzs[innerMDIndex] * mdsInGPU.dzs[innerMDIndex]); } - if (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel) { + if (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel) { sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForOuterMini2 = (mdsInGPU.dphis[outerMDIndex] * mdsInGPU.dphis[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / @@ -330,23 +332,23 @@ namespace lst { // Unique stuff for the segment dudes alone float dAlpha_res_inner = 0.02f / miniDelta * - (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); float dAlpha_res_outer = 0.02f / miniDelta * - (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); + (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; - if (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[innerLowerModuleIndex] == lst::Center) { + if (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[innerLowerModuleIndex] == ::lst::Center) { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); } - if (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[outerLowerModuleIndex] == lst::Center) { + if (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[outerLowerModuleIndex] == ::lst::Center) { dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[1] = @@ -357,7 +359,7 @@ namespace lst { dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(lst::Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments& segmentsInGPU, unsigned int lowerMDIndex, unsigned int upperMDIndex, uint16_t innerLowerModuleIndex, @@ -388,8 +390,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, - lst::Segments& segmentsInGPU, - lst::MiniDoublets const& mdsInGPU, + Segments& segmentsInGPU, + MiniDoublets const& mdsInGPU, unsigned int innerMDIndex, unsigned int outerMDIndex, uint16_t pixelModuleIndex, @@ -427,7 +429,7 @@ namespace lst { mdsInGPU.anchorY[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; //check which of the circles can accommodate r3LH better (we won't get perfect agreement) - float bestChiSquared = lst::lst_INF; + float bestChiSquared = lst_INF; float chiSquared; size_t bestIndex; for (size_t i = 0; i < 2; i++) { @@ -451,8 +453,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -463,7 +465,7 @@ namespace lst { float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax) { - float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel) + float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel) ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; @@ -494,12 +496,12 @@ namespace lst { float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); - dPhi = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); if (alpaka::math::abs(acc, dPhi) > sdCut) return false; - dPhiChange = lst::phi_mpi_pi(acc, lst::phi(acc, xOut - xIn, yOut - yIn) - mdsInGPU.anchorPhi[innerMDIndex]); + dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mdsInGPU.anchorPhi[innerMDIndex]); if (alpaka::math::abs(acc, dPhiChange) > sdCut) return false; @@ -541,8 +543,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -565,8 +567,8 @@ namespace lst { zOut = mdsInGPU.anchorZ[outerMDIndex]; rtOut = mdsInGPU.anchorRt[outerMDIndex]; - bool outerLayerEndcapTwoS = (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Endcap) && - (modulesInGPU.moduleType[outerLowerModuleIndex] == lst::TwoS); + bool outerLayerEndcapTwoS = (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Endcap) && + (modulesInGPU.moduleType[outerLowerModuleIndex] == ::lst::TwoS); float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float disks2SMinRadius = 60.f; @@ -594,14 +596,12 @@ namespace lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - dPhi = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); float sdCut = sdSlope; if (outerLayerEndcapTwoS) { - float dPhiPos_high = - lst::phi_mpi_pi(acc, mdsInGPU.anchorHighEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); - float dPhiPos_low = - lst::phi_mpi_pi(acc, mdsInGPU.anchorLowEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + float dPhiPos_high = phi_mpi_pi(acc, mdsInGPU.anchorHighEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + float dPhiPos_low = phi_mpi_pi(acc, mdsInGPU.anchorLowEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); dPhiMax = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_high : dPhiPos_low; dPhiMin = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_low : dPhiPos_high; @@ -657,8 +657,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -669,8 +669,8 @@ namespace lst { float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax) { - if (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel and - modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel) { + if (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel) { return runSegmentDefaultAlgoBarrel(acc, modulesInGPU, mdsInGPU, @@ -704,10 +704,10 @@ namespace lst { struct CreateSegmentsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const blockThreadIdx = alpaka::getIdx(acc); auto const gridBlockExtent = alpaka::getWorkDiv(acc); @@ -798,9 +798,9 @@ namespace lst { struct CreateSegmentArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::MiniDoublets mdsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + MiniDoublets mdsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -902,9 +902,9 @@ namespace lst { struct AddSegmentRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -927,11 +927,11 @@ namespace lst { struct AddPixelSegmentToEventKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::Hits hitsInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + Hits hitsInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, unsigned int* hitIndices0, unsigned int* hitIndices1, unsigned int* hitIndices2, @@ -1009,6 +1009,6 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 2a35542afc5fc..16f36df3257cd 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -12,7 +12,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct TrackCandidates { short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS unsigned int* directObjectIndices; // Will hold direct indices to each type containers @@ -108,7 +108,7 @@ namespace lst { inline void setData(TrackCandidatesBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(lst::TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, unsigned int trackletIndex, unsigned int trackCandidateIndex, uint4 hitIndices, @@ -127,7 +127,7 @@ namespace lst { trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(lst::TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, short trackCandidateType, unsigned int innerTrackletIndex, unsigned int outerTrackletIndex, @@ -166,9 +166,9 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, unsigned int jx, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Hits const& hitsInGPU) { + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Hits const& hitsInGPU) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; @@ -207,11 +207,11 @@ namespace lst { struct CrossCleanpT3 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::PixelTriplets pixelTripletsInGPU, - lst::Segments segmentsInGPU, - lst::PixelQuintuplets pixelQuintupletsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + PixelTriplets pixelTripletsInGPU, + Segments segmentsInGPU, + PixelQuintuplets pixelQuintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -235,7 +235,7 @@ namespace lst { float eta2 = segmentsInGPU.eta[pLS_jx - prefix]; float phi2 = segmentsInGPU.phi[pLS_jx - prefix]; float dEta = alpaka::math::abs(acc, (eta1 - eta2)); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-5f) @@ -248,11 +248,11 @@ namespace lst { struct CrossCleanT5 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Quintuplets quintupletsInGPU, - lst::PixelQuintuplets pixelQuintupletsInGPU, - lst::PixelTriplets pixelTripletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + PixelQuintuplets pixelQuintupletsInGPU, + PixelTriplets pixelTripletsInGPU, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -288,7 +288,7 @@ namespace lst { } float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) @@ -303,14 +303,14 @@ namespace lst { struct CrossCleanpLS { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::PixelTriplets pixelTripletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, - lst::MiniDoublets mdsInGPU, - lst::Hits hitsInGPU, - lst::Quintuplets quintupletsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + PixelTriplets pixelTripletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + MiniDoublets mdsInGPU, + Hits hitsInGPU, + Quintuplets quintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -336,7 +336,7 @@ namespace lst { float eta2 = __H2F(quintupletsInGPU.eta[quintupletIndex]); float phi2 = __H2F(quintupletsInGPU.phi[quintupletIndex]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) @@ -353,7 +353,7 @@ namespace lst { float eta2 = __H2F(pixelTripletsInGPU.eta_pix[pT3Index]); float phi2 = __H2F(pixelTripletsInGPU.phi_pix[pT3Index]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) @@ -370,7 +370,7 @@ namespace lst { float eta2 = segmentsInGPU.eta[pLSIndex - prefix]; float phi2 = segmentsInGPU.phi[pLSIndex - prefix]; float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) @@ -385,10 +385,10 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::PixelTriplets pixelTripletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + PixelTriplets pixelTripletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -441,9 +441,9 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::Quintuplets quintupletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::ObjectRanges rangesInGPU) const { + Quintuplets quintupletsInGPU, + TrackCandidates trackCandidatesInGPU, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -495,8 +495,8 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, bool tc_pls_triplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -534,10 +534,10 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::PixelQuintuplets pixelQuintupletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + PixelQuintuplets pixelQuintupletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -586,5 +586,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 1f909bdc02d77..c5ac8bda543d8 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -11,7 +11,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Triplets { unsigned int* segmentIndices; uint16_t* lowerModuleIndices; //3 of them @@ -136,10 +136,10 @@ namespace lst { }; #ifdef CUT_VALUE_DEBUG - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets& tripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets& tripletsInGPU, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, uint16_t innerInnerLowerModuleIndex, @@ -154,10 +154,10 @@ namespace lst { float circleCenterY, unsigned int tripletIndex) #else - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets& tripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets& tripletsInGPU, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, uint16_t innerInnerLowerModuleIndex, @@ -206,9 +206,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -266,9 +266,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -280,8 +280,8 @@ namespace lst { unsigned int innerSegmentIndex, float& betaIn, float& betaInCut) { - bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS); + bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS); float rtIn = mdsInGPU.anchorRt[firstMDIndex]; float rtMid = mdsInGPU.anchorRt[secondMDIndex]; @@ -291,17 +291,16 @@ namespace lst { float zMid = mdsInGPU.anchorZ[secondMDIndex]; zOut = mdsInGPU.anchorZ[thirdMDIndex]; - float alpha1GeVOut = - alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + float alpha1GeVOut = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float rtRatio_OutIn = rtOut / rtIn; // Outer segment beginning rt divided by inner segment beginning rt; float dzDrtScale = alpaka::math::tan(acc, alpha1GeVOut) / alpha1GeVOut; // The track can bend in r-z plane slightly - float zpitchIn = (isPSIn ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitchOut = (isPSOut ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); const float zHi = - zIn + (zIn + lst::kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); - const float zLo = zIn + (zIn - lst::kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + zIn + (zIn + kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); + const float zLo = zIn + (zIn - kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - (zpitchIn + zpitchOut); //slope-correction only on outer end //Cut 1 - z compatibility @@ -320,15 +319,14 @@ namespace lst { float dzErr = (zpitchIn + zpitchOut) * (zpitchIn + zpitchOut) * 2.f; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f) * (r3In / rtIn); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; dzErr += muls2 * drt_OutIn * drt_OutIn / 3.f * coshEta * coshEta; dzErr = alpaka::math::sqrt(acc, dzErr); // Constructing upper and lower bound const float dzMean = dz_InSeg / drt_InSeg * drt_OutIn; - const float zWindow = - dzErr / drt_InSeg * drt_OutIn + - (zpitchIn + zpitchOut); //FIXME for lst::ptCut lower than ~0.8 need to add curv path correction + const float zWindow = dzErr / drt_InSeg * drt_OutIn + + (zpitchIn + zpitchOut); //FIXME for ptCut lower than ~0.8 need to add curv path correction const float zLoPointed = zIn + dzMean * (zIn > 0.f ? 1.f : dzDrtScale) - zWindow; const float zHiPointed = zIn + dzMean * (zIn < 0.f ? 1.f : dzDrtScale) + zWindow; @@ -342,7 +340,7 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = alpha_InLo - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); //beta computation float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); @@ -355,8 +353,7 @@ namespace lst { (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); betaInCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / drt_InSeg); //Cut #3: first beta cut @@ -365,9 +362,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -381,8 +378,8 @@ namespace lst { unsigned int outerSegmentIndex, float& betaIn, float& betaInCut) { - bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS); + bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS); float rtIn = mdsInGPU.anchorRt[firstMDIndex]; float rtMid = mdsInGPU.anchorRt[secondMDIndex]; @@ -392,22 +389,21 @@ namespace lst { float zMid = mdsInGPU.anchorZ[secondMDIndex]; zOut = mdsInGPU.anchorZ[thirdMDIndex]; - float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitchIn = (isPSIn ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitchOut = (isPSOut ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); float zGeom = zpitchIn + zpitchOut; // Cut #0: Preliminary (Only here in endcap case) if (zIn * zOut <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, zIn); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS; - float rtGeom1 = isOutSgInnerMDPS ? lst::kPixelPSZpitch : lst::kStrip2SZpitch; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS; + float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch : kStrip2SZpitch; float zGeom1 = alpaka::math::copysign(acc, zGeom, zIn); float rtLo = rtIn * (1.f + (zOut - zIn - zGeom1) / (zIn + zGeom1 + dLum) / dzDrtScale) - rtGeom1; //slope correction only on the lower end @@ -433,12 +429,12 @@ namespace lst { const float coshEta = dr3SDIn / drtSDIn; //direction estimate const float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); - const float zGeom1_another = lst::kPixelPSZpitch; + const float zGeom1_another = kPixelPSZpitch; const float kZ = (zOut - zIn) / dzSDIn; float drtErr = zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2 * (rtOut - rtIn) / 50.f) * (rIn / rtIn); - const float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; drtErr = alpaka::math::sqrt(acc, drtErr); @@ -455,7 +451,7 @@ namespace lst { float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; @@ -476,8 +472,7 @@ namespace lst { float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - betaInCut = alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #4: first beta cut @@ -486,9 +481,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -509,8 +504,7 @@ namespace lst { float zMid = mdsInGPU.anchorZ[secondMDIndex]; zOut = mdsInGPU.anchorZ[thirdMDIndex]; - float alpha1GeV_Out = - alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + float alpha1GeV_Out = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_Out) / alpha1GeV_Out; // The track can bend in r-z plane slightly @@ -519,13 +513,13 @@ namespace lst { if (zIn * zOut <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, zIn); - bool isOutSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS; + bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS; - float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * lst::kPixelPSZpitch - : (isInSgInnerMDPS or isOutSgOuterMDPS) ? lst::kPixelPSZpitch + lst::kStrip2SZpitch - : 2.f * lst::kStrip2SZpitch; + float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgOuterMDPS) ? kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; float dz = zOut - zIn; const float rtLo = rtIn * (1.f + dz / (zIn + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end @@ -535,7 +529,7 @@ namespace lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - bool isInSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS; + bool isInSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS; float drtSDIn = rtMid - rtIn; float dzSDIn = zMid - zIn; @@ -549,12 +543,12 @@ namespace lst { float kZ = (zOut - zIn) / dzSDIn; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; - float drtErr = alpaka::math::sqrt( - acc, - lst::kPixelPSZpitch * lst::kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + - muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); float rtWindow = drtErr + rtGeom; @@ -577,7 +571,7 @@ namespace lst { float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); @@ -599,8 +593,7 @@ namespace lst { float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - betaInCut = alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #4: first beta cut @@ -609,9 +602,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -629,8 +622,8 @@ namespace lst { short middleLowerModuleSubdet = modulesInGPU.subdets[middleLowerModuleIndex]; short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; - if (innerInnerLowerModuleSubdet == lst::Barrel and middleLowerModuleSubdet == lst::Barrel and - outerOuterLowerModuleSubdet == lst::Barrel) { + if (innerInnerLowerModuleSubdet == ::lst::Barrel and middleLowerModuleSubdet == ::lst::Barrel and + outerOuterLowerModuleSubdet == ::lst::Barrel) { return passPointingConstraintBBB(acc, modulesInGPU, mdsInGPU, @@ -646,8 +639,8 @@ namespace lst { innerSegmentIndex, betaIn, betaInCut); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and middleLowerModuleSubdet == lst::Barrel and - outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and middleLowerModuleSubdet == ::lst::Barrel and + outerOuterLowerModuleSubdet == ::lst::Endcap) { return passPointingConstraintBBE(acc, modulesInGPU, mdsInGPU, @@ -665,8 +658,8 @@ namespace lst { outerSegmentIndex, betaIn, betaInCut); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and middleLowerModuleSubdet == lst::Endcap and - outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and middleLowerModuleSubdet == ::lst::Endcap and + outerOuterLowerModuleSubdet == ::lst::Endcap) { return passPointingConstraintBBE(acc, modulesInGPU, mdsInGPU, @@ -687,8 +680,8 @@ namespace lst { } - else if (innerInnerLowerModuleSubdet == lst::Endcap and middleLowerModuleSubdet == lst::Endcap and - outerOuterLowerModuleSubdet == lst::Endcap) { + else if (innerInnerLowerModuleSubdet == ::lst::Endcap and middleLowerModuleSubdet == ::lst::Endcap and + outerOuterLowerModuleSubdet == ::lst::Endcap) { return passPointingConstraintEEE(acc, modulesInGPU, mdsInGPU, @@ -744,9 +737,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -811,11 +804,11 @@ namespace lst { struct CreateTripletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::ObjectRanges rangesInGPU, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU, uint16_t* index_gpu, uint16_t nonZeroModules) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -928,9 +921,9 @@ namespace lst { struct CreateTripletArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::Segments segmentsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + Segments segmentsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1031,9 +1024,9 @@ namespace lst { struct AddTripletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Triplets tripletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1052,5 +1045,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index bf513865ffbed..eb48917952a38 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -1,13 +1,15 @@ #include "AccessHelper.h" +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + // =============== // ----* Hit *---- // =============== //____________________________________________________________________________________________ std::tuple, std::vector> convertHitsToHitIdxsAndHitTypes( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, std::vector hits) { - lst::Hits const* hitsEvt = event->getHits()->data(); + Event* event, std::vector hits) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hitidxs; std::vector hittypes; for (auto& hit : hits) { @@ -25,11 +27,11 @@ std::tuple, std::vector> convertHitsToHi // =============== //____________________________________________________________________________________________ -std::vector getPixelHitsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); - lst::Modules const* modulesEvt = event->getModules()->data(); +std::vector getPixelHitsFrompLS(Event* event, unsigned int pLS) { + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + ObjectRanges const* rangesEvt = event->getRanges()->data(); + ::lst::Modules const* modulesEvt = event->getModules()->data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; unsigned int MD_1 = segments->mdIndices[2 * (pLS + pLS_offset)]; unsigned int MD_2 = segments->mdIndices[2 * (pLS + pLS_offset) + 1]; @@ -44,8 +46,8 @@ std::vector getPixelHitsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst: } //____________________________________________________________________________________________ -std::vector getPixelHitIdxsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getPixelHitIdxsFrompLS(Event* event, unsigned int pLS) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hitidxs; for (auto& hit : hits) @@ -54,15 +56,15 @@ std::vector getPixelHitIdxsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::l } //____________________________________________________________________________________________ -std::vector getPixelHitTypesFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { +std::vector getPixelHitTypesFrompLS(Event* event, unsigned int pLS) { std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hittypes(hits.size(), 0); return hittypes; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pLS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS(Event* event, + unsigned pLS) { return convertHitsToHitIdxsAndHitTypes(event, getPixelHitsFrompLS(event, pLS)); } @@ -71,16 +73,16 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getHitsFromMD(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); +std::vector getHitsFromMD(Event* event, unsigned int MD) { + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); unsigned int hit_1 = miniDoublets->anchorHitIndices[MD]; unsigned int hit_2 = miniDoublets->outerHitIndices[MD]; return {hit_1, hit_2}; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromMD( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned MD) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromMD(Event* event, + unsigned MD) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromMD(event, MD)); } @@ -89,15 +91,15 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getMDsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { - lst::Segments const* segments = event->getSegments()->data(); +std::vector getMDsFromLS(Event* event, unsigned int LS) { + Segments const* segments = event->getSegments()->data(); unsigned int MD_1 = segments->mdIndices[2 * LS]; unsigned int MD_2 = segments->mdIndices[2 * LS + 1]; return {MD_1, MD_2}; } //____________________________________________________________________________________________ -std::vector getHitsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { +std::vector getHitsFromLS(Event* event, unsigned int LS) { std::vector MDs = getMDsFromLS(event, LS); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -105,8 +107,8 @@ std::vector getHitsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromLS( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned LS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromLS(Event* event, + unsigned LS) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromLS(event, LS)); } @@ -115,15 +117,15 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { - lst::Triplets const* triplets = event->getTriplets()->data(); +std::vector getLSsFromT3(Event* event, unsigned int T3) { + Triplets const* triplets = event->getTriplets()->data(); unsigned int LS_1 = triplets->segmentIndices[2 * T3]; unsigned int LS_2 = triplets->segmentIndices[2 * T3 + 1]; return {LS_1, LS_2}; } //____________________________________________________________________________________________ -std::vector getMDsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { +std::vector getMDsFromT3(Event* event, unsigned int T3) { std::vector LSs = getLSsFromT3(event, T3); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -131,7 +133,7 @@ std::vector getMDsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::vector getHitsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { +std::vector getHitsFromT3(Event* event, unsigned int T3) { std::vector MDs = getMDsFromT3(event, T3); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -140,8 +142,8 @@ std::vector getHitsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT3( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT3(Event* event, + unsigned T3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT3(event, T3)); } @@ -150,15 +152,15 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getT3sFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); +std::vector getT3sFromT5(Event* event, unsigned int T5) { + Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T3_1 = quintuplets->tripletIndices[2 * T5]; unsigned int T3_2 = quintuplets->tripletIndices[2 * T5 + 1]; return {T3_1, T3_2}; } //____________________________________________________________________________________________ -std::vector getLSsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getLSsFromT5(Event* event, unsigned int T5) { std::vector T3s = getT3sFromT5(event, T5); std::vector LSs_0 = getLSsFromT3(event, T3s[0]); std::vector LSs_1 = getLSsFromT3(event, T3s[1]); @@ -166,7 +168,7 @@ std::vector getLSsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::vector getMDsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getMDsFromT5(Event* event, unsigned int T5) { std::vector LSs = getLSsFromT5(event, T5); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -176,7 +178,7 @@ std::vector getMDsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::vector getHitsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getHitsFromT5(Event* event, unsigned int T5) { std::vector MDs = getMDsFromT5(event, T5); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -187,8 +189,8 @@ std::vector getHitsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event } //____________________________________________________________________________________________ -std::vector getHitIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getHitIdxsFromT5(Event* event, unsigned int T5) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFromT5(event, T5); std::vector hitidxs; for (auto& hit : hits) @@ -196,24 +198,24 @@ std::vector getHitIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Ev return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getModuleIdxsFromT5(Event* event, unsigned int T5) { std::vector hits = getHitsFromT5(event, T5); std::vector module_idxs; - lst::Hits const* hitsEvt = event->getHits()->data(); + Hits const* hitsEvt = event->getHits()->data(); for (auto& hitIdx : hits) { module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); } return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getHitTypesFromT5(Event* event, unsigned int T5) { return {4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; ; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT5(Event* event, + unsigned T5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT5(event, T5)); } @@ -222,47 +224,46 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { - lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); - lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); - lst::Modules const* modulesEvt = event->getModules()->data(); +unsigned int getPixelLSFrompT3(Event* event, unsigned int pT3) { + PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); + ObjectRanges const* rangesEvt = event->getRanges()->data(); + ::lst::Modules const* modulesEvt = event->getModules()->data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; return pixelTriplets->pixelSegmentIndices[pT3] - pLS_offset; } //____________________________________________________________________________________________ -unsigned int getT3FrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { - lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); +unsigned int getT3FrompT3(Event* event, unsigned int pT3) { + PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); return pixelTriplets->tripletIndices[pT3]; } //____________________________________________________________________________________________ -std::vector getLSsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getLSsFrompT3(Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getLSsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getMDsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getMDsFrompT3(Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getMDsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, - unsigned int pT3) { +std::vector getOuterTrackerHitsFrompT3(Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getHitsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getPixelHitsFrompT3(Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getHitsFrompT3(Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -272,8 +273,8 @@ std::vector getHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Even } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getHitIdxsFrompT3(Event* event, unsigned int pT3) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT3(event, pT3); std::vector hitidxs; for (auto& hit : hits) @@ -281,17 +282,17 @@ std::vector getHitIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::E return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getModuleIdxsFrompT3(Event* event, unsigned int pT3) { std::vector hits = getOuterTrackerHitsFrompT3(event, pT3); std::vector module_idxs; - lst::Hits const* hitsEvt = event->getHits()->data(); + Hits const* hitsEvt = event->getHits()->data(); for (auto& hitIdx : hits) { module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); } return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getHitTypesFrompT3(Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -302,8 +303,8 @@ std::vector getHitTypesFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst:: } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3(Event* event, + unsigned pT3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT3(event, pT3)); } @@ -312,53 +313,52 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { - lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); - lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); - lst::Modules const* modulesEvt = event->getModules()->data(); +unsigned int getPixelLSFrompT5(Event* event, unsigned int pT5) { + PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); + ObjectRanges const* rangesEvt = event->getRanges()->data(); + ::lst::Modules const* modulesEvt = event->getModules()->data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; return pixelQuintuplets->pixelIndices[pT5] - pLS_offset; } //____________________________________________________________________________________________ -unsigned int getT5FrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { - lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); +unsigned int getT5FrompT5(Event* event, unsigned int pT5) { + PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); return pixelQuintuplets->T5Indices[pT5]; } //____________________________________________________________________________________________ -std::vector getT3sFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getT3sFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getT3sFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getLSsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getLSsFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getLSsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getMDsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getMDsFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getMDsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, - unsigned int pT5) { +std::vector getOuterTrackerHitsFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getHitsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getPixelHitsFrompT5(Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getHitsFrompT5(Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5 = getT5FrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -368,8 +368,8 @@ std::vector getHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Even } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getHitIdxsFrompT5(Event* event, unsigned int pT5) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT5(event, pT5); std::vector hitidxs; for (auto& hit : hits) @@ -378,10 +378,10 @@ std::vector getHitIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::E } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getModuleIdxsFrompT5(Event* event, unsigned int pT5) { std::vector hits = getOuterTrackerHitsFrompT5(event, pT5); std::vector module_idxs; - lst::Hits const* hitsEvt = event->getHits()->data(); + Hits const* hitsEvt = event->getHits()->data(); for (auto& hitIdx : hits) { module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); } @@ -389,7 +389,7 @@ std::vector getModuleIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getHitTypesFrompT5(Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -400,8 +400,8 @@ std::vector getHitTypesFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst:: } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5(Event* event, + unsigned pT5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT5(event, pT5)); } @@ -410,9 +410,9 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int TC) { +std::vector getLSsFromTC(Event* event, unsigned int TC) { // Get the type of the track candidate - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; unsigned int objidx = trackCandidates->directObjectIndices[TC]; switch (type) { @@ -432,10 +432,10 @@ std::vector getLSsFromTC(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromTC( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned TC) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(Event* event, + unsigned TC) { // Get the type of the track candidate - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; unsigned int objidx = trackCandidates->directObjectIndices[TC]; switch (type) { diff --git a/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h b/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h index 8608bc95ed2fa..ce7ce3824849e 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h +++ b/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h @@ -100,7 +100,7 @@ class AnalysisConfig { std::map>> moduleSimHits; std::map modulePopulation; - lst::ModuleConnectionMap moduleConnectiongMapLoose; + ::lst::ModuleConnectionMap moduleConnectiongMapLoose; // Boolean to trigger whether to run cut_value_ntupling bool do_cut_value_ntuple; diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index 73b5daabbfc1a..3841affaaf059 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -20,7 +20,7 @@ bool goodEvent() { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, int evt) { +float runMiniDoublet(LSTEvent *event, int evt) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Mini-Doublet start " << evt << std::endl; @@ -73,7 +73,7 @@ float runMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, int evt) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runSegment(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Segment start" << std::endl; @@ -111,7 +111,7 @@ float runSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runT3(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco T3 start" << std::endl; @@ -153,7 +153,7 @@ float runT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runpT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runpT3(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Triplet pT3 start" << std::endl; @@ -170,7 +170,7 @@ float runpT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runQuintuplet(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Quintuplet start" << std::endl; @@ -216,7 +216,7 @@ float runQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelLineSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean) { +float runPixelLineSegment(LSTEvent *event, bool no_pls_dupclean) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment start" << std::endl; @@ -231,7 +231,7 @@ float runPixelLineSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runPixelQuintuplet(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet start" << std::endl; @@ -248,7 +248,7 @@ float runPixelQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runTrackCandidate(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean, bool tc_pls_triplets) { +float runTrackCandidate(LSTEvent *event, bool no_pls_dupclean, bool tc_pls_triplets) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco TrackCandidate start" << std::endl; @@ -845,7 +845,7 @@ void addInputsToLineSegmentTrackingPreLoad(std::vector> &out_ } //___________________________________________________________________________________________________________________________________________________________________________________________ -float addInputsToEventPreLoad(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, +float addInputsToEventPreLoad(LSTEvent *event, bool useOMP, std::vector trkX, std::vector trkY, @@ -1150,7 +1150,7 @@ void writeMetaData() { // DEPRECATED FUNCTIONS //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTracking(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event, bool useOMP) { +[[deprecated]] float addInputsToLineSegmentTracking(LSTEvent &event, bool useOMP) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Loading Inputs (i.e. outer tracker hits, and pixel line segements) to the Line Segment Tracking.... " @@ -1346,6 +1346,6 @@ void writeMetaData() { } //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event) { +[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(LSTEvent &event) { return addInputsToLineSegmentTracking(event, true); } diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index 911a34f519a6d..7c330a768a175 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -1,5 +1,7 @@ #include "write_lst_ntuple.h" +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + //________________________________________________________________________________________________________________________________ void createOutputBranches() { createRequiredOutputBranches(); @@ -7,7 +9,7 @@ void createOutputBranches() { } //________________________________________________________________________________________________________________________________ -void fillOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void fillOutputBranches(Event* event) { setOutputBranches(event); setOptionalOutputBranches(event); if (ana.gnn_ntuple) @@ -181,7 +183,7 @@ void createGnnNtupleBranches() { } //________________________________________________________________________________________________________________________________ -void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setOutputBranches(Event* event) { // ============ Sim tracks ============= int n_accepted_simtrk = 0; for (unsigned int isimtrk = 0; isimtrk < trk.sim_pt().size(); ++isimtrk) { @@ -224,7 +226,7 @@ void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { std::vector> tc_matched_simIdx; // ============ Track candidates ============= - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Compute reco quantities of track candidate based on final object @@ -276,7 +278,7 @@ void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setOptionalOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setOptionalOutputBranches(Event* event) { #ifdef CUT_VALUE_DEBUG setPixelQuintupletOutputBranches(event); @@ -287,12 +289,12 @@ void setOptionalOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) } //________________________________________________________________________________________________________________________________ -void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setPixelQuintupletOutputBranches(Event* event) { // ============ pT5 ============= - lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); - lst::Modules const* modules = event->getModules()->data(); + PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); + Quintuplets const* quintuplets = event->getQuintuplets()->data(); + Segments const* segments = event->getSegments()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); unsigned int nPixelQuintuplets = @@ -303,7 +305,7 @@ void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* for (unsigned int pT5 = 0; pT5 < nPixelQuintuplets; pT5++) { unsigned int T5Index = getT5FrompT5(event, pT5); unsigned int pLSIndex = getPixelLSFrompT5(event, pT5); - float pt = (__H2F(quintuplets->innerRadius[T5Index]) * lst::k2Rinv1GeVf * 2 + segments->ptIn[pLSIndex]) / 2; + float pt = (__H2F(quintuplets->innerRadius[T5Index]) * k2Rinv1GeVf * 2 + segments->ptIn[pLSIndex]) / 2; float eta = segments->eta[pLSIndex]; float phi = segments->phi[pLSIndex]; @@ -363,10 +365,10 @@ void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //________________________________________________________________________________________________________________________________ -void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); - lst::Modules const* modules = event->getModules()->data(); +void setQuintupletOutputBranches(Event* event) { + Quintuplets const* quintuplets = event->getQuintuplets()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); std::vector sim_t5_matched(n_accepted_simtrk); @@ -376,7 +378,7 @@ void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event int nQuintuplets = quintuplets->nQuintuplets[lowerModuleIdx]; for (unsigned int idx = 0; idx < nQuintuplets; idx++) { unsigned int quintupletIndex = ranges->quintupletModuleIndices[lowerModuleIdx] + idx; - float pt = __H2F(quintuplets->innerRadius[quintupletIndex]) * lst::k2Rinv1GeVf * 2; + float pt = __H2F(quintuplets->innerRadius[quintupletIndex]) * k2Rinv1GeVf * 2; float eta = __H2F(quintuplets->eta[quintupletIndex]); float phi = __H2F(quintuplets->phi[quintupletIndex]); @@ -434,10 +436,10 @@ void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event } //________________________________________________________________________________________________________________________________ -void setPixelTripletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::Segments const* segments = event->getSegments()->data(); +void setPixelTripletOutputBranches(Event* event) { + PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + Segments const* segments = event->getSegments()->data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); unsigned int nPixelTriplets = *pixelTriplets->nPixelTriplets; @@ -497,14 +499,14 @@ void setPixelTripletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* eve } //________________________________________________________________________________________________________________________________ -void setGnnNtupleBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setGnnNtupleBranches(Event* event) { // Get relevant information - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); std::set mds_used_in_sg; std::map md_index_map; @@ -638,10 +640,10 @@ void setGnnNtupleBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { +void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { // Get relevant information - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); // Get the hit indices unsigned int hit0 = miniDoublets->anchorHitIndices[MD]; @@ -678,7 +680,7 @@ void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, un float dphichange = miniDoublets->dphichanges[MD]; // Computing pt - float pt = hit0_r * lst::k2Rinv1GeVf / sin(dphichange); + float pt = hit0_r * k2Rinv1GeVf / sin(dphichange); // T5 eta and phi are computed using outer and innermost hits lst_math::Hit hitA(trk.ph2_x()[anchitidx], trk.ph2_y()[anchitidx], trk.ph2_z()[anchitidx]); @@ -706,10 +708,9 @@ void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, un } //________________________________________________________________________________________________________________________________ -std::tuple> parseTrackCandidate( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { +std::tuple> parseTrackCandidate(Event* event, unsigned int idx) { // Get the type of the track candidate - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[idx]; enum { pT5 = 7, pT3 = 5, T5 = 4, pLS = 8 }; @@ -740,12 +741,12 @@ std::tuple> parseTrackCandidate( } //________________________________________________________________________________________________________________________________ -std::tuple, std::vector> parsepT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { +std::tuple, std::vector> parsepT5(Event* event, + unsigned int idx) { // Get relevant information - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Quintuplets const* quintuplets = event->getQuintuplets()->data(); + Segments const* segments = event->getSegments()->data(); // // pictorial representation of a pT5 @@ -841,7 +842,7 @@ std::tuple, std::vectorptIn[pLS]; const float eta_pLS = segments->eta[pLS]; const float phi_pLS = segments->phi[pLS]; - float pt_T5 = __H2F(quintuplets->innerRadius[T5Index]) * 2 * lst::k2Rinv1GeVf; + float pt_T5 = __H2F(quintuplets->innerRadius[T5Index]) * 2 * k2Rinv1GeVf; const float pt = (pt_T5 + pt_pLS) / 2; // Form the hit idx/type std::vector @@ -852,12 +853,12 @@ std::tuple, std::vector, std::vector> parsepT3( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { +std::tuple, std::vector> parsepT3(Event* event, + unsigned int idx) { // Get relevant information - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Triplets const* triplets = event->getTriplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Triplets const* triplets = event->getTriplets()->data(); + Segments const* segments = event->getSegments()->data(); // // pictorial representation of a pT3 @@ -874,7 +875,7 @@ std::tuple, std::vectorptIn[pLS]; const float eta_pLS = segments->eta[pLS]; const float phi_pLS = segments->phi[pLS]; - float pt_T3 = triplets->circleRadius[T3] * 2 * lst::k2Rinv1GeVf; + float pt_T3 = triplets->circleRadius[T3] * 2 * k2Rinv1GeVf; // average pt const float pt = (pt_pLS + pt_T3) / 2; @@ -887,10 +888,10 @@ std::tuple, std::vector, std::vector> parseT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); +std::tuple, std::vector> parseT5(Event* event, + unsigned int idx) { + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T5 = trackCandidates->directObjectIndices[idx]; std::vector hits = getHitsFromT5(event, T5); @@ -906,7 +907,7 @@ std::tuple, std::vectorinnerRadius[T5] * lst::k2Rinv1GeVf * 2; + const float pt = quintuplets->innerRadius[T5] * k2Rinv1GeVf * 2; // T5 eta and phi are computed using outer and innermost hits lst_math::Hit hitA(trk.ph2_x()[Hit_0], trk.ph2_y()[Hit_0], trk.ph2_z()[Hit_0]); @@ -921,10 +922,10 @@ std::tuple, std::vector, std::vector> parsepLS( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Segments const* segments = event->getSegments()->data(); +std::tuple, std::vector> parsepLS(Event* event, + unsigned int idx) { + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Segments const* segments = event->getSegments()->data(); // Getting pLS index unsigned int pLS = trackCandidates->directObjectIndices[idx]; @@ -942,9 +943,9 @@ std::tuple, std::vectorgetModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printHitMultiplicities(Event* event) { + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); int nHits = 0; for (unsigned int idx = 0; idx <= *(modules->nLowerModules); @@ -957,9 +958,9 @@ void printHitMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMiniDoubletMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Modules const* modules = event->getModules()->data(); +void printMiniDoubletMultiplicities(Event* event) { + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int nMiniDoublets = 0; int totOccupancyMiniDoublets = 0; @@ -976,7 +977,7 @@ void printMiniDoubletMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* ev } //________________________________________________________________________________________________________________________________ -void printAllObjects(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void printAllObjects(Event* event) { printMDs(event); printLSs(event); printpLSs(event); @@ -984,11 +985,11 @@ void printAllObjects(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMDs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printMDs(Event* event) { + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); // Then obtain the lower module index for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { @@ -1006,12 +1007,12 @@ void printMDs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printLSs(Event* event) { + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); int nSegments = 0; for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { @@ -1038,12 +1039,12 @@ void printLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printpLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printpLSs(Event* event) { + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); unsigned int i = *(modules->nLowerModules); unsigned int idx = i; //modules->lowerModuleIndices[i]; @@ -1068,12 +1069,12 @@ void printpLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printT3s(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Triplets const* triplets = event->getTriplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); +void printT3s(Event* event) { + Triplets const* triplets = event->getTriplets()->data(); + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int nTriplets = 0; for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { // unsigned int idx = modules->lowerModuleIndices[i]; @@ -1110,13 +1111,13 @@ void printT3s(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Triplets const* triplets = event->getTriplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void debugPrintOutlierMultiplicities(Event* event) { + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Triplets const* triplets = event->getTriplets()->data(); + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); //int nTrackCandidates = 0; for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { if (trackCandidates->nTrackCandidates[idx] > 50000) {