diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 318622da2ce42..05b9faac480e7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -1,3 +1,5 @@ +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + #include "Event.h" using namespace ALPAKA_ACCELERATOR_NAMESPACE; @@ -181,7 +183,7 @@ void lst::Event::addHitToEvent(std::vector const& x, alpaka::memcpy(queue, hitsBuffers->detid_buf, detId, nHits); alpaka::memcpy(queue, hitsBuffers->idxs_buf, idxInNtuple, nHits); alpaka::memcpy(queue, hitsBuffers->nHits_buf, nHits_view); - alpaka::wait(queue); + alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; Vec3D const blocksPerGrid1{1, 1, max_blocks}; @@ -253,12 +255,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); - // Create a source view for the value to be set - int value = n_max_pixel_md_per_modules; - auto src_view_value = alpaka::createView(devHost, &value, (Idx)1u); + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, src_view_value); - alpaka::wait(queue); + alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; Vec3D const blocksPerGridCreateMD{1, 1, 1}; @@ -270,22 +271,19 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); - alpaka::wait(queue); - unsigned int nTotalMDs; - auto nTotalMDs_view = alpaka::createView(devHost, &nTotalMDs, (Idx)1u); + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); + alpaka::wait(queue); // wait to get the data before manipulation - alpaka::memcpy(queue, nTotalMDs_view, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); - - nTotalMDs += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); mdsInGPU = new lst::MiniDoublets(); miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); - alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_view); - alpaka::wait(queue); + alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); } if (segmentsInGPU == nullptr) { // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. @@ -304,22 +302,20 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& *mdsInGPU)); alpaka::enqueue(queue, createSegmentArrayRangesTask); - alpaka::wait(queue); - auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments, (Idx)1u); + auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments_, (Idx)1u); alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments += n_max_pixel_segments_per_module; + nTotalSegments_ += n_max_pixel_segments_per_module; segmentsInGPU = new lst::Segments(); - segmentsBuffers = - new lst::SegmentsBuffer(nTotalSegments, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsBuffers = new lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); - alpaka::wait(queue); } auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); @@ -366,7 +362,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::createSubView(miniDoubletsBuffers->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue, dst_view_totOccupancyMDs, src_view_mdSize); - alpaka::wait(queue); + alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; Vec3D const blocksPerGrid{1, 1, max_blocks}; @@ -389,7 +385,6 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& size)); alpaka::enqueue(queue, addPixelSegmentToEvent_task); - alpaka::wait(queue); } void lst::Event::createMiniDoublets() { @@ -397,12 +392,11 @@ void lst::Event::createMiniDoublets() { auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); - // Create a source view for the value to be set - int value = n_max_pixel_md_per_modules; - auto src_view_value = alpaka::createView(devHost, &value, (Idx)1u); + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, src_view_value); - alpaka::wait(queue); + alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; Vec3D const blocksPerGridCreateMD{1, 1, 1}; @@ -414,16 +408,13 @@ void lst::Event::createMiniDoublets() { createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); - alpaka::wait(queue); - auto nTotalMDs_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTotalMDs_buf, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); + alpaka::wait(queue); // wait to get the data before manipulation - unsigned int nTotalMDs = *alpaka::getPtrNative(nTotalMDs_buf); - - nTotalMDs += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { mdsInGPU = new lst::MiniDoublets(); @@ -461,7 +452,6 @@ void lst::Event::createMiniDoublets() { *hitsInGPU)); alpaka::enqueue(queue, addMiniDoubletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addMiniDoubletsToEventExplicit(); @@ -471,8 +461,8 @@ void lst::Event::createMiniDoublets() { void lst::Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { segmentsInGPU = new lst::Segments(); - segmentsBuffers = - new lst::SegmentsBuffer(nTotalSegments, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsBuffers = new lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } @@ -505,7 +495,6 @@ void lst::Event::createSegmentsWithModuleMap() { *rangesInGPU)); alpaka::enqueue(queue, addSegmentRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addSegmentsToEventExplicit(); @@ -527,46 +516,40 @@ void lst::Event::createTriplets() { *segmentsInGPU)); alpaka::enqueue(queue, createTripletArrayRangesTask); - alpaka::wait(queue); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? - auto maxTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - alpaka::memcpy(queue, maxTriplets_buf, rangesBuffers->device_nTotalTrips_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); + alpaka::wait(queue); // wait to get the value before using it tripletsInGPU = new lst::Triplets(); - tripletsBuffers = - new lst::TripletsBuffer(*alpaka::getPtrNative(maxTriplets_buf), nLowerModules_, devAcc, queue); + tripletsBuffers = new lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); - alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); } uint16_t nonZeroModules = 0; unsigned int max_InnerSeg = 0; - // Allocate host index - auto index_buf = allocBufWrapper(devHost, nLowerModules_, queue); - uint16_t* index = alpaka::getPtrNative(index_buf); + // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + alpaka::memcpy(queue, nSegments_buf_h, segmentsBuffers->nSegments_buf, nLowerModules_); - // Allocate device index - auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + // ... same for module_nConnectedModules + // FIXME: replace by ES host data + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + alpaka::memcpy(queue, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - // Allocate and copy nSegments from device to host - auto nSegments_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nSegments_buf, segmentsBuffers->nSegments_buf, nLowerModules_); - alpaka::wait(queue); + alpaka::wait(queue); // wait for nSegments and module_nConnectedModules before using - unsigned int* nSegments = alpaka::getPtrNative(nSegments_buf); + auto const* nSegments = nSegments_buf_h.data(); + auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); - // Allocate and copy module_nConnectedModules from device to host - auto module_nConnectedModules_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - alpaka::wait(queue); - - uint16_t* module_nConnectedModules = alpaka::getPtrNative(module_nConnectedModules_buf); + // Allocate host index and fill it directly + auto index_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + auto* index = index_buf_h.data(); for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex]; @@ -578,9 +561,9 @@ void lst::Event::createTriplets() { max_InnerSeg = std::max(max_InnerSeg, nInnerSegments); } - // Copy index from host to device - alpaka::memcpy(queue, index_gpu_buf, index_buf, nonZeroModules); - alpaka::wait(queue); + // Allocate and copy to device index + auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; @@ -614,7 +597,6 @@ void lst::Event::createTriplets() { *rangesInGPU)); alpaka::enqueue(queue, addTripletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addTripletsToEventExplicit(); @@ -629,12 +611,6 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - // Pull nEligibleT5Modules from the device. - auto nEligibleModules_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nEligibleModules_buf, rangesBuffers->nEligibleT5Modules_buf); - alpaka::wait(queue); - uint16_t nEligibleModules = *alpaka::getPtrNative(nEligibleModules_buf); - Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const crossCleanpT3_workDiv = @@ -667,6 +643,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::enqueue(queue, addpT3asTrackCandidatesInGPUTask); + // Pull nEligibleT5Modules from the device. + auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nEligibleModules_buf_h, rangesBuffers->nEligibleT5Modules_buf); + alpaka::wait(queue); // wait to get the value before using + auto const nEligibleModules = *nEligibleModules_buf_h.data(); + Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = @@ -768,12 +750,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); alpaka::memcpy(queue, nTrackCanT5Host_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the values before using them - int nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); - int nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); - int nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); + auto nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); + auto nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); + auto nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); + auto nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { printf( @@ -792,31 +774,30 @@ void lst::Event::createPixelTriplets() { pixelTripletsInGPU->setData(*pixelTripletsBuffers); } + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + + alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); + alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + unsigned int nInnerSegments; auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); + // Create a sub-view for the device buffer auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); - - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - int* superbins = alpaka::getPtrNative(superbins_buf); - int8_t* pixelTypes = alpaka::getPtrNative(pixelTypes_buf); unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); - alpaka::wait(queue); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -856,7 +837,6 @@ void lst::Event::createPixelTriplets() { alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); - alpaka::wait(queue); Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; @@ -878,13 +858,12 @@ void lst::Event::createPixelTriplets() { nInnerSegments)); alpaka::enqueue(queue, createPixelTripletsInGPUFromMapv2Task); - alpaka::wait(queue); #ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before using it std::cout << "number of pixel triplets = " << *alpaka::getPtrNative(nPixelTriplets_buf) << std::endl; #endif @@ -901,7 +880,6 @@ void lst::Event::createPixelTriplets() { removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, *pixelTripletsInGPU)); alpaka::enqueue(queue, removeDupPixelTripletsInGPUFromMapTask); - alpaka::wait(queue); } void lst::Event::createQuintuplets() { @@ -919,17 +897,16 @@ void lst::Event::createQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, createEligibleModulesListForQuintupletsGPUTask); - alpaka::wait(queue); auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait for the values before using them - uint16_t nEligibleT5Modules = *alpaka::getPtrNative(nEligibleT5Modules_buf); - unsigned int nTotalQuintuplets = *alpaka::getPtrNative(nTotalQuintuplets_buf); + auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); + auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (quintupletsInGPU == nullptr) { quintupletsInGPU = new lst::Quintuplets(); @@ -937,7 +914,6 @@ void lst::Event::createQuintuplets() { quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); - alpaka::wait(queue); } Vec3D const threadsPerBlockQuints{1, 8, 32}; @@ -987,7 +963,6 @@ void lst::Event::createQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, addQuintupletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addQuintupletsToEventExplicit(); @@ -1006,7 +981,6 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false)); alpaka::enqueue(queue, checkHitspLSTask); - alpaka::wait(queue); } } @@ -1023,6 +997,14 @@ void lst::Event::createPixelQuintuplets() { trackCandidatesInGPU->setData(*trackCandidatesBuffers); } + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + + alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); + alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + unsigned int nInnerSegments; auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); @@ -1030,25 +1012,15 @@ void lst::Event::createPixelQuintuplets() { auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); - - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - int* superbins = alpaka::getPtrNative(superbins_buf); - int8_t* pixelTypes = alpaka::getPtrNative(pixelTypes_buf); - unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); - unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); - alpaka::wait(queue); + auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -1083,7 +1055,6 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); - alpaka::wait(queue); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; @@ -1135,13 +1106,12 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, addpT5asTrackCandidateInGPUTask); - alpaka::wait(queue); #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before using it std::cout << "number of pixel quintuplets = " << *alpaka::getPtrNative(nPixelQuintuplets_buf) << std::endl; #endif @@ -1151,6 +1121,7 @@ void lst::Event::addMiniDoubletsToEventExplicit() { auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); @@ -1160,12 +1131,12 @@ void lst::Event::addMiniDoubletsToEventExplicit() { auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nMDsCPU = alpaka::getPtrNative(nMDsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); - int* module_hitRanges = alpaka::getPtrNative(module_hitRanges_buf); + auto const* nMDsCPU = nMDsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_hitRanges = module_hitRanges_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) { @@ -1182,17 +1153,18 @@ void lst::Event::addSegmentsToEventExplicit() { auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nSegmentsCPU = alpaka::getPtrNative(nSegmentsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); + auto const* nSegmentsCPU = nSegmentsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nSegmentsCPU[i] == 0)) { @@ -1209,6 +1181,7 @@ void lst::Event::addQuintupletsToEventExplicit() { auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); @@ -1218,12 +1191,12 @@ void lst::Event::addQuintupletsToEventExplicit() { auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nQuintupletsCPU = alpaka::getPtrNative(nQuintupletsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); - int* module_quintupletModuleIndices = alpaka::getPtrNative(module_quintupletModuleIndices_buf); + auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data(); for (uint16_t i = 0; i < nLowerModules_; i++) { if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { @@ -1240,16 +1213,18 @@ void lst::Event::addTripletsToEventExplicit() { auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); - unsigned int* nTripletsCPU = alpaka::getPtrNative(nTripletsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); + alpaka::wait(queue); // wait for inputs before using them + + auto const* nTripletsCPU = nTripletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); for (uint16_t i = 0; i < nLowerModules_; i++) { if (nTripletsCPU[i] != 0) { @@ -1371,25 +1346,19 @@ unsigned int lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int la } int lst::Event::getNumberOfPixelTriplets() { - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); - int nPixelTriplets = *alpaka::getPtrNative(nPixelTriplets_buf); - - return nPixelTriplets; + return *nPixelTriplets_buf_h.data(); } int lst::Event::getNumberOfPixelQuintuplets() { - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nPixelQuintuplets = *alpaka::getPtrNative(nPixelQuintuplets_buf); + alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - return nPixelQuintuplets; + return *nPixelQuintuplets_buf_h.data(); } unsigned int lst::Event::getNumberOfQuintuplets() { @@ -1420,110 +1389,90 @@ unsigned int lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int } int lst::Event::getNumberOfTrackCandidates() { - auto nTrackCandidates_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCandidates_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); - - int nTrackCandidates = *alpaka::getPtrNative(nTrackCandidates_buf); + alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - return nTrackCandidates; + return *nTrackCandidates_buf_h.data(); } int lst::Event::getNumberOfPT5TrackCandidates() { - auto nTrackCandidatesPT5_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCandidatesPT5_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); + alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::wait(queue); - int nTrackCandidatesPT5 = *alpaka::getPtrNative(nTrackCandidatesPT5_buf); - - return nTrackCandidatesPT5; + return *nTrackCandidatesPT5_buf_h.data(); } int lst::Event::getNumberOfPT3TrackCandidates() { - auto nTrackCandidatesPT3_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesPT3_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); - alpaka::wait(queue); + auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidatesPT3 = *alpaka::getPtrNative(nTrackCandidatesPT3_buf); + alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); - return nTrackCandidatesPT3; + return *nTrackCandidatesPT3_buf_h.data(); } int lst::Event::getNumberOfPLSTrackCandidates() { - auto nTrackCandidatesPLS_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesPLS_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); - alpaka::wait(queue); + auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - unsigned int nTrackCandidatesPLS = *alpaka::getPtrNative(nTrackCandidatesPLS_buf); + alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); - return nTrackCandidatesPLS; + return *nTrackCandidatesPLS_buf_h.data(); } int lst::Event::getNumberOfPixelTrackCandidates() { - auto nTrackCandidates_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCandidatesT5_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidates_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::memcpy(queue, nTrackCandidatesT5_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidates = *alpaka::getPtrNative(nTrackCandidates_buf); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCandidatesT5_buf); + alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); + alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - return nTrackCandidates - nTrackCandidatesT5; + return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } int lst::Event::getNumberOfT5TrackCandidates() { - auto nTrackCandidatesT5_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesT5_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCandidatesT5_buf); + alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - return nTrackCandidatesT5; + return *nTrackCandidatesT5_buf_h.data(); } lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { - auto nHits_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nHits_buf, hitsBuffers->nHits_buf); - alpaka::wait(queue); + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nHits = *alpaka::getPtrNative(nHits_buf); + auto const nHits = *nHits_buf_h.data(); hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); - *alpaka::getPtrNative(hitsInCPU->nHits_buf) = nHits; + *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->detid_buf, hitsBuffers->detid_buf, nHits); alpaka::memcpy(queue, hitsInCPU->xs_buf, hitsBuffers->xs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); - alpaka::wait(queue); } return hitsInCPU; } lst::HitsBuffer* lst::Event::getHitsInCMSSW() { if (hitsInCPU == nullptr) { - auto nHits_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nHits_buf, hitsBuffers->nHits_buf); - alpaka::wait(queue); + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nHits = *alpaka::getPtrNative(nHits_buf); + auto const nHits = *nHits_buf_h.data(); hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); - *alpaka::getPtrNative(hitsInCPU->nHits_buf) = nHits; + *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); - alpaka::wait(queue); } return hitsInCPU; } @@ -1538,7 +1487,7 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get completed host data } return rangesInCPU; } @@ -1546,21 +1495,20 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, miniDoubletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, miniDoubletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); mdsInCPU = new lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); - *alpaka::getPtrNative(mdsInCPU->nMemoryLocations_buf) = nMemHost; + *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, mdsInCPU->anchorHitIndices_buf, miniDoubletsBuffers->anchorHitIndices_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->outerHitIndices_buf, miniDoubletsBuffers->outerHitIndices_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); - alpaka::wait(queue); } return mdsInCPU; } @@ -1568,16 +1516,16 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { lst::SegmentsBuffer* lst::Event::getSegments() { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, segmentsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, segmentsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); segmentsInCPU = new lst::SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); - *alpaka::getPtrNative(segmentsInCPU->nMemoryLocations_buf) = nMemHost; + *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, segmentsInCPU->nSegments_buf, segmentsBuffers->nSegments_buf); alpaka::memcpy(queue, segmentsInCPU->mdIndices_buf, segmentsBuffers->mdIndices_buf, 2u * nMemHost); alpaka::memcpy(queue, @@ -1596,7 +1544,6 @@ lst::SegmentsBuffer* lst::Event::getSegments() { alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf); - alpaka::wait(queue); } return segmentsInCPU; } @@ -1604,15 +1551,15 @@ lst::SegmentsBuffer* lst::Event::getSegments() { lst::TripletsBuffer* lst::Event::getTriplets() { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, tripletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, tripletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); tripletsInCPU = new lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); - *alpaka::getPtrNative(tripletsInCPU->nMemoryLocations_buf) = nMemHost; + *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; #ifdef CUT_VALUE_DEBUG alpaka::memcpy(queue, tripletsInCPU->zOut_buf, tripletsBuffers->zOut_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->zLo_buf, tripletsBuffers->zLo_buf, nMemHost); @@ -1632,7 +1579,6 @@ lst::TripletsBuffer* lst::Event::getTriplets() { alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); - alpaka::wait(queue); } return tripletsInCPU; } @@ -1640,15 +1586,15 @@ lst::TripletsBuffer* lst::Event::getTriplets() { lst::QuintupletsBuffer* lst::Event::getQuintuplets() { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, quintupletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, quintupletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); quintupletsInCPU = new lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); - *alpaka::getPtrNative(quintupletsInCPU->nMemoryLocations_buf) = nMemHost; + *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, quintupletsInCPU->nQuintuplets_buf, quintupletsBuffers->nQuintuplets_buf); alpaka::memcpy( queue, quintupletsInCPU->totOccupancyQuintuplets_buf, quintupletsBuffers->totOccupancyQuintuplets_buf); @@ -1668,7 +1614,6 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); alpaka::memcpy( queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); - alpaka::wait(queue); } return quintupletsInCPU; } @@ -1676,15 +1621,15 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nPixelTriplets = *alpaka::getPtrNative(nPixelTriplets_buf); + auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); pixelTripletsInCPU = new lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); - *alpaka::getPtrNative(pixelTripletsInCPU->nPixelTriplets_buf) = nPixelTriplets; + *pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; alpaka::memcpy( queue, pixelTripletsInCPU->totOccupancyPixelTriplets_buf, pixelTripletsBuffers->totOccupancyPixelTriplets_buf); alpaka::memcpy(queue, pixelTripletsInCPU->rzChiSquared_buf, pixelTripletsBuffers->rzChiSquared_buf, nPixelTriplets); @@ -1707,7 +1652,6 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); - alpaka::wait(queue); } return pixelTripletsInCPU; } @@ -1715,15 +1659,15 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nPixelQuintuplets = *alpaka::getPtrNative(nPixelQuintuplets_buf); + auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); pixelQuintupletsInCPU = new lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); - *alpaka::getPtrNative(pixelQuintupletsInCPU->nPixelQuintuplets_buf) = nPixelQuintuplets; + *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; alpaka::memcpy(queue, pixelQuintupletsInCPU->totOccupancyPixelQuintuplets_buf, pixelQuintupletsBuffers->totOccupancyPixelQuintuplets_buf); @@ -1743,7 +1687,6 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); - alpaka::wait(queue); } return pixelQuintupletsInCPU; } @@ -1751,16 +1694,16 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanHost_buf, trackCandidatesBuffers->nTrackCandidates_buf); + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); alpaka::wait(queue); - unsigned int nTrackCanHost = *alpaka::getPtrNative(nTrackCanHost_buf); + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); trackCandidatesInCPU = new lst::TrackCandidatesBuffer( n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); - *alpaka::getPtrNative(trackCandidatesInCPU->nTrackCandidates_buf) = nTrackCanHost; + *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, @@ -1781,7 +1724,6 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); - alpaka::wait(queue); } return trackCandidatesInCPU; } @@ -1789,16 +1731,16 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW() { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanHost_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nTrackCanHost = *alpaka::getPtrNative(nTrackCanHost_buf); + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); trackCandidatesInCPU = new lst::TrackCandidatesBuffer( n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); - *alpaka::getPtrNative(trackCandidatesInCPU->nTrackCandidates_buf) = nTrackCanHost; + *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, @@ -1809,7 +1751,6 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); - alpaka::wait(queue); } return trackCandidatesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index f1fa3a7d23347..7e2a351a8b699 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -44,9 +44,9 @@ namespace lst { std::array n_trackCandidates_by_layer_endcap_; std::array n_quintuplets_by_layer_barrel_; std::array n_quintuplets_by_layer_endcap_; + unsigned int nTotalSegments_; //Device stuff - unsigned int nTotalSegments; ObjectRanges* rangesInGPU; ObjectRangesBuffer* rangesBuffers; Hits* hitsInGPU;