Skip to content

Commit

Permalink
Migrated MDs to SoA+PortableCollection
Browse files Browse the repository at this point in the history
  • Loading branch information
ariostas committed Sep 30, 2024
1 parent 3858cf3 commit 20f6f9c
Show file tree
Hide file tree
Showing 14 changed files with 713 additions and 812 deletions.
94 changes: 51 additions & 43 deletions RecoTracker/LSTCore/src/alpaka/Event.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ void Event::resetEventSync() {
}
hitsInGPU_.reset();
hitsBuffers_.reset();
mdsInGPU_.reset();
miniDoubletsBuffers_.reset();
mdsDev_.reset();
rangesInGPU_.reset();
rangesBuffers_.reset();
segmentsInGPU_.reset();
Expand All @@ -72,7 +71,7 @@ void Event::resetEventSync() {

hitsInCPU_.reset();
rangesInCPU_.reset();
mdsInCPU_.reset();
mdsHost_.reset();
segmentsInCPU_.reset();
tripletsInCPU_.reset();
quintupletsInCPU_.reset();
Expand Down Expand Up @@ -172,7 +171,7 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
unsigned int mdSize = 2 * size;
uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex;

if (!mdsInGPU_) {
if (!mdsDev_) {
// Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy
auto dst_view_miniDoubletModuleOccupancy =
alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_);
Expand All @@ -195,11 +194,16 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
*nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules;
unsigned int nTotalMDs = *nTotalMDs_buf_h.data();

mdsInGPU_.emplace();
miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_);
mdsInGPU_->setData(*miniDoubletsBuffers_);
std::array<int, 2> const mds_sizes{{static_cast<int>(nTotalMDs),
static_cast<int>(nLowerModules_ + 1)}};
mdsDev_.emplace(mds_sizes, queue_);

alpaka::memcpy(queue_, miniDoubletsBuffers_->nMemoryLocations_buf, nTotalMDs_buf_h);
auto nMDs_view =
alpaka::createView(devAcc_, mdsDev_->view<MiniDoubletsOccupancySoA>().nMDs(), nLowerModules_ + 1);
auto totOccupancyMDs_view =
alpaka::createView(devAcc_, mdsDev_->view<MiniDoubletsOccupancySoA>().totOccupancyMDs(), nLowerModules_ + 1);
alpaka::memset(queue_, nMDs_view, 0u);
alpaka::memset(queue_, totOccupancyMDs_view, 0u);
}
if (!segmentsInGPU_) {
// can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously.
Expand All @@ -212,7 +216,7 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
CreateSegmentArrayRanges{},
*modulesBuffers_.data(),
*rangesInGPU_,
*mdsInGPU_);
mdsDev_->const_view<MiniDoubletsSoA>());

auto nTotalSegments_view = alpaka::createView(cms::alpakatools::host(), &nTotalSegments_, (Idx)1u);

Expand Down Expand Up @@ -265,11 +269,14 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
alpaka::createSubView(segmentsBuffers_->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size);

auto dst_view_nMDs = alpaka::createSubView(miniDoubletsBuffers_->nMDs_buf, (Idx)1u, (Idx)pixelModuleIndex);
MiniDoubletsOccupancy mdsOccupancy = mdsDev_->view<MiniDoubletsOccupancySoA>();
auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), (Idx)nLowerModules_ + 1);
auto dst_view_nMDs = alpaka::createSubView(nMDs_view, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize);

auto totOccupancyMDs_view = alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), (Idx)nLowerModules_ + 1);
auto dst_view_totOccupancyMDs =
alpaka::createSubView(miniDoubletsBuffers_->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::createSubView(totOccupancyMDs_view, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize);

alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory
Expand All @@ -284,7 +291,7 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
*modulesBuffers_.data(),
*rangesInGPU_,
*hitsInGPU_,
*mdsInGPU_,
mdsDev_->view<MiniDoubletsSoA>(),
*segmentsInGPU_,
hitIndices0_dev.data(),
hitIndices1_dev.data(),
Expand Down Expand Up @@ -318,10 +325,17 @@ void Event::createMiniDoublets() {
*nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules;
unsigned int nTotalMDs = *nTotalMDs_buf_h.data();

if (!mdsInGPU_) {
mdsInGPU_.emplace();
miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_);
mdsInGPU_->setData(*miniDoubletsBuffers_);
if (!mdsDev_) {
std::array<int, 2> const mds_sizes{{static_cast<int>(nTotalMDs),
static_cast<int>(nLowerModules_ + 1)}};
mdsDev_.emplace(mds_sizes, queue_);

auto nMDs_view =
alpaka::createView(devAcc_, mdsDev_->view<MiniDoubletsOccupancySoA>().nMDs(), nLowerModules_ + 1);
auto totOccupancyMDs_view =
alpaka::createView(devAcc_, mdsDev_->view<MiniDoubletsOccupancySoA>().totOccupancyMDs(), nLowerModules_ + 1);
alpaka::memset(queue_, nMDs_view, 0u);
alpaka::memset(queue_, totOccupancyMDs_view, 0u);
}

Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32};
Expand All @@ -334,7 +348,8 @@ void Event::createMiniDoublets() {
CreateMiniDoubletsInGPUv2{},
*modulesBuffers_.data(),
*hitsInGPU_,
*mdsInGPU_,
mdsDev_->view<MiniDoubletsSoA>(),
mdsDev_->view<MiniDoubletsOccupancySoA>(),
*rangesInGPU_);

WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
Expand All @@ -343,7 +358,7 @@ void Event::createMiniDoublets() {
addMiniDoubletRangesToEventExplicit_workDiv,
AddMiniDoubletRangesToEventExplicit{},
*modulesBuffers_.data(),
*mdsInGPU_,
mdsDev_->view<MiniDoubletsOccupancySoA>(),
*rangesInGPU_,
*hitsInGPU_);

Expand All @@ -368,7 +383,8 @@ void Event::createSegmentsWithModuleMap() {
createSegmentsInGPUv2_workDiv,
CreateSegmentsInGPUv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
mdsDev_->const_view<MiniDoubletsSoA>(),
mdsDev_->const_view<MiniDoubletsOccupancySoA>(),
*segmentsInGPU_,
*rangesInGPU_);

Expand Down Expand Up @@ -454,7 +470,7 @@ void Event::createTriplets() {
createTripletsInGPUv2_workDiv,
CreateTripletsInGPUv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
mdsDev_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*rangesInGPU_,
Expand Down Expand Up @@ -573,7 +589,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) {
*pixelTripletsInGPU_,
*trackCandidatesInGPU_,
*segmentsInGPU_,
*mdsInGPU_,
mdsDev_->const_view<MiniDoubletsSoA>(),
*hitsInGPU_,
*quintupletsInGPU_);

Expand Down Expand Up @@ -704,7 +720,7 @@ void Event::createPixelTriplets() {
CreatePixelTripletsInGPUFromMapv2{},
*modulesBuffers_.data(),
*rangesInGPU_,
*mdsInGPU_,
mdsDev_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*pixelTripletsInGPU_,
Expand Down Expand Up @@ -769,7 +785,7 @@ void Event::createQuintuplets() {
createQuintupletsInGPUv2_workDiv,
CreateQuintupletsInGPUv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
mdsDev_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*quintupletsInGPU_,
Expand Down Expand Up @@ -904,7 +920,7 @@ void Event::createPixelQuintuplets() {
createPixelQuintupletsInGPUFromMapv2_workDiv,
CreatePixelQuintupletsInGPUFromMapv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
mdsDev_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*quintupletsInGPU_,
Expand Down Expand Up @@ -947,7 +963,8 @@ void Event::createPixelQuintuplets() {

void Event::addMiniDoubletsToEventExplicit() {
auto nMDsCPU_buf = allocBufWrapper<unsigned int>(cms::alpakatools::host(), nLowerModules_, queue_);
alpaka::memcpy(queue_, nMDsCPU_buf, miniDoubletsBuffers_->nMDs_buf, nLowerModules_);
auto nMDs_view = alpaka::createView(devAcc_, mdsDev_->const_view<MiniDoubletsOccupancySoA>().nMDs(), nLowerModules_);
alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_);

// FIXME: replace by ES host data
auto module_subdets_buf = allocBufWrapper<short>(cms::alpakatools::host(), nLowerModules_, queue_);
Expand Down Expand Up @@ -1320,28 +1337,19 @@ ObjectRangesBuffer<alpaka_common::DevHost>& Event::getRanges(bool sync) {
return rangesInCPU_.value();
}

MiniDoubletsBuffer<alpaka_common::DevHost>& Event::getMiniDoublets(bool sync) {
if (!mdsInCPU_) {
// Get nMemoryLocations parameter to initialize host based mdsInCPU_
auto nMemHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);
alpaka::memcpy(queue_, nMemHost_buf_h, miniDoubletsBuffers_->nMemoryLocations_buf);
alpaka::wait(queue_); // wait for the value before using

auto const nMemHost = *nMemHost_buf_h.data();
mdsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_);
mdsInCPU_->setData(*mdsInCPU_);

alpaka::memcpy(queue_, mdsInCPU_->nMemoryLocations_buf, miniDoubletsBuffers_->nMemoryLocations_buf);
alpaka::memcpy(queue_, mdsInCPU_->anchorHitIndices_buf, miniDoubletsBuffers_->anchorHitIndices_buf, nMemHost);
alpaka::memcpy(queue_, mdsInCPU_->outerHitIndices_buf, miniDoubletsBuffers_->outerHitIndices_buf, nMemHost);
alpaka::memcpy(queue_, mdsInCPU_->dphichanges_buf, miniDoubletsBuffers_->dphichanges_buf, nMemHost);
alpaka::memcpy(queue_, mdsInCPU_->nMDs_buf, miniDoubletsBuffers_->nMDs_buf);
alpaka::memcpy(queue_, mdsInCPU_->totOccupancyMDs_buf, miniDoubletsBuffers_->totOccupancyMDs_buf);
template <typename TSoA>
typename TSoA::ConstView Event::getMiniDoublets(bool sync) {
if constexpr (std::is_same_v<Device, DevHost>)
return mdsDev_->const_view<TSoA>();
if (!mdsHost_) {
mdsHost_.emplace(cms::alpakatools::CopyToHost<MiniDoubletsDeviceCollection>::copyAsync(queue_, *mdsDev_));
if (sync)
alpaka::wait(queue_); // host consumers expect filled data
}
return mdsInCPU_.value();
return mdsHost_->const_view<TSoA>();
}
template MiniDoubletsConst Event::getMiniDoublets<MiniDoubletsSoA>(bool);
template MiniDoubletsOccupancyConst Event::getMiniDoublets<MiniDoubletsOccupancySoA>(bool);

SegmentsBuffer<alpaka_common::DevHost>& Event::getSegments(bool sync) {
if (!segmentsInCPU_) {
Expand Down
8 changes: 4 additions & 4 deletions RecoTracker/LSTCore/src/alpaka/Event.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
std::optional<ObjectRangesBuffer<Device>> rangesBuffers_;
std::optional<Hits> hitsInGPU_;
std::optional<HitsBuffer<Device>> hitsBuffers_;
std::optional<MiniDoublets> mdsInGPU_;
std::optional<MiniDoubletsBuffer<Device>> miniDoubletsBuffers_;
std::optional<MiniDoubletsDeviceCollection> mdsDev_;
std::optional<Segments> segmentsInGPU_;
std::optional<SegmentsBuffer<Device>> segmentsBuffers_;
std::optional<Triplets> tripletsInGPU_;
Expand All @@ -64,7 +63,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
//CPU interface stuff
std::optional<ObjectRangesBuffer<DevHost>> rangesInCPU_;
std::optional<HitsBuffer<DevHost>> hitsInCPU_;
std::optional<MiniDoubletsBuffer<DevHost>> mdsInCPU_;
std::optional<MiniDoubletsHostCollection> mdsHost_;
std::optional<SegmentsBuffer<DevHost>> segmentsInCPU_;
std::optional<TripletsBuffer<DevHost>> tripletsInCPU_;
std::optional<TrackCandidatesBuffer<DevHost>> trackCandidatesInCPU_;
Expand Down Expand Up @@ -183,7 +182,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
HitsBuffer<DevHost>& getHits(bool sync = true);
HitsBuffer<DevHost>& getHitsInCMSSW(bool sync = true);
ObjectRangesBuffer<DevHost>& getRanges(bool sync = true);
MiniDoubletsBuffer<DevHost>& getMiniDoublets(bool sync = true);
template <typename TSoA>
typename TSoA::ConstView getMiniDoublets(bool sync = true);
SegmentsBuffer<DevHost>& getSegments(bool sync = true);
TripletsBuffer<DevHost>& getTriplets(bool sync = true);
QuintupletsBuffer<DevHost>& getQuintuplets(bool sync = true);
Expand Down
Loading

0 comments on commit 20f6f9c

Please sign in to comment.