Skip to content

Commit

Permalink
add synchronizations in callers of the event methods where it matters…
Browse files Browse the repository at this point in the history
…; make synchronization more explicit/flexible in names or function arguments
  • Loading branch information
slava77devel committed Aug 15, 2024
1 parent 43ce20e commit 7889093
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 55 deletions.
55 changes: 40 additions & 15 deletions RecoTracker/LSTCore/src/alpaka/Event.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

using namespace ALPAKA_ACCELERATOR_NAMESPACE;

void lst::Event<Acc3D>::init(bool verbose) {
void lst::Event<Acc3D>::initSync(bool verbose) {
alpaka::wait(queue); // other calls can be asynchronous
addObjects = verbose;
hitsInGPU = nullptr;
mdsInGPU = nullptr;
Expand Down Expand Up @@ -46,7 +47,8 @@ void lst::Event<Acc3D>::init(bool verbose) {
}
}

void lst::Event<Acc3D>::resetEvent() {
void lst::Event<Acc3D>::resetEventSync() {
alpaka::wait(queue); // synchronize to reset consistently
//reset the arrays
for (int i = 0; i < 6; i++) {
n_hits_by_layer_barrel_[i] = 0;
Expand Down Expand Up @@ -1358,7 +1360,7 @@ int lst::Event<Acc3D>::getNumberOfT5TrackCandidates() {
return *nTrackCandidatesT5_buf_h.data();
}

lst::HitsBuffer<DevHost>* lst::Event<Acc3D>::getHits() //std::shared_ptr should take care of garbage collection
lst::HitsBuffer<DevHost>* lst::Event<Acc3D>::getHits(bool sync) //std::shared_ptr should take care of garbage collection
{
if (hitsInCPU == nullptr) {
auto nHits_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand All @@ -1376,11 +1378,13 @@ lst::HitsBuffer<DevHost>* lst::Event<Acc3D>::getHits() //std::shared_ptr should
alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits);
alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits);
alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return hitsInCPU;
}

lst::HitsBuffer<DevHost>* lst::Event<Acc3D>::getHitsInCMSSW() {
lst::HitsBuffer<DevHost>* lst::Event<Acc3D>::getHitsInCMSSW(bool sync) {
if (hitsInCPU == nullptr) {
auto nHits_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf);
Expand All @@ -1392,11 +1396,13 @@ lst::HitsBuffer<DevHost>* lst::Event<Acc3D>::getHitsInCMSSW() {

*hitsInCPU->nHits_buf.data() = nHits;
alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return hitsInCPU;
}

lst::ObjectRangesBuffer<DevHost>* lst::Event<Acc3D>::getRanges() {
lst::ObjectRangesBuffer<DevHost>* lst::Event<Acc3D>::getRanges(bool sync) {
if (rangesInCPU == nullptr) {
rangesInCPU = new lst::ObjectRangesBuffer<DevHost>(nModules_, nLowerModules_, devHost, queue);
rangesInCPU->setData(*rangesInCPU);
Expand All @@ -1406,12 +1412,13 @@ lst::ObjectRangesBuffer<DevHost>* lst::Event<Acc3D>::getRanges() {
alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf);
alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf);
alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf);
alpaka::wait(queue); // wait to get completed host data
if (sync)
alpaka::wait(queue); // wait to get completed host data
}
return rangesInCPU;
}

lst::MiniDoubletsBuffer<DevHost>* lst::Event<Acc3D>::getMiniDoublets() {
lst::MiniDoubletsBuffer<DevHost>* lst::Event<Acc3D>::getMiniDoublets(bool sync) {
if (mdsInCPU == nullptr) {
// Get nMemoryLocations parameter to initialize host based mdsInCPU
auto nMemHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand All @@ -1428,11 +1435,13 @@ lst::MiniDoubletsBuffer<DevHost>* lst::Event<Acc3D>::getMiniDoublets() {
alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost);
alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf);
alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return mdsInCPU;
}

lst::SegmentsBuffer<DevHost>* lst::Event<Acc3D>::getSegments() {
lst::SegmentsBuffer<DevHost>* lst::Event<Acc3D>::getSegments(bool sync) {
if (segmentsInCPU == nullptr) {
// Get nMemoryLocations parameter to initialize host based segmentsInCPU
auto nMemHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand Down Expand Up @@ -1463,11 +1472,13 @@ lst::SegmentsBuffer<DevHost>* lst::Event<Acc3D>::getSegments() {
alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf);
alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf);
alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return segmentsInCPU;
}

lst::TripletsBuffer<DevHost>* lst::Event<Acc3D>::getTriplets() {
lst::TripletsBuffer<DevHost>* lst::Event<Acc3D>::getTriplets(bool sync) {
if (tripletsInCPU == nullptr) {
// Get nMemoryLocations parameter to initialize host based tripletsInCPU
auto nMemHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand Down Expand Up @@ -1498,11 +1509,13 @@ lst::TripletsBuffer<DevHost>* lst::Event<Acc3D>::getTriplets() {
alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost);
alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf);
alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return tripletsInCPU;
}

lst::QuintupletsBuffer<DevHost>* lst::Event<Acc3D>::getQuintuplets() {
lst::QuintupletsBuffer<DevHost>* lst::Event<Acc3D>::getQuintuplets(bool sync) {
if (quintupletsInCPU == nullptr) {
// Get nMemoryLocations parameter to initialize host based quintupletsInCPU
auto nMemHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand Down Expand Up @@ -1533,11 +1546,13 @@ lst::QuintupletsBuffer<DevHost>* lst::Event<Acc3D>::getQuintuplets() {
alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost);
alpaka::memcpy(
queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return quintupletsInCPU;
}

lst::PixelTripletsBuffer<DevHost>* lst::Event<Acc3D>::getPixelTriplets() {
lst::PixelTripletsBuffer<DevHost>* lst::Event<Acc3D>::getPixelTriplets(bool sync) {
if (pixelTripletsInCPU == nullptr) {
// Get nPixelTriplets parameter to initialize host based pixelTripletsInCPU
auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand Down Expand Up @@ -1571,11 +1586,13 @@ lst::PixelTripletsBuffer<DevHost>* lst::Event<Acc3D>::getPixelTriplets() {
alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets);
alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets);
alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return pixelTripletsInCPU;
}

lst::PixelQuintupletsBuffer<DevHost>* lst::Event<Acc3D>::getPixelQuintuplets() {
lst::PixelQuintupletsBuffer<DevHost>* lst::Event<Acc3D>::getPixelQuintuplets(bool sync) {
if (pixelQuintupletsInCPU == nullptr) {
// Get nPixelQuintuplets parameter to initialize host based pixelQuintupletsInCPU
auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand Down Expand Up @@ -1606,11 +1623,13 @@ lst::PixelQuintupletsBuffer<DevHost>* lst::Event<Acc3D>::getPixelQuintuplets() {
queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets);
alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets);
alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return pixelQuintupletsInCPU;
}

lst::TrackCandidatesBuffer<DevHost>* lst::Event<Acc3D>::getTrackCandidates() {
lst::TrackCandidatesBuffer<DevHost>* lst::Event<Acc3D>::getTrackCandidates(bool sync) {
if (trackCandidatesInCPU == nullptr) {
// Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU
auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand Down Expand Up @@ -1643,11 +1662,13 @@ lst::TrackCandidatesBuffer<DevHost>* lst::Event<Acc3D>::getTrackCandidates() {
trackCandidatesInCPU->trackCandidateType_buf,
trackCandidatesBuffers->trackCandidateType_buf,
nTrackCanHost);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return trackCandidatesInCPU;
}

lst::TrackCandidatesBuffer<DevHost>* lst::Event<Acc3D>::getTrackCandidatesInCMSSW() {
lst::TrackCandidatesBuffer<DevHost>* lst::Event<Acc3D>::getTrackCandidatesInCMSSW(bool sync) {
if (trackCandidatesInCPU == nullptr) {
// Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU
auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue, 1u);
Expand All @@ -1670,16 +1691,20 @@ lst::TrackCandidatesBuffer<DevHost>* lst::Event<Acc3D>::getTrackCandidatesInCMSS
trackCandidatesInCPU->trackCandidateType_buf,
trackCandidatesBuffers->trackCandidateType_buf,
nTrackCanHost);
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return trackCandidatesInCPU;
}

lst::ModulesBuffer<DevHost>* lst::Event<Acc3D>::getModules(bool isFull) {
// Returns the host-side modules buffer, creating and filling it lazily.
//
// On the first call (modulesInCPU is still nullptr) a new host ModulesBuffer is
// allocated and filled from modulesBuffers_ via copyFromSrc on `queue`.
// On later calls the cached pointer is returned as is: neither `isFull` nor
// `sync` has any effect then.
//
// isFull: forwarded unchanged to ModulesBuffer::copyFromSrc.
//         NOTE(review): presumably selects a full vs. partial copy - confirm
//         against copyFromSrc.
// sync:   when true, alpaka::wait(queue) is called after the copy is issued,
//         before returning. When false, no wait is done here.
//         NOTE(review): assumes copyFromSrc only enqueues the copies, so the
//         caller must wait on the queue before reading the data - confirm.
lst::ModulesBuffer<DevHost>* lst::Event<Acc3D>::getModules(bool isFull, bool sync) {
// Only the first call allocates and fills; the result is cached in modulesInCPU.
if (modulesInCPU == nullptr) {
// The last input here is just a small placeholder for the allocation.
modulesInCPU = new lst::ModulesBuffer<DevHost>(devHost, nModules_, nPixels_);

modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull);
// Optional blocking wait on the queue used for the copy above.
if (sync)
alpaka::wait(queue); // host consumers expect filled data
}
return modulesInCPU;
}
72 changes: 37 additions & 35 deletions RecoTracker/LSTCore/src/alpaka/Event.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ namespace lst {
PixelTripletsBuffer<DevHost>* pixelTripletsInCPU;
PixelQuintupletsBuffer<DevHost>* pixelQuintupletsInCPU;

void init(bool verbose);
void initSync(bool verbose);

int* superbinCPU;
int8_t* pixelTypeCPU;
Expand All @@ -105,9 +105,10 @@ namespace lst {
modulesBuffers_(deviceESData->modulesBuffers),
pixelMapping_(*deviceESData->pixelMapping),
endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) {
init(verbose);
initSync(verbose);
}
void resetEvent();
void resetEventSync(); // synchronizes
// Calls alpaka::wait on this event's queue; lets callers synchronize explicitly
// (e.g. after get* calls made with sync = false).
void wait() const { alpaka::wait(queue); }

// Calls the appropriate hit function, then increments the counter
void addHitToEvent(std::vector<float> const& x,
Expand All @@ -134,24 +135,21 @@ namespace lst {
std::vector<int8_t> const& pixelType,
std::vector<char> const& isQuad);

// functions that map the objects to the appropriate modules
void addMiniDoubletsToEventExplicit();
void addSegmentsToEventExplicit();
void addTripletsToEventExplicit();
void addQuintupletsToEventExplicit();
void resetObjectsInModule();

void createMiniDoublets();
void createSegmentsWithModuleMap();
void createTriplets();
void createPixelTracklets();
void createPixelTrackletsWithMap();
void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets);
void createExtendedTracks();
void createQuintuplets();
void createPixelTriplets();
void createPixelQuintuplets();
void createQuintuplets();
void pixelLineSegmentCleaning(bool no_pls_dupclean);
void createPixelQuintuplets();

// functions that map the objects to the appropriate modules
void addMiniDoubletsToEventExplicit();
void addSegmentsToEventExplicit();
void addQuintupletsToEventExplicit();
void addTripletsToEventExplicit();
void resetObjectsInModule();

unsigned int getNumberOfHits();
unsigned int getNumberOfHitsByLayer(unsigned int layer);
Expand All @@ -173,33 +171,37 @@ namespace lst {
unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer);
unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer);

int getNumberOfTrackCandidates();
int getNumberOfPixelTrackCandidates();
int getNumberOfPT5TrackCandidates();
int getNumberOfPT3TrackCandidates();
int getNumberOfT5TrackCandidates();
int getNumberOfPLSTrackCandidates();
int getNumberOfPixelTriplets();
int getNumberOfPixelQuintuplets();

unsigned int getNumberOfQuintuplets();
unsigned int getNumberOfQuintupletsByLayer(unsigned int layer);
unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer);
unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer);

int getNumberOfPixelTriplets();
int getNumberOfPixelQuintuplets();
int getNumberOfTrackCandidates();
int getNumberOfPT5TrackCandidates();
int getNumberOfPT3TrackCandidates();
int getNumberOfPLSTrackCandidates();
int getNumberOfPixelTrackCandidates();
int getNumberOfT5TrackCandidates();

ObjectRangesBuffer<DevHost>* getRanges();
HitsBuffer<DevHost>* getHits();
HitsBuffer<DevHost>* getHitsInCMSSW();
MiniDoubletsBuffer<DevHost>* getMiniDoublets();
SegmentsBuffer<DevHost>* getSegments();
TripletsBuffer<DevHost>* getTriplets();
QuintupletsBuffer<DevHost>* getQuintuplets();
TrackCandidatesBuffer<DevHost>* getTrackCandidates();
TrackCandidatesBuffer<DevHost>* getTrackCandidatesInCMSSW();
PixelTripletsBuffer<DevHost>* getPixelTriplets();
PixelQuintupletsBuffer<DevHost>* getPixelQuintuplets();
ModulesBuffer<DevHost>* getModules(bool isFull = false);
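// sync = true adds an alpaka::wait at the end of the lazy fill of a buffer
// (it has no effect on repeated calls, which return the cached buffer).
// Setting it to false may allow faster operation with concurrent calls of get*,
// but the caller must then synchronize before reading the data.
// HANDLE WITH CARE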
HitsBuffer<DevHost>* getHits(bool sync = true);
HitsBuffer<DevHost>* getHitsInCMSSW(bool sync = true);
ObjectRangesBuffer<DevHost>* getRanges(bool sync = true);
MiniDoubletsBuffer<DevHost>* getMiniDoublets(bool sync = true);
SegmentsBuffer<DevHost>* getSegments(bool sync = true);
TripletsBuffer<DevHost>* getTriplets(bool sync = true);
QuintupletsBuffer<DevHost>* getQuintuplets(bool sync = true);
PixelTripletsBuffer<DevHost>* getPixelTriplets(bool sync = true);
PixelQuintupletsBuffer<DevHost>* getPixelQuintuplets(bool sync = true);
TrackCandidatesBuffer<DevHost>* getTrackCandidates(bool sync = true);
TrackCandidatesBuffer<DevHost>* getTrackCandidatesInCMSSW(bool sync = true);
ModulesBuffer<DevHost>* getModules(bool isFull = false, bool sync = true);
};

} // namespace lst
Expand Down
18 changes: 14 additions & 4 deletions RecoTracker/LSTCore/src/alpaka/LST.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,11 @@ void lst::LST<Acc3D>::getOutput(lst::Event<Acc3D>& event) {
std::vector<int> tc_seedIdx;
std::vector<short> tc_trackCandidateType;

lst::HitsBuffer<alpaka::DevCpu>& hitsInGPU = (*event.getHitsInCMSSW());
lst::HitsBuffer<alpaka::DevCpu>& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line
lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data();

unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates;

for (unsigned int idx = 0; idx < nTrackCandidates; idx++) {
short trackCandidateType = trackCandidates->trackCandidateType[idx];
std::vector<unsigned int> hit_idx =
Expand Down Expand Up @@ -344,6 +345,7 @@ void lst::LST<Acc3D>::run(Queue& queue,
in_isQuad_vec_);
event.createMiniDoublets();
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets());
printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0));
printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1));
Expand All @@ -360,6 +362,7 @@ void lst::LST<Acc3D>::run(Queue& queue,

event.createSegmentsWithModuleMap();
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of Segments produced: %d\n", event.getNumberOfSegments());
printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0));
printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1));
Expand All @@ -375,6 +378,7 @@ void lst::LST<Acc3D>::run(Queue& queue,

event.createTriplets();
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of T3s produced: %d\n", event.getNumberOfTriplets());
printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0));
printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1));
Expand All @@ -392,6 +396,7 @@ void lst::LST<Acc3D>::run(Queue& queue,

event.createQuintuplets();
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets());
printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0));
printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1));
Expand All @@ -409,15 +414,20 @@ void lst::LST<Acc3D>::run(Queue& queue,
event.pixelLineSegmentCleaning(no_pls_dupclean);

event.createPixelQuintuplets();
if (verbose)
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets());
}

event.createPixelTriplets();
if (verbose)
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets());
}

event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets);
if (verbose) {
alpaka::wait(queue); // event calls are asynchronous: wait before printing
printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates());
printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates());
printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates());
Expand All @@ -428,5 +438,5 @@ void lst::LST<Acc3D>::run(Queue& queue,

getOutput(event);

event.resetEvent();
event.resetEventSync();
}
2 changes: 1 addition & 1 deletion RecoTracker/LSTCore/standalone/bin/lst.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ void run_lst() {
// Clear this event
TStopwatch my_timer;
my_timer.Start();
events.at(omp_get_thread_num())->resetEvent();
events.at(omp_get_thread_num())->resetEventSync();
float timing_resetEvent = my_timer.RealTime();

timing_information.push_back({timing_input_loading,
Expand Down
Loading

0 comments on commit 7889093

Please sign in to comment.