From 7889093ac6bd4e1f2e01ee5452fca01e776d6f0c Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 15 Aug 2024 16:43:14 -0700 Subject: [PATCH] add synchronizations in callers of the event methods where it matters; make synchronization more explicit/flexible in names or function arguments --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 55 ++++++++++---- RecoTracker/LSTCore/src/alpaka/Event.h | 72 ++++++++++--------- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 18 +++-- RecoTracker/LSTCore/standalone/bin/lst.cc | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 10 +++ 5 files changed, 102 insertions(+), 55 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index cc8872438dfe7..f9757b0659691 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -4,7 +4,8 @@ using namespace ALPAKA_ACCELERATOR_NAMESPACE; -void lst::Event::init(bool verbose) { +void lst::Event::initSync(bool verbose) { + alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; mdsInGPU = nullptr; @@ -46,7 +47,8 @@ void lst::Event::init(bool verbose) { } } -void lst::Event::resetEvent() { +void lst::Event::resetEventSync() { + alpaka::wait(queue); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { n_hits_by_layer_barrel_[i] = 0; @@ -1358,7 +1360,7 @@ int lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should take care of garbage collection +lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1376,11 +1378,13 @@ lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return hitsInCPU; } -lst::HitsBuffer* lst::Event::getHitsInCMSSW() { +lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); @@ -1392,11 +1396,13 @@ lst::HitsBuffer* lst::Event::getHitsInCMSSW() { *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return hitsInCPU; } -lst::ObjectRangesBuffer* lst::Event::getRanges() { +lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { rangesInCPU = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); @@ -1406,12 +1412,13 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); - alpaka::wait(queue); // wait to get completed host data + if (sync) + alpaka::wait(queue); // wait to get completed host data } return rangesInCPU; } -lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { +lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1428,11 +1435,13 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return mdsInCPU; } -lst::SegmentsBuffer* lst::Event::getSegments() { +lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1463,11 +1472,13 @@ lst::SegmentsBuffer* lst::Event::getSegments() { alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return segmentsInCPU; } -lst::TripletsBuffer* lst::Event::getTriplets() { +lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1498,11 +1509,13 @@ lst::TripletsBuffer* lst::Event::getTriplets() { alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return tripletsInCPU; } -lst::QuintupletsBuffer* lst::Event::getQuintuplets() { +lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1533,11 +1546,13 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); alpaka::memcpy( queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return quintupletsInCPU; } -lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { +lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1571,11 +1586,13 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { +lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1606,11 +1623,13 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { +lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1643,11 +1662,13 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW() { +lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1670,16 +1691,20 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return trackCandidatesInCPU; } -lst::ModulesBuffer* lst::Event::getModules(bool isFull) { +lst::ModulesBuffer* lst::Event::getModules(bool isFull, bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. modulesInCPU = new lst::ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return modulesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 7e2a351a8b699..64365bb58bfa8 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -78,7 +78,7 @@ namespace lst { PixelTripletsBuffer* pixelTripletsInCPU; PixelQuintupletsBuffer* pixelQuintupletsInCPU; - void init(bool verbose); + void initSync(bool verbose); int* superbinCPU; int8_t* pixelTypeCPU; @@ -105,9 +105,10 @@ namespace lst { modulesBuffers_(deviceESData->modulesBuffers), pixelMapping_(*deviceESData->pixelMapping), endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { - init(verbose); + initSync(verbose); } - void resetEvent(); + void resetEventSync(); // synchronizes + void wait() const { alpaka::wait(queue); } // Calls the appropriate hit function, then increments the counter void addHitToEvent(std::vector const& x, @@ -134,24 +135,21 @@ namespace lst { std::vector const& pixelType, std::vector const& isQuad); - // functions that map the objects to the appropriate modules - void addMiniDoubletsToEventExplicit(); - void addSegmentsToEventExplicit(); - void addTripletsToEventExplicit(); - void addQuintupletsToEventExplicit(); - void resetObjectsInModule(); - void createMiniDoublets(); void createSegmentsWithModuleMap(); void createTriplets(); - void createPixelTracklets(); - void createPixelTrackletsWithMap(); void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); - void createExtendedTracks(); - void createQuintuplets(); void createPixelTriplets(); - void createPixelQuintuplets(); + void createQuintuplets(); void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); unsigned int getNumberOfHits(); unsigned int getNumberOfHitsByLayer(unsigned int layer); @@ -173,33 +171,37 @@ namespace lst { unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); - int getNumberOfTrackCandidates(); - int getNumberOfPixelTrackCandidates(); - int getNumberOfPT5TrackCandidates(); - int getNumberOfPT3TrackCandidates(); - int getNumberOfT5TrackCandidates(); - int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); unsigned int getNumberOfQuintuplets(); unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); - int getNumberOfPixelTriplets(); - int getNumberOfPixelQuintuplets(); + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); - ObjectRangesBuffer* getRanges(); - HitsBuffer* getHits(); - HitsBuffer* getHitsInCMSSW(); - MiniDoubletsBuffer* getMiniDoublets(); - SegmentsBuffer* getSegments(); - TripletsBuffer* getTriplets(); - QuintupletsBuffer* getQuintuplets(); - TrackCandidatesBuffer* getTrackCandidates(); - TrackCandidatesBuffer* getTrackCandidatesInCMSSW(); - PixelTripletsBuffer* getPixelTriplets(); - PixelQuintupletsBuffer* getPixelQuintuplets(); - ModulesBuffer* getModules(bool isFull = false); + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + HitsBuffer* getHits(bool sync = true); + HitsBuffer* getHitsInCMSSW(bool sync = true); + ObjectRangesBuffer* getRanges(bool sync = true); + MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + SegmentsBuffer* getSegments(bool sync = true); + TripletsBuffer* getTriplets(bool sync = true); + QuintupletsBuffer* getQuintuplets(bool sync = true); + PixelTripletsBuffer* getPixelTriplets(bool sync = true); + PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ModulesBuffer* getModules(bool isFull = false, bool sync = true); }; } // namespace lst diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 940469e8682a2..f5ee7d7f52add 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -255,10 +255,11 @@ void lst::LST::getOutput(lst::Event& event) { std::vector tc_seedIdx; std::vector tc_trackCandidateType; - lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW()); + lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { short trackCandidateType = trackCandidates->trackCandidateType[idx]; std::vector hit_idx = @@ -344,6 +345,7 @@ void lst::LST::run(Queue& queue, in_isQuad_vec_); event.createMiniDoublets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets()); printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0)); printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1)); @@ -360,6 +362,7 @@ void lst::LST::run(Queue& queue, event.createSegmentsWithModuleMap(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Segments produced: %d\n", event.getNumberOfSegments()); printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0)); printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1)); @@ -375,6 +378,7 @@ void lst::LST::run(Queue& queue, event.createTriplets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of T3s produced: %d\n", event.getNumberOfTriplets()); printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0)); printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1)); @@ -392,6 +396,7 @@ void lst::LST::run(Queue& queue, event.createQuintuplets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets()); printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0)); printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1)); @@ -409,15 +414,20 @@ void lst::LST::run(Queue& queue, event.pixelLineSegmentCleaning(no_pls_dupclean); event.createPixelQuintuplets(); - if (verbose) + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets()); + } event.createPixelTriplets(); - if (verbose) + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets()); + } event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates()); printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates()); printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates()); @@ -428,5 +438,5 @@ void lst::LST::run(Queue& queue, getOutput(event); - event.resetEvent(); + event.resetEventSync(); } diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index e67fe5b62d269..89bb43a3bcd4b 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -478,7 +478,7 @@ void run_lst() { // Clear this event TStopwatch my_timer; my_timer.Start(); - events.at(omp_get_thread_num())->resetEvent(); + events.at(omp_get_thread_num())->resetEventSync(); float timing_resetEvent = my_timer.RealTime(); timing_information.push_back({timing_input_loading, diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index d6657c5e512f6..9277b60253a64 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -28,6 +28,7 @@ float runMiniDoublet(lst::Event *event, int evt) { std::cout << "Reco Mini-Doublet start " << evt << std::endl; my_timer.Start(); event->createMiniDoublets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float md_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) @@ -80,6 +81,7 @@ float runSegment(lst::Event *event) { std::cout << "Reco Segment start" << std::endl; my_timer.Start(); event->createSegmentsWithModuleMap(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float sg_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Segment processing time: " << sg_elapsed << " secs" << std::endl; @@ -117,6 +119,7 @@ float runT3(lst::Event *event) { std::cout << "Reco T3 start" << std::endl; my_timer.Start(); event->createTriplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float t3_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco T3 processing time: " << t3_elapsed << " secs" << std::endl; @@ -158,6 +161,7 @@ float runpT3(lst::Event *event) { std::cout << "Reco Pixel Triplet pT3 start" << std::endl; my_timer.Start(); event->createPixelTriplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pt3_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco pT3 processing time: " << pt3_elapsed << " secs" << std::endl; @@ -174,6 +178,7 @@ float runQuintuplet(lst::Event *event) { std::cout << "Reco Quintuplet start" << std::endl; my_timer.Start(); event->createQuintuplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float t5_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Quintuplet processing time: " << t5_elapsed << " secs" << std::endl; @@ -219,6 +224,7 @@ float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { std::cout << "Reco Pixel Line Segment start" << std::endl; my_timer.Start(); event->pixelLineSegmentCleaning(no_pls_dupclean); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pls_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment processing time: " << pls_elapsed << " secs" << std::endl; @@ -233,6 +239,7 @@ float runPixelQuintuplet(lst::Event *event) { std::cout << "Reco Pixel Quintuplet start" << std::endl; my_timer.Start(); event->createPixelQuintuplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pt5_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet processing time: " << pt5_elapsed << " secs" << std::endl; @@ -249,6 +256,7 @@ float runTrackCandidate(lst::Event *event, bool no_pls_dupclean, bool tc_ std::cout << "Reco TrackCandidate start" << std::endl; my_timer.Start(); event->createTrackCandidates(no_pls_dupclean, tc_pls_triplets); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float tc_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco TrackCandidate processing time: " << tc_elapsed << " secs" << std::endl; @@ -892,6 +900,7 @@ float addInputsToEventPreLoad(lst::Event *event, superbin_vec, pixelType_vec, isQuad_vec); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float hit_loading_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) @@ -1331,6 +1340,7 @@ void writeMetaData() { pixelType_vec, isQuad_vec); + event.wait(); // device side event calls are asynchronous: wait to measure time or print float hit_loading_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Loading inputs processing time: " << hit_loading_elapsed << " secs" << std::endl;