From 85576445ee6b89c8675978d738d170895ce1c25a Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Fri, 2 Aug 2024 08:16:42 -0700 Subject: [PATCH 1/8] remove spurious semicolons at the end of method implementations --- RecoTracker/LSTCore/interface/Module.h | 6 +-- RecoTracker/LSTCore/src/ModuleMethods.h | 12 ++--- RecoTracker/LSTCore/src/alpaka/Hit.h | 14 +++--- RecoTracker/LSTCore/src/alpaka/Kernels.h | 14 +++--- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 16 +++---- .../LSTCore/src/alpaka/PixelQuintuplet.h | 20 ++++---- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 6 +-- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 46 +++++++++---------- RecoTracker/LSTCore/src/alpaka/Segment.h | 18 ++++---- .../LSTCore/src/alpaka/TrackCandidate.h | 6 +-- RecoTracker/LSTCore/src/alpaka/Triplet.h | 16 +++---- 11 files changed, 87 insertions(+), 87 deletions(-) diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h index d45415f800a4f..78396c195cd8f 100644 --- a/RecoTracker/LSTCore/interface/Module.h +++ b/RecoTracker/LSTCore/interface/Module.h @@ -72,15 +72,15 @@ namespace lst { } else { return false; } - }; + } static bool parseIsLower(bool isInvertedx, unsigned int detId) { return (isInvertedx) ? !(detId & 1) : (detId & 1); - }; + } static unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? detId + 1 : detId - 1); - }; + } template void setData(TBuff const& buf) { diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index a5b16573db57b..54514cccf2b54 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -94,7 +94,7 @@ namespace lst { for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); } - }; + } inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd, @@ -111,7 +111,7 @@ namespace lst { moduleMap[index * max_connected_modules + i] = mmd.detIdToIndex.at(connectedModules[i]); } } - }; + } inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd) { uint16_t* mapIdx = alpaka::getPtrNative(modulesBuf.mapIdx_buf); @@ -125,7 +125,7 @@ namespace lst { mapdetId[counter] = detId; counter++; } - }; + } inline void setDerivedQuantities(unsigned int detId, unsigned short& layer, @@ -148,7 +148,7 @@ namespace lst { r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); - }; + } inline void loadCentroidsFromFile(const char* filePath, ModuleMetaData& mmd, uint16_t& nModules) { std::ifstream ifile(filePath, std::ios::binary); @@ -185,7 +185,7 @@ namespace lst { mmd.detIdToIndex[1] = counter; //pixel module is the last module in the module list counter++; nModules = counter; - }; + } inline ModulesBuffer loadModulesFromFile(MapPLStoLayer const& pLStoLayer, const char* moduleMetaDataFilePath, @@ -335,6 +335,6 @@ namespace lst { fillMapArraysExplicit(modulesBuf, mmd); return modulesBuf; - }; + } } // namespace lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index c14ac26124e6d..c0eb481c73228 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -113,7 +113,7 @@ namespace lst { float rt = alpaka::math::sqrt(acc, x * x + y * y); float eta = ((z > 0) - (z < 0)) * alpaka::math::acosh(acc, r3 / rt); return eta; - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi_mpi_pi(TAcc const& acc, float x) { @@ -123,24 +123,24 @@ namespace lst { constexpr float o2pi = 1.f / (2.f * float(M_PI)); float n = alpaka::math::round(acc, x * o2pi); return x - n * float(2.f * float(M_PI)); - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi(TAcc const& acc, float x, float y) { return phi_mpi_pi(acc, float(M_PI) + alpaka::math::atan2(acc, -y, -x)); - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhi(TAcc const& acc, float x1, float y1, float x2, float y2) { float phi1 = phi(acc, x1, y1); float phi2 = phi(acc, x2, y2); return phi_mpi_pi(acc, (phi2 - phi1)); - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhiChange(TAcc const& acc, float x1, float y1, float x2, float y2) { return deltaPhi(acc, x1, y1, x2 - x1, y2 - y1); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float calculate_dPhi(float phi1, float phi2) { // Calculate dPhi @@ -154,7 +154,7 @@ namespace lst { } return dPhi; - }; + } ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(const unsigned int* data, // Array that we are searching over unsigned int search_val, // Value we want to find in data array @@ -175,7 +175,7 @@ namespace lst { } // Couldn't find search value in array. return -1; - }; + } struct moduleRangesKernel { template diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 8e3fa46c3ab6f..496a3f2ce0fb2 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -18,23 +18,23 @@ namespace lst { unsigned int quintupletIndex, bool secondpass = false) { quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(lst::PixelTriplets& pixelTripletsInGPU, unsigned int pixelTripletIndex) { pixelTripletsInGPU.isDup[pixelTripletIndex] = true; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(lst::PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelQuintupletIndex) { pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(lst::Segments& segmentsInGPU, unsigned int pixelSegmentArrayIndex, bool secondpass = false) { segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, @@ -61,7 +61,7 @@ namespace lst { } } return nMatched; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, unsigned int jx, @@ -88,7 +88,7 @@ namespace lst { } } return nMatched; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, unsigned int jx, @@ -140,7 +140,7 @@ namespace lst { matched[0] = npMatched; matched[1] = nMatched; - }; + } struct removeDupQuintupletsInGPUAfterBuild { template diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 86a22d943c33f..bdbd366bba338 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -260,7 +260,7 @@ namespace lst { mdsInGPU.outerHighEdgeY[idx] = hitsInGPU.highEdgeYs[outerHitIndex]; mdsInGPU.outerLowEdgeX[idx] = hitsInGPU.lowEdgeXs[outerHitIndex]; mdsInGPU.outerLowEdgeY[idx] = hitsInGPU.lowEdgeYs[outerHitIndex]; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing @@ -280,7 +280,7 @@ namespace lst { return false; } else return false; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(struct lst::Modules const& modulesInGPU, uint16_t moduleIndex) { float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; @@ -331,7 +331,7 @@ namespace lst { } return moduleSeparation; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( @@ -390,7 +390,7 @@ namespace lst { else { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); } - }; + } template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, @@ -556,7 +556,7 @@ namespace lst { shiftedCoords[0] = xn; shiftedCoords[1] = yn; shiftedCoords[2] = zn; - }; + } template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, @@ -628,7 +628,7 @@ namespace lst { zUpper, rtUpper); } - }; + } template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, @@ -755,7 +755,7 @@ namespace lst { } return alpaka::math::abs(acc, dPhiChange) < miniCut; - }; + } template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, @@ -866,7 +866,7 @@ namespace lst { noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); return alpaka::math::abs(acc, dPhiChange) < miniCut; - }; + } struct createMiniDoubletsInGPUv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index ee172f9e05f6e..fcdcd4d7c78bb 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -201,7 +201,7 @@ namespace lst { pixelQuintupletsInGPU.rzChiSquared[pixelQuintupletIndex] = rzChiSquared; pixelQuintupletsInGPU.rPhiChiSquared[pixelQuintupletIndex] = rPhiChiSquared; pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(lst::Modules const& modulesInGPU, uint16_t lowerModuleIndex1, @@ -291,7 +291,7 @@ namespace lst { } } return true; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(lst::Modules const& modulesInGPU, uint16_t lowerModuleIndex1, @@ -381,7 +381,7 @@ namespace lst { } } return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT5(TAcc const& acc, @@ -427,7 +427,7 @@ namespace lst { (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma2); } return chiSquared; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, @@ -513,7 +513,7 @@ namespace lst { } #endif } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, @@ -536,7 +536,7 @@ namespace lst { chiSquared = computeChiSquaredpT5(acc, 5, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); return chiSquared; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquaredInwards( float g, float f, float r, float* xPix, float* yPix) { @@ -551,7 +551,7 @@ namespace lst { } chiSquared *= 0.5f; return chiSquared; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(lst::Modules const& modulesInGPU, uint16_t lowerModuleIndex1, @@ -641,7 +641,7 @@ namespace lst { } } return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, @@ -787,7 +787,7 @@ namespace lst { centerY = (centerY + T5CenterY) / 2; return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, @@ -831,7 +831,7 @@ namespace lst { RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. return RMSE; - }; + } struct createPixelQuintupletsInGPUFromMapv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 3b6faffbce426..4d6f88a8336be 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -1167,7 +1167,7 @@ namespace lst { //2nd update pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, @@ -1425,7 +1425,7 @@ namespace lst { (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); float dBeta = betaIn - betaOut; return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, @@ -1689,7 +1689,7 @@ namespace lst { (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); float dBeta = betaIn - betaOut; return dBeta * dBeta <= dBetaCut2; - }; + } } // namespace lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 1165d33f6da5e..ff6e6ea8380b3 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -148,7 +148,7 @@ namespace lst { float secondMin, float secondMax) { return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(lst::Triplets const& tripletsInGPU, lst::Quintuplets& quintupletsInGPU, @@ -230,7 +230,7 @@ namespace lst { quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared; quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared; quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared; - }; + } //90% constraint ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(lst::Modules const& modulesInGPU, @@ -313,7 +313,7 @@ namespace lst { } return true; - }; + } //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt template @@ -749,7 +749,7 @@ namespace lst { } } return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(lst::Triplets const& tripletsInGPU, @@ -764,7 +764,7 @@ namespace lst { segmentsInGPU.mdIndices[2 * outerInnerSegmentIndex]; //outer triplet inner segment inner MD index return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeErrorInRadius(TAcc const& acc, @@ -796,7 +796,7 @@ namespace lst { } } } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE12378(TAcc const& acc, @@ -820,7 +820,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } /*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ template @@ -845,7 +845,7 @@ namespace lst { bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBE(TAcc const& acc, @@ -869,7 +869,7 @@ namespace lst { bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE23478(TAcc const& acc, @@ -893,7 +893,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE34578(TAcc const& acc, @@ -917,7 +917,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBEEE(TAcc const& acc, @@ -947,7 +947,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBEEEE(TAcc const& acc, @@ -978,7 +978,7 @@ namespace lst { alpaka::math::max(acc, innerInvRadiusMax, 1.0 / innerRadiusMin2S), alpaka::math::min(acc, bridgeInvRadiusMin, 1.0 / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0 / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiEEEEE(TAcc const& acc, @@ -1009,7 +1009,7 @@ namespace lst { alpaka::math::max(acc, innerInvRadiusMax, 1.0 / innerRadiusMin2S), alpaka::math::min(acc, bridgeInvRadiusMin, 1.0 / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0 / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, @@ -1096,7 +1096,7 @@ namespace lst { #endif } } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusUsingRegression(TAcc const& acc, @@ -1194,7 +1194,7 @@ namespace lst { (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / sigmas2[i]; } return radius; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquared(TAcc const& acc, @@ -1240,7 +1240,7 @@ namespace lst { (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; } return chiSquared; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationsT5(TAcc const& acc, @@ -1346,7 +1346,7 @@ namespace lst { //2nd update pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, @@ -1598,7 +1598,7 @@ namespace lst { float dBeta = betaIn - betaOut; return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, @@ -1848,7 +1848,7 @@ namespace lst { float dBeta = betaIn - betaOut; //Cut #7: Cut on dBet return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, @@ -2077,7 +2077,7 @@ namespace lst { float dBeta = betaIn - betaOut; //Cut #7: Cut on dBeta return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, @@ -2182,7 +2182,7 @@ namespace lst { } return false; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, @@ -2533,7 +2533,7 @@ namespace lst { regressionF, regressionRadius); return true; - }; + } struct createQuintupletsInGPUv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6e79bacfa4902..3468a40acc3c0 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -190,7 +190,7 @@ namespace lst { return (subdet == Barrel) && (((side != Center) && (layer == 3)) || ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(short subdet, short layer, short side, short rod) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing @@ -199,7 +199,7 @@ namespace lst { return (subdet == Barrel) && (((side != Center) && (layer == 3)) || ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; @@ -229,7 +229,7 @@ namespace lst { } return moduleSeparation; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(lst::Modules const& modulesInGPU, unsigned int moduleIndex) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; @@ -261,7 +261,7 @@ namespace lst { } return moduleSeparation; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, @@ -356,7 +356,7 @@ namespace lst { //Inner to outer dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(lst::Segments& segmentsInGPU, unsigned int lowerMDIndex, @@ -448,7 +448,7 @@ namespace lst { segmentsInGPU.circleCenterX[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; segmentsInGPU.circleCenterY[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; segmentsInGPU.circleRadius[pixelSegmentArrayIndex] = circleRadius; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, @@ -538,7 +538,7 @@ namespace lst { if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, @@ -654,7 +654,7 @@ namespace lst { if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, @@ -700,7 +700,7 @@ namespace lst { dPhiChangeMin, dPhiChangeMax); } - }; + } struct createSegmentsInGPUv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index ede4dd9471e8e..dbf5cf24f6d55 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -126,7 +126,7 @@ namespace lst { trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 1] = hitIndices.z; trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 2] = hitIndices.y; trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(lst::TrackCandidates& trackCandidatesInGPU, short trackCandidateType, @@ -163,7 +163,7 @@ namespace lst { trackCandidatesInGPU.centerX[trackCandidateIndex] = __F2H(centerX); trackCandidatesInGPU.centerY[trackCandidateIndex] = __F2H(centerY); trackCandidatesInGPU.radius[trackCandidateIndex] = __F2H(radius); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, unsigned int jx, @@ -203,7 +203,7 @@ namespace lst { npMatched++; } return npMatched; - }; + } struct crossCleanpT3 { template diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index f5a216724c1da..9f3521e712ed6 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -202,7 +202,7 @@ namespace lst { tripletsInGPU.rtOut[tripletIndex] = rtOut; tripletsInGPU.betaInCut[tripletIndex] = betaInCut; #endif - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, @@ -262,7 +262,7 @@ namespace lst { } else { return alpaka::math::abs(acc, residual) < 5; } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, @@ -361,7 +361,7 @@ namespace lst { //Cut #3: first beta cut return alpaka::math::abs(acc, betaIn) < betaInCut; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, @@ -482,7 +482,7 @@ namespace lst { //Cut #4: first beta cut return alpaka::math::abs(acc, betaInRHmin) < betaInCut; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, @@ -605,7 +605,7 @@ namespace lst { //Cut #4: first beta cut return alpaka::math::abs(acc, betaInRHmin) < betaInCut; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, @@ -707,7 +707,7 @@ namespace lst { betaInCut); } return false; // failsafe - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusFromThreeAnchorHits( @@ -740,7 +740,7 @@ namespace lst { radius = alpaka::math::sqrt(acc, g * g + f * f - c); return radius; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, @@ -806,7 +806,7 @@ namespace lst { circleRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, circleCenterX, circleCenterY); return true; - }; + } struct createTripletsInGPUv2 { template From e8cc7a8789a6345818a9a400bd56ee10a5d39817 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Fri, 2 Aug 2024 16:20:25 -0700 Subject: [PATCH 2/8] cleanup unnecessary alpaka::wait --- RecoTracker/LSTCore/interface/Module.h | 1 - RecoTracker/LSTCore/src/alpaka/Hit.h | 1 - RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 1 - RecoTracker/LSTCore/src/alpaka/ObjectRanges.h | 1 - RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h | 1 - RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 1 - RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 1 - RecoTracker/LSTCore/src/alpaka/Segment.h | 1 - RecoTracker/LSTCore/src/alpaka/TrackCandidate.h | 1 - 9 files changed, 9 deletions(-) diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h index 78396c195cd8f..eca086b91850f 100644 --- a/RecoTracker/LSTCore/interface/Module.h +++ b/RecoTracker/LSTCore/interface/Module.h @@ -212,7 +212,6 @@ namespace lst { alpaka::memcpy(queue, lstLayers_buf, src.lstLayers_buf); alpaka::memcpy(queue, connectedPixels_buf, src.connectedPixels_buf); } - alpaka::wait(queue); } template diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index c0eb481c73228..7f3412ce4694a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -100,7 +100,6 @@ namespace lst { alpaka::memset(queue, hitRangesUpper_buf, 0xff); alpaka::memset(queue, hitRangesnLower_buf, 0xff); alpaka::memset(queue, hitRangesnUpper_buf, 0xff); - alpaka::wait(queue); } inline Hits const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index bdbd366bba338..bda334b31afc1 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -181,7 +181,6 @@ namespace lst { outerLowEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)) { alpaka::memset(queue, nMDs_buf, 0u); alpaka::memset(queue, totOccupancyMDs_buf, 0u); - alpaka::wait(queue); } inline MiniDoublets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h index 1e1ccf8df12bc..09aac58bc8eb4 100644 --- a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -143,7 +143,6 @@ namespace lst { alpaka::memset(queue, trackCandidateRanges_buf, 0xff); alpaka::memset(queue, quintupletRanges_buf, 0xff); alpaka::memset(queue, quintupletModuleIndices_buf, 0xff); - alpaka::wait(queue); data_.setData(*this); } diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index fcdcd4d7c78bb..2c0b143a6d913 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -100,7 +100,6 @@ namespace lst { rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)) { alpaka::memset(queue, nPixelQuintuplets_buf, 0u); alpaka::memset(queue, totOccupancyPixelQuintuplets_buf, 0u); - alpaka::wait(queue); } inline PixelQuintuplets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 4d6f88a8336be..15e4456c21fc6 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -123,7 +123,6 @@ namespace lst { alpaka::memset(queue, nPixelTriplets_buf, 0u); alpaka::memset(queue, totOccupancyPixelTriplets_buf, 0u); alpaka::memset(queue, partOfPT5_buf, false); - alpaka::wait(queue); } inline PixelTriplets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index ff6e6ea8380b3..3b700dbb94793 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -136,7 +136,6 @@ namespace lst { alpaka::memset(queue, isDup_buf, 0u); alpaka::memset(queue, TightCutFlag_buf, false); alpaka::memset(queue, partOfPT5_buf, false); - alpaka::wait(queue); } inline Quintuplets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 3468a40acc3c0..76436778802b1 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -170,7 +170,6 @@ namespace lst { alpaka::memset(queue, totOccupancySegments_buf, 0u); alpaka::memset(queue, partOfPT5_buf, false); alpaka::memset(queue, pLSHitsIdxs_buf, 0u); - alpaka::wait(queue); } inline Segments const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index dbf5cf24f6d55..835647c65e4bd 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -102,7 +102,6 @@ namespace lst { alpaka::memset(queue, lowerModuleIndices_buf, 0u); alpaka::memset(queue, hitIndices_buf, 0u); alpaka::memset(queue, pixelSeedIndex_buf, 0); - alpaka::wait(queue); } inline TrackCandidates const* data() const { return &data_; } From 73431560c541ac40e180f4b179182bb1d336860c Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 05:31:06 -0700 Subject: [PATCH 3/8] cleanup unnecessary alpaka::wait; add comments justifying other alpaka::wait calls; switch to cms::alpakatools::make_host_buffer for a few local buffers --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 433 +++++++++----------- RecoTracker/LSTCore/src/alpaka/Event.h | 2 +- 2 files changed, 188 insertions(+), 247 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 318622da2ce42..05b9faac480e7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -1,3 +1,5 @@ +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + #include "Event.h" using namespace ALPAKA_ACCELERATOR_NAMESPACE; @@ -181,7 +183,7 @@ void lst::Event::addHitToEvent(std::vector const& x, alpaka::memcpy(queue, hitsBuffers->detid_buf, detId, nHits); alpaka::memcpy(queue, hitsBuffers->idxs_buf, idxInNtuple, nHits); alpaka::memcpy(queue, hitsBuffers->nHits_buf, nHits_view); - alpaka::wait(queue); + alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; Vec3D const blocksPerGrid1{1, 1, max_blocks}; @@ -253,12 +255,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); - // Create a source view for the value to be set - int value = n_max_pixel_md_per_modules; - auto src_view_value = alpaka::createView(devHost, &value, (Idx)1u); + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, src_view_value); - alpaka::wait(queue); + alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; Vec3D const blocksPerGridCreateMD{1, 1, 1}; @@ -270,22 +271,19 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); - alpaka::wait(queue); - unsigned int nTotalMDs; - auto nTotalMDs_view = alpaka::createView(devHost, &nTotalMDs, (Idx)1u); + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); + alpaka::wait(queue); // wait to get the data before manipulation - alpaka::memcpy(queue, nTotalMDs_view, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); - - nTotalMDs += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); mdsInGPU = new lst::MiniDoublets(); miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); - alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_view); - alpaka::wait(queue); + alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); } if (segmentsInGPU == nullptr) { // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. @@ -304,22 +302,20 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& *mdsInGPU)); alpaka::enqueue(queue, createSegmentArrayRangesTask); - alpaka::wait(queue); - auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments, (Idx)1u); + auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments_, (Idx)1u); alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments += n_max_pixel_segments_per_module; + nTotalSegments_ += n_max_pixel_segments_per_module; segmentsInGPU = new lst::Segments(); - segmentsBuffers = - new lst::SegmentsBuffer(nTotalSegments, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsBuffers = new lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); - alpaka::wait(queue); } auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); @@ -366,7 +362,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::createSubView(miniDoubletsBuffers->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue, dst_view_totOccupancyMDs, src_view_mdSize); - alpaka::wait(queue); + alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; Vec3D const blocksPerGrid{1, 1, max_blocks}; @@ -389,7 +385,6 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& size)); alpaka::enqueue(queue, addPixelSegmentToEvent_task); - alpaka::wait(queue); } void lst::Event::createMiniDoublets() { @@ -397,12 +392,11 @@ void lst::Event::createMiniDoublets() { auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); - // Create a source view for the value to be set - int value = n_max_pixel_md_per_modules; - auto src_view_value = alpaka::createView(devHost, &value, (Idx)1u); + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, src_view_value); - alpaka::wait(queue); + alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; Vec3D const blocksPerGridCreateMD{1, 1, 1}; @@ -414,16 +408,13 @@ void lst::Event::createMiniDoublets() { createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); - alpaka::wait(queue); - auto nTotalMDs_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTotalMDs_buf, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); + alpaka::wait(queue); // wait to get the data before manipulation - unsigned int nTotalMDs = *alpaka::getPtrNative(nTotalMDs_buf); - - nTotalMDs += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { mdsInGPU = new lst::MiniDoublets(); @@ -461,7 +452,6 @@ void lst::Event::createMiniDoublets() { *hitsInGPU)); alpaka::enqueue(queue, addMiniDoubletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addMiniDoubletsToEventExplicit(); @@ -471,8 +461,8 @@ void lst::Event::createMiniDoublets() { void lst::Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { segmentsInGPU = new lst::Segments(); - segmentsBuffers = - new lst::SegmentsBuffer(nTotalSegments, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsBuffers = new lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } @@ -505,7 +495,6 @@ void lst::Event::createSegmentsWithModuleMap() { *rangesInGPU)); alpaka::enqueue(queue, addSegmentRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addSegmentsToEventExplicit(); @@ -527,46 +516,40 @@ void lst::Event::createTriplets() { *segmentsInGPU)); alpaka::enqueue(queue, createTripletArrayRangesTask); - alpaka::wait(queue); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? - auto maxTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - alpaka::memcpy(queue, maxTriplets_buf, rangesBuffers->device_nTotalTrips_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); + alpaka::wait(queue); // wait to get the value before using it tripletsInGPU = new lst::Triplets(); - tripletsBuffers = - new lst::TripletsBuffer(*alpaka::getPtrNative(maxTriplets_buf), nLowerModules_, devAcc, queue); + tripletsBuffers = new lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); - alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); } uint16_t nonZeroModules = 0; unsigned int max_InnerSeg = 0; - // Allocate host index - auto index_buf = allocBufWrapper(devHost, nLowerModules_, queue); - uint16_t* index = alpaka::getPtrNative(index_buf); + // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + alpaka::memcpy(queue, nSegments_buf_h, segmentsBuffers->nSegments_buf, nLowerModules_); - // Allocate device index - auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + // ... same for module_nConnectedModules + // FIXME: replace by ES host data + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + alpaka::memcpy(queue, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - // Allocate and copy nSegments from device to host - auto nSegments_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nSegments_buf, segmentsBuffers->nSegments_buf, nLowerModules_); - alpaka::wait(queue); + alpaka::wait(queue); // wait for nSegments and module_nConnectedModules before using - unsigned int* nSegments = alpaka::getPtrNative(nSegments_buf); + auto const* nSegments = nSegments_buf_h.data(); + auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); - // Allocate and copy module_nConnectedModules from device to host - auto module_nConnectedModules_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - alpaka::wait(queue); - - uint16_t* module_nConnectedModules = alpaka::getPtrNative(module_nConnectedModules_buf); + // Allocate host index and fill it directly + auto index_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + auto* index = index_buf_h.data(); for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex]; @@ -578,9 +561,9 @@ void lst::Event::createTriplets() { max_InnerSeg = std::max(max_InnerSeg, nInnerSegments); } - // Copy index from host to device - alpaka::memcpy(queue, index_gpu_buf, index_buf, nonZeroModules); - alpaka::wait(queue); + // Allocate and copy to device index + auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; @@ -614,7 +597,6 @@ void lst::Event::createTriplets() { *rangesInGPU)); alpaka::enqueue(queue, addTripletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addTripletsToEventExplicit(); @@ -629,12 +611,6 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - // Pull nEligibleT5Modules from the device. - auto nEligibleModules_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nEligibleModules_buf, rangesBuffers->nEligibleT5Modules_buf); - alpaka::wait(queue); - uint16_t nEligibleModules = *alpaka::getPtrNative(nEligibleModules_buf); - Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const crossCleanpT3_workDiv = @@ -667,6 +643,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::enqueue(queue, addpT3asTrackCandidatesInGPUTask); + // Pull nEligibleT5Modules from the device. + auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nEligibleModules_buf_h, rangesBuffers->nEligibleT5Modules_buf); + alpaka::wait(queue); // wait to get the value before using + auto const nEligibleModules = *nEligibleModules_buf_h.data(); + Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = @@ -768,12 +750,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); alpaka::memcpy(queue, nTrackCanT5Host_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the values before using them - int nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); - int nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); - int nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); + auto nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); + auto nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); + auto nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); + auto nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { printf( @@ -792,31 +774,30 @@ void lst::Event::createPixelTriplets() { pixelTripletsInGPU->setData(*pixelTripletsBuffers); } + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + + alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); + alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + unsigned int nInnerSegments; auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); + // Create a sub-view for the device buffer auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); - - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - int* superbins = alpaka::getPtrNative(superbins_buf); - int8_t* pixelTypes = alpaka::getPtrNative(pixelTypes_buf); unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); - alpaka::wait(queue); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -856,7 +837,6 @@ void lst::Event::createPixelTriplets() { alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); - alpaka::wait(queue); Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; @@ -878,13 +858,12 @@ void lst::Event::createPixelTriplets() { nInnerSegments)); alpaka::enqueue(queue, createPixelTripletsInGPUFromMapv2Task); - alpaka::wait(queue); #ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before using it std::cout << "number of pixel triplets = " << *alpaka::getPtrNative(nPixelTriplets_buf) << std::endl; #endif @@ -901,7 +880,6 @@ void lst::Event::createPixelTriplets() { removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, *pixelTripletsInGPU)); alpaka::enqueue(queue, removeDupPixelTripletsInGPUFromMapTask); - alpaka::wait(queue); } void lst::Event::createQuintuplets() { @@ -919,17 +897,16 @@ void lst::Event::createQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, createEligibleModulesListForQuintupletsGPUTask); - alpaka::wait(queue); auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait for the values before using them - uint16_t nEligibleT5Modules = *alpaka::getPtrNative(nEligibleT5Modules_buf); - unsigned int nTotalQuintuplets = *alpaka::getPtrNative(nTotalQuintuplets_buf); + auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); + auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (quintupletsInGPU == nullptr) { quintupletsInGPU = new lst::Quintuplets(); @@ -937,7 +914,6 @@ void lst::Event::createQuintuplets() { quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); - alpaka::wait(queue); } Vec3D const threadsPerBlockQuints{1, 8, 32}; @@ -987,7 +963,6 @@ void lst::Event::createQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, addQuintupletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addQuintupletsToEventExplicit(); @@ -1006,7 +981,6 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false)); alpaka::enqueue(queue, checkHitspLSTask); - alpaka::wait(queue); } } @@ -1023,6 +997,14 @@ void lst::Event::createPixelQuintuplets() { trackCandidatesInGPU->setData(*trackCandidatesBuffers); } + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + + alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); + alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + unsigned int nInnerSegments; auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); @@ -1030,25 +1012,15 @@ void lst::Event::createPixelQuintuplets() { auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); - - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - int* superbins = alpaka::getPtrNative(superbins_buf); - int8_t* pixelTypes = alpaka::getPtrNative(pixelTypes_buf); - unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); - unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); - alpaka::wait(queue); + auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -1083,7 +1055,6 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); - alpaka::wait(queue); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; @@ -1135,13 +1106,12 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, addpT5asTrackCandidateInGPUTask); - alpaka::wait(queue); #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before using it std::cout << "number of pixel quintuplets = " << *alpaka::getPtrNative(nPixelQuintuplets_buf) << std::endl; #endif @@ -1151,6 +1121,7 @@ void lst::Event::addMiniDoubletsToEventExplicit() { auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); @@ -1160,12 +1131,12 @@ void lst::Event::addMiniDoubletsToEventExplicit() { auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nMDsCPU = alpaka::getPtrNative(nMDsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); - int* module_hitRanges = alpaka::getPtrNative(module_hitRanges_buf); + auto const* nMDsCPU = nMDsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_hitRanges = module_hitRanges_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) { @@ -1182,17 +1153,18 @@ void lst::Event::addSegmentsToEventExplicit() { auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nSegmentsCPU = alpaka::getPtrNative(nSegmentsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); + auto const* nSegmentsCPU = nSegmentsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nSegmentsCPU[i] == 0)) { @@ -1209,6 +1181,7 @@ void lst::Event::addQuintupletsToEventExplicit() { auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); @@ -1218,12 +1191,12 @@ void lst::Event::addQuintupletsToEventExplicit() { auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nQuintupletsCPU = alpaka::getPtrNative(nQuintupletsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); - int* module_quintupletModuleIndices = alpaka::getPtrNative(module_quintupletModuleIndices_buf); + auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data(); for (uint16_t i = 0; i < nLowerModules_; i++) { if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { @@ -1240,16 +1213,18 @@ void lst::Event::addTripletsToEventExplicit() { auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); - unsigned int* nTripletsCPU = alpaka::getPtrNative(nTripletsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); + alpaka::wait(queue); // wait for inputs before using them + + auto const* nTripletsCPU = nTripletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); for (uint16_t i = 0; i < nLowerModules_; i++) { if (nTripletsCPU[i] != 0) { @@ -1371,25 +1346,19 @@ unsigned int lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int la } int lst::Event::getNumberOfPixelTriplets() { - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); - int nPixelTriplets = *alpaka::getPtrNative(nPixelTriplets_buf); - - return nPixelTriplets; + return *nPixelTriplets_buf_h.data(); } int lst::Event::getNumberOfPixelQuintuplets() { - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nPixelQuintuplets = *alpaka::getPtrNative(nPixelQuintuplets_buf); + alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - return nPixelQuintuplets; + return *nPixelQuintuplets_buf_h.data(); } unsigned int lst::Event::getNumberOfQuintuplets() { @@ -1420,110 +1389,90 @@ unsigned int lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int } int lst::Event::getNumberOfTrackCandidates() { - auto nTrackCandidates_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCandidates_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); - - int nTrackCandidates = *alpaka::getPtrNative(nTrackCandidates_buf); + alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - return nTrackCandidates; + return *nTrackCandidates_buf_h.data(); } int lst::Event::getNumberOfPT5TrackCandidates() { - auto nTrackCandidatesPT5_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCandidatesPT5_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); + alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::wait(queue); - int nTrackCandidatesPT5 = *alpaka::getPtrNative(nTrackCandidatesPT5_buf); - - return nTrackCandidatesPT5; + return *nTrackCandidatesPT5_buf_h.data(); } int lst::Event::getNumberOfPT3TrackCandidates() { - auto nTrackCandidatesPT3_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesPT3_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); - alpaka::wait(queue); + auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidatesPT3 = *alpaka::getPtrNative(nTrackCandidatesPT3_buf); + alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); - return nTrackCandidatesPT3; + return *nTrackCandidatesPT3_buf_h.data(); } int lst::Event::getNumberOfPLSTrackCandidates() { - auto nTrackCandidatesPLS_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesPLS_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); - alpaka::wait(queue); + auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - unsigned int nTrackCandidatesPLS = *alpaka::getPtrNative(nTrackCandidatesPLS_buf); + alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); - return nTrackCandidatesPLS; + return *nTrackCandidatesPLS_buf_h.data(); } int lst::Event::getNumberOfPixelTrackCandidates() { - auto nTrackCandidates_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCandidatesT5_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidates_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::memcpy(queue, nTrackCandidatesT5_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidates = *alpaka::getPtrNative(nTrackCandidates_buf); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCandidatesT5_buf); + alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); + alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - return nTrackCandidates - nTrackCandidatesT5; + return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } int lst::Event::getNumberOfT5TrackCandidates() { - auto nTrackCandidatesT5_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesT5_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCandidatesT5_buf); + alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - return nTrackCandidatesT5; + return *nTrackCandidatesT5_buf_h.data(); } lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { - auto nHits_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nHits_buf, hitsBuffers->nHits_buf); - alpaka::wait(queue); + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nHits = *alpaka::getPtrNative(nHits_buf); + auto const nHits = *nHits_buf_h.data(); hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); - *alpaka::getPtrNative(hitsInCPU->nHits_buf) = nHits; + *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->detid_buf, hitsBuffers->detid_buf, nHits); alpaka::memcpy(queue, hitsInCPU->xs_buf, hitsBuffers->xs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); - alpaka::wait(queue); } return hitsInCPU; } lst::HitsBuffer* lst::Event::getHitsInCMSSW() { if (hitsInCPU == nullptr) { - auto nHits_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nHits_buf, hitsBuffers->nHits_buf); - alpaka::wait(queue); + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nHits = *alpaka::getPtrNative(nHits_buf); + auto const nHits = *nHits_buf_h.data(); hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); - *alpaka::getPtrNative(hitsInCPU->nHits_buf) = nHits; + *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); - alpaka::wait(queue); } return hitsInCPU; } @@ -1538,7 +1487,7 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get completed host data } return rangesInCPU; } @@ -1546,21 +1495,20 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, miniDoubletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, miniDoubletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); mdsInCPU = new lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); - *alpaka::getPtrNative(mdsInCPU->nMemoryLocations_buf) = nMemHost; + *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, mdsInCPU->anchorHitIndices_buf, miniDoubletsBuffers->anchorHitIndices_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->outerHitIndices_buf, miniDoubletsBuffers->outerHitIndices_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); - alpaka::wait(queue); } return mdsInCPU; } @@ -1568,16 +1516,16 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { lst::SegmentsBuffer* lst::Event::getSegments() { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, segmentsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, segmentsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); segmentsInCPU = new lst::SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); - *alpaka::getPtrNative(segmentsInCPU->nMemoryLocations_buf) = nMemHost; + *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, segmentsInCPU->nSegments_buf, segmentsBuffers->nSegments_buf); alpaka::memcpy(queue, segmentsInCPU->mdIndices_buf, segmentsBuffers->mdIndices_buf, 2u * nMemHost); alpaka::memcpy(queue, @@ -1596,7 +1544,6 @@ lst::SegmentsBuffer* lst::Event::getSegments() { alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf); - alpaka::wait(queue); } return segmentsInCPU; } @@ -1604,15 +1551,15 @@ lst::SegmentsBuffer* lst::Event::getSegments() { lst::TripletsBuffer* lst::Event::getTriplets() { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, tripletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, tripletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); tripletsInCPU = new lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); - *alpaka::getPtrNative(tripletsInCPU->nMemoryLocations_buf) = nMemHost; + *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; #ifdef CUT_VALUE_DEBUG alpaka::memcpy(queue, tripletsInCPU->zOut_buf, tripletsBuffers->zOut_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->zLo_buf, tripletsBuffers->zLo_buf, nMemHost); @@ -1632,7 +1579,6 @@ lst::TripletsBuffer* lst::Event::getTriplets() { alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); - alpaka::wait(queue); } return tripletsInCPU; } @@ -1640,15 +1586,15 @@ lst::TripletsBuffer* lst::Event::getTriplets() { lst::QuintupletsBuffer* lst::Event::getQuintuplets() { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, quintupletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, quintupletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); quintupletsInCPU = new lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); - *alpaka::getPtrNative(quintupletsInCPU->nMemoryLocations_buf) = nMemHost; + *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, quintupletsInCPU->nQuintuplets_buf, quintupletsBuffers->nQuintuplets_buf); alpaka::memcpy( queue, quintupletsInCPU->totOccupancyQuintuplets_buf, quintupletsBuffers->totOccupancyQuintuplets_buf); @@ -1668,7 +1614,6 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); alpaka::memcpy( queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); - alpaka::wait(queue); } return quintupletsInCPU; } @@ -1676,15 +1621,15 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nPixelTriplets = *alpaka::getPtrNative(nPixelTriplets_buf); + auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); pixelTripletsInCPU = new lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); - *alpaka::getPtrNative(pixelTripletsInCPU->nPixelTriplets_buf) = nPixelTriplets; + *pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; alpaka::memcpy( queue, pixelTripletsInCPU->totOccupancyPixelTriplets_buf, pixelTripletsBuffers->totOccupancyPixelTriplets_buf); alpaka::memcpy(queue, pixelTripletsInCPU->rzChiSquared_buf, pixelTripletsBuffers->rzChiSquared_buf, nPixelTriplets); @@ -1707,7 +1652,6 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); - alpaka::wait(queue); } return pixelTripletsInCPU; } @@ -1715,15 +1659,15 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nPixelQuintuplets = *alpaka::getPtrNative(nPixelQuintuplets_buf); + auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); pixelQuintupletsInCPU = new lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); - *alpaka::getPtrNative(pixelQuintupletsInCPU->nPixelQuintuplets_buf) = nPixelQuintuplets; + *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; alpaka::memcpy(queue, pixelQuintupletsInCPU->totOccupancyPixelQuintuplets_buf, pixelQuintupletsBuffers->totOccupancyPixelQuintuplets_buf); @@ -1743,7 +1687,6 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); - alpaka::wait(queue); } return pixelQuintupletsInCPU; } @@ -1751,16 +1694,16 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanHost_buf, trackCandidatesBuffers->nTrackCandidates_buf); + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); alpaka::wait(queue); - unsigned int nTrackCanHost = *alpaka::getPtrNative(nTrackCanHost_buf); + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); trackCandidatesInCPU = new lst::TrackCandidatesBuffer( n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); - *alpaka::getPtrNative(trackCandidatesInCPU->nTrackCandidates_buf) = nTrackCanHost; + *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, @@ -1781,7 +1724,6 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); - alpaka::wait(queue); } return trackCandidatesInCPU; } @@ -1789,16 +1731,16 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW() { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanHost_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nTrackCanHost = *alpaka::getPtrNative(nTrackCanHost_buf); + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); trackCandidatesInCPU = new lst::TrackCandidatesBuffer( n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); - *alpaka::getPtrNative(trackCandidatesInCPU->nTrackCandidates_buf) = nTrackCanHost; + *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, @@ -1809,7 +1751,6 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); - alpaka::wait(queue); } return trackCandidatesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index f1fa3a7d23347..7e2a351a8b699 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -44,9 +44,9 @@ namespace lst { std::array n_trackCandidates_by_layer_endcap_; std::array n_quintuplets_by_layer_barrel_; std::array n_quintuplets_by_layer_endcap_; + unsigned int nTotalSegments_; //Device stuff - unsigned int nTotalSegments; ObjectRanges* rangesInGPU; ObjectRangesBuffer* rangesBuffers; Hits* hitsInGPU; From 4ef678d1c691665c37506f39c2274afb0bfef00a Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 06:21:53 -0700 Subject: [PATCH 4/8] switch to using alpaka::exec --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 478 +++++++++----------- 1 file changed, 215 insertions(+), 263 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 05b9faac480e7..82a7f44a268b9 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -190,32 +190,26 @@ void lst::Event::addHitToEvent(std::vector const& x, WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); hitLoopKernel hit_loop_kernel; - auto const hit_loop_task(alpaka::createTaskKernel(hit_loop_workdiv, - hit_loop_kernel, - Endcap, - TwoS, - nModules_, - nEndCapMap_, - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), - *modulesBuffers_.data(), - *hitsInGPU, - nHits)); - - alpaka::enqueue(queue, hit_loop_task); + alpaka::exec(queue, + hit_loop_workdiv, + hit_loop_kernel, + Endcap, + TwoS, + nModules_, + nEndCapMap_, + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), + *modulesBuffers_.data(), + *hitsInGPU, + nHits); Vec3D const threadsPerBlock2{1, 1, 256}; Vec3D const blocksPerGrid2{1, 1, max_blocks}; WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); moduleRangesKernel module_ranges_kernel; - auto const module_ranges_task(alpaka::createTaskKernel( - module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_)); - - // Waiting isn't needed after second kernel call. Saves ~100 us. - // This is because addPixelSegmentToEvent (which is run next) doesn't rely on hitsBuffers->hitrange variables. - // Also, modulesInGPU->partnerModuleIndices is not alterned in addPixelSegmentToEvent. - alpaka::enqueue(queue, module_ranges_task); + alpaka::exec( + queue, module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } void lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, @@ -267,10 +261,8 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - auto const createMDArrayRangesGPUTask(alpaka::createTaskKernel( - createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); - - alpaka::enqueue(queue, createMDArrayRangesGPUTask); + alpaka::exec( + queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); @@ -295,13 +287,12 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; - auto const createSegmentArrayRangesTask(alpaka::createTaskKernel(createSegmentArrayRanges_workDiv, - createSegmentArrayRanges_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *mdsInGPU)); - - alpaka::enqueue(queue, createSegmentArrayRangesTask); + alpaka::exec(queue, + createSegmentArrayRanges_workDiv, + createSegmentArrayRanges_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *mdsInGPU); auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments_, (Idx)1u); @@ -369,22 +360,21 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); addPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; - auto const addPixelSegmentToEvent_task(alpaka::createTaskKernel(addPixelSegmentToEvent_workdiv, - addPixelSegmentToEvent_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *hitsInGPU, - *mdsInGPU, - *segmentsInGPU, - alpaka::getPtrNative(hitIndices0_dev), - alpaka::getPtrNative(hitIndices1_dev), - alpaka::getPtrNative(hitIndices2_dev), - alpaka::getPtrNative(hitIndices3_dev), - alpaka::getPtrNative(dPhiChange_dev), - pixelModuleIndex, - size)); - - alpaka::enqueue(queue, addPixelSegmentToEvent_task); + alpaka::exec(queue, + addPixelSegmentToEvent_workdiv, + addPixelSegmentToEvent_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *hitsInGPU, + *mdsInGPU, + *segmentsInGPU, + alpaka::getPtrNative(hitIndices0_dev), + alpaka::getPtrNative(hitIndices1_dev), + alpaka::getPtrNative(hitIndices2_dev), + alpaka::getPtrNative(hitIndices3_dev), + alpaka::getPtrNative(dPhiChange_dev), + pixelModuleIndex, + size); } void lst::Event::createMiniDoublets() { @@ -404,10 +394,8 @@ void lst::Event::createMiniDoublets() { createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - auto const createMDArrayRangesGPUTask(alpaka::createTaskKernel( - createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); - - alpaka::enqueue(queue, createMDArrayRangesGPUTask); + alpaka::exec( + queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); @@ -428,14 +416,13 @@ void lst::Event::createMiniDoublets() { createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); lst::createMiniDoubletsInGPUv2 createMiniDoubletsInGPUv2_kernel; - auto const createMiniDoubletsInGPUv2Task(alpaka::createTaskKernel(createMiniDoubletsInGPUv2_workDiv, - createMiniDoubletsInGPUv2_kernel, - *modulesBuffers_.data(), - *hitsInGPU, - *mdsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, createMiniDoubletsInGPUv2Task); + alpaka::exec(queue, + createMiniDoubletsInGPUv2_workDiv, + createMiniDoubletsInGPUv2_kernel, + *modulesBuffers_.data(), + *hitsInGPU, + *mdsInGPU, + *rangesInGPU); Vec3D const threadsPerBlockAddMD{1, 1, 1024}; Vec3D const blocksPerGridAddMD{1, 1, 1}; @@ -443,15 +430,13 @@ void lst::Event::createMiniDoublets() { createWorkDiv(blocksPerGridAddMD, threadsPerBlockAddMD, elementsPerThread); lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; - auto const addMiniDoubletRangesToEventExplicitTask( - alpaka::createTaskKernel(addMiniDoubletRangesToEventExplicit_workDiv, - addMiniDoubletRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *rangesInGPU, - *hitsInGPU)); - - alpaka::enqueue(queue, addMiniDoubletRangesToEventExplicitTask); + alpaka::exec(queue, + addMiniDoubletRangesToEventExplicit_workDiv, + addMiniDoubletRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *rangesInGPU, + *hitsInGPU); if (addObjects) { addMiniDoubletsToEventExplicit(); @@ -472,14 +457,13 @@ void lst::Event::createSegmentsWithModuleMap() { createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); lst::createSegmentsInGPUv2 createSegmentsInGPUv2_kernel; - auto const createSegmentsInGPUv2Task(alpaka::createTaskKernel(createSegmentsInGPUv2_workDiv, - createSegmentsInGPUv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, createSegmentsInGPUv2Task); + alpaka::exec(queue, + createSegmentsInGPUv2_workDiv, + createSegmentsInGPUv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *rangesInGPU); Vec3D const threadsPerBlockAddSeg{1, 1, 1024}; Vec3D const blocksPerGridAddSeg{1, 1, 1}; @@ -487,14 +471,12 @@ void lst::Event::createSegmentsWithModuleMap() { createWorkDiv(blocksPerGridAddSeg, threadsPerBlockAddSeg, elementsPerThread); lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; - auto const addSegmentRangesToEventExplicitTask( - alpaka::createTaskKernel(addSegmentRangesToEventExplicit_workDiv, - addSegmentRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addSegmentRangesToEventExplicitTask); + alpaka::exec(queue, + addSegmentRangesToEventExplicit_workDiv, + addSegmentRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *segmentsInGPU, + *rangesInGPU); if (addObjects) { addSegmentsToEventExplicit(); @@ -509,13 +491,12 @@ void lst::Event::createTriplets() { createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); lst::createTripletArrayRanges createTripletArrayRanges_kernel; - auto const createTripletArrayRangesTask(alpaka::createTaskKernel(createTripletArrayRanges_workDiv, - createTripletArrayRanges_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *segmentsInGPU)); - - alpaka::enqueue(queue, createTripletArrayRangesTask); + alpaka::exec(queue, + createTripletArrayRanges_workDiv, + createTripletArrayRanges_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *segmentsInGPU); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); @@ -571,17 +552,16 @@ void lst::Event::createTriplets() { createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); lst::createTripletsInGPUv2 createTripletsInGPUv2_kernel; - auto const createTripletsInGPUv2Task(alpaka::createTaskKernel(createTripletsInGPUv2_workDiv, - createTripletsInGPUv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *rangesInGPU, - alpaka::getPtrNative(index_gpu_buf), - nonZeroModules)); - - alpaka::enqueue(queue, createTripletsInGPUv2Task); + alpaka::exec(queue, + createTripletsInGPUv2_workDiv, + createTripletsInGPUv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *rangesInGPU, + alpaka::getPtrNative(index_gpu_buf), + nonZeroModules); Vec3D const threadsPerBlockAddTrip{1, 1, 1024}; Vec3D const blocksPerGridAddTrip{1, 1, 1}; @@ -589,14 +569,12 @@ void lst::Event::createTriplets() { createWorkDiv(blocksPerGridAddTrip, threadsPerBlockAddTrip, elementsPerThread); lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; - auto const addTripletRangesToEventExplicitTask( - alpaka::createTaskKernel(addTripletRangesToEventExplicit_workDiv, - addTripletRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *tripletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addTripletRangesToEventExplicitTask); + alpaka::exec(queue, + addTripletRangesToEventExplicit_workDiv, + addTripletRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *tripletsInGPU, + *rangesInGPU); if (addObjects) { addTripletsToEventExplicit(); @@ -617,15 +595,14 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); lst::crossCleanpT3 crossCleanpT3_kernel; - auto const crossCleanpT3Task(alpaka::createTaskKernel(crossCleanpT3_workDiv, - crossCleanpT3_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *pixelTripletsInGPU, - *segmentsInGPU, - *pixelQuintupletsInGPU)); - - alpaka::enqueue(queue, crossCleanpT3Task); + alpaka::exec(queue, + crossCleanpT3_workDiv, + crossCleanpT3_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *pixelTripletsInGPU, + *segmentsInGPU, + *pixelQuintupletsInGPU); Vec3D const threadsPerBlock_addpT3asTrackCandidatesInGPU{1, 1, 512}; Vec3D const blocksPerGrid_addpT3asTrackCandidatesInGPU{1, 1, 1}; @@ -633,15 +610,14 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ blocksPerGrid_addpT3asTrackCandidatesInGPU, threadsPerBlock_addpT3asTrackCandidatesInGPU, elementsPerThread); lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; - auto const addpT3asTrackCandidatesInGPUTask(alpaka::createTaskKernel(addpT3asTrackCandidatesInGPU_workDiv, - addpT3asTrackCandidatesInGPU_kernel, - nLowerModules_, - *pixelTripletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addpT3asTrackCandidatesInGPUTask); + alpaka::exec(queue, + addpT3asTrackCandidatesInGPU_workDiv, + addpT3asTrackCandidatesInGPU_kernel, + nLowerModules_, + *pixelTripletsInGPU, + *trackCandidatesInGPU, + *segmentsInGPU, + *rangesInGPU); // Pull nEligibleT5Modules from the device. auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -655,13 +631,11 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); lst::removeDupQuintupletsInGPUBeforeTC removeDupQuintupletsInGPUBeforeTC_kernel; - auto const removeDupQuintupletsInGPUBeforeTCTask( - alpaka::createTaskKernel(removeDupQuintupletsInGPUBeforeTC_workDiv, - removeDupQuintupletsInGPUBeforeTC_kernel, - *quintupletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, removeDupQuintupletsInGPUBeforeTCTask); + alpaka::exec(queue, + removeDupQuintupletsInGPUBeforeTC_workDiv, + removeDupQuintupletsInGPUBeforeTC_kernel, + *quintupletsInGPU, + *rangesInGPU); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; @@ -669,15 +643,14 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); lst::crossCleanT5 crossCleanT5_kernel; - auto const crossCleanT5Task(alpaka::createTaskKernel(crossCleanT5_workDiv, - crossCleanT5_kernel, - *modulesBuffers_.data(), - *quintupletsInGPU, - *pixelQuintupletsInGPU, - *pixelTripletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, crossCleanT5Task); + alpaka::exec(queue, + crossCleanT5_workDiv, + crossCleanT5_kernel, + *modulesBuffers_.data(), + *quintupletsInGPU, + *pixelQuintupletsInGPU, + *pixelTripletsInGPU, + *rangesInGPU); Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; @@ -685,14 +658,13 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); lst::addT5asTrackCandidateInGPU addT5asTrackCandidateInGPU_kernel; - auto const addT5asTrackCandidateInGPUTask(alpaka::createTaskKernel(addT5asTrackCandidateInGPU_workDiv, - addT5asTrackCandidateInGPU_kernel, - nLowerModules_, - *quintupletsInGPU, - *trackCandidatesInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addT5asTrackCandidateInGPUTask); + alpaka::exec(queue, + addT5asTrackCandidateInGPU_workDiv, + addT5asTrackCandidateInGPU_kernel, + nLowerModules_, + *quintupletsInGPU, + *trackCandidatesInGPU, + *rangesInGPU); if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; @@ -701,10 +673,8 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); lst::checkHitspLS checkHitspLS_kernel; - auto const checkHitspLSTask(alpaka::createTaskKernel( - checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true)); - - alpaka::enqueue(queue, checkHitspLSTask); + alpaka::exec( + queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; @@ -713,18 +683,17 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); lst::crossCleanpLS crossCleanpLS_kernel; - auto const crossCleanpLSTask(alpaka::createTaskKernel(crossCleanpLS_workDiv, - crossCleanpLS_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *pixelTripletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *mdsInGPU, - *hitsInGPU, - *quintupletsInGPU)); - - alpaka::enqueue(queue, crossCleanpLSTask); + alpaka::exec(queue, + crossCleanpLS_workDiv, + crossCleanpLS_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *pixelTripletsInGPU, + *trackCandidatesInGPU, + *segmentsInGPU, + *mdsInGPU, + *hitsInGPU, + *quintupletsInGPU); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; @@ -732,14 +701,13 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); lst::addpLSasTrackCandidateInGPU addpLSasTrackCandidateInGPU_kernel; - auto const addpLSasTrackCandidateInGPUTask(alpaka::createTaskKernel(addpLSasTrackCandidateInGPU_workDiv, - addpLSasTrackCandidateInGPU_kernel, - nLowerModules_, - *trackCandidatesInGPU, - *segmentsInGPU, - tc_pls_triplets)); - - alpaka::enqueue(queue, addpLSasTrackCandidateInGPUTask); + alpaka::exec(queue, + addpLSasTrackCandidateInGPU_workDiv, + addpLSasTrackCandidateInGPU_kernel, + nLowerModules_, + *trackCandidatesInGPU, + *segmentsInGPU, + tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); @@ -844,20 +812,18 @@ void lst::Event::createPixelTriplets() { createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); lst::createPixelTripletsInGPUFromMapv2 createPixelTripletsInGPUFromMapv2_kernel; - auto const createPixelTripletsInGPUFromMapv2Task( - alpaka::createTaskKernel(createPixelTripletsInGPUFromMapv2_workDiv, - createPixelTripletsInGPUFromMapv2_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *pixelTripletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), - nInnerSegments)); - - alpaka::enqueue(queue, createPixelTripletsInGPUFromMapv2Task); + alpaka::exec(queue, + createPixelTripletsInGPUFromMapv2_workDiv, + createPixelTripletsInGPUFromMapv2_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *pixelTripletsInGPU, + alpaka::getPtrNative(connectedPixelSize_dev_buf), + alpaka::getPtrNative(connectedPixelIndex_dev_buf), + nInnerSegments); #ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); @@ -876,10 +842,10 @@ void lst::Event::createPixelTriplets() { createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); lst::removeDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel; - auto const removeDupPixelTripletsInGPUFromMapTask(alpaka::createTaskKernel( - removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, *pixelTripletsInGPU)); - - alpaka::enqueue(queue, removeDupPixelTripletsInGPUFromMapTask); + alpaka::exec(queue, + removeDupPixelTripletsInGPUFromMap_workDiv, + removeDupPixelTripletsInGPUFromMap_kernel, + *pixelTripletsInGPU); } void lst::Event::createQuintuplets() { @@ -889,14 +855,12 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridCreateQuints, threadsPerBlockCreateQuints, elementsPerThread); lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; - auto const createEligibleModulesListForQuintupletsGPUTask( - alpaka::createTaskKernel(createEligibleModulesListForQuintupletsGPU_workDiv, - createEligibleModulesListForQuintupletsGPU_kernel, - *modulesBuffers_.data(), - *tripletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, createEligibleModulesListForQuintupletsGPUTask); + alpaka::exec(queue, + createEligibleModulesListForQuintupletsGPU_workDiv, + createEligibleModulesListForQuintupletsGPU_kernel, + *modulesBuffers_.data(), + *tripletsInGPU, + *rangesInGPU); auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); @@ -922,17 +886,16 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); lst::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; - auto const createQuintupletsInGPUv2Task(alpaka::createTaskKernel(createQuintupletsInGPUv2_workDiv, - createQuintupletsInGPUv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *quintupletsInGPU, - *rangesInGPU, - nEligibleT5Modules)); - - alpaka::enqueue(queue, createQuintupletsInGPUv2Task); + alpaka::exec(queue, + createQuintupletsInGPUv2_workDiv, + createQuintupletsInGPUv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *quintupletsInGPU, + *rangesInGPU, + nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; @@ -940,14 +903,12 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); lst::removeDupQuintupletsInGPUAfterBuild removeDupQuintupletsInGPUAfterBuild_kernel; - auto const removeDupQuintupletsInGPUAfterBuildTask( - alpaka::createTaskKernel(removeDupQuintupletsInGPUAfterBuild_workDiv, - removeDupQuintupletsInGPUAfterBuild_kernel, - *modulesBuffers_.data(), - *quintupletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, removeDupQuintupletsInGPUAfterBuildTask); + alpaka::exec(queue, + removeDupQuintupletsInGPUAfterBuild_workDiv, + removeDupQuintupletsInGPUAfterBuild_kernel, + *modulesBuffers_.data(), + *quintupletsInGPU, + *rangesInGPU); Vec3D const threadsPerBlockAddQuint{1, 1, 1024}; Vec3D const blocksPerGridAddQuint{1, 1, 1}; @@ -955,14 +916,12 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridAddQuint, threadsPerBlockAddQuint, elementsPerThread); lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; - auto const addQuintupletRangesToEventExplicitTask( - alpaka::createTaskKernel(addQuintupletRangesToEventExplicit_workDiv, - addQuintupletRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *quintupletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addQuintupletRangesToEventExplicitTask); + alpaka::exec(queue, + addQuintupletRangesToEventExplicit_workDiv, + addQuintupletRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *quintupletsInGPU, + *rangesInGPU); if (addObjects) { addQuintupletsToEventExplicit(); @@ -977,10 +936,8 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); lst::checkHitspLS checkHitspLS_kernel; - auto const checkHitspLSTask(alpaka::createTaskKernel( - checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false)); - - alpaka::enqueue(queue, checkHitspLSTask); + alpaka::exec( + queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false); } } @@ -1062,21 +1019,19 @@ void lst::Event::createPixelQuintuplets() { createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); lst::createPixelQuintupletsInGPUFromMapv2 createPixelQuintupletsInGPUFromMapv2_kernel; - auto const createPixelQuintupletsInGPUFromMapv2Task( - alpaka::createTaskKernel(createPixelQuintupletsInGPUFromMapv2_workDiv, - createPixelQuintupletsInGPUFromMapv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *quintupletsInGPU, - *pixelQuintupletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), - nInnerSegments, - *rangesInGPU)); - - alpaka::enqueue(queue, createPixelQuintupletsInGPUFromMapv2Task); + alpaka::exec(queue, + createPixelQuintupletsInGPUFromMapv2_workDiv, + createPixelQuintupletsInGPUFromMapv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *quintupletsInGPU, + *pixelQuintupletsInGPU, + alpaka::getPtrNative(connectedPixelSize_dev_buf), + alpaka::getPtrNative(connectedPixelIndex_dev_buf), + nInnerSegments, + *rangesInGPU); Vec3D const threadsPerBlockDupPix{1, 16, 16}; Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; @@ -1084,12 +1039,10 @@ void lst::Event::createPixelQuintuplets() { createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); lst::removeDupPixelQuintupletsInGPUFromMap removeDupPixelQuintupletsInGPUFromMap_kernel; - auto const removeDupPixelQuintupletsInGPUFromMapTask( - alpaka::createTaskKernel(removeDupPixelQuintupletsInGPUFromMap_workDiv, - removeDupPixelQuintupletsInGPUFromMap_kernel, - *pixelQuintupletsInGPU)); - - alpaka::enqueue(queue, removeDupPixelQuintupletsInGPUFromMapTask); + alpaka::exec(queue, + removeDupPixelQuintupletsInGPUFromMap_workDiv, + removeDupPixelQuintupletsInGPUFromMap_kernel, + *pixelQuintupletsInGPU); Vec3D const threadsPerBlockAddpT5asTrackCan{1, 1, 256}; Vec3D const blocksPerGridAddpT5asTrackCan{1, 1, 1}; @@ -1097,15 +1050,14 @@ void lst::Event::createPixelQuintuplets() { createWorkDiv(blocksPerGridAddpT5asTrackCan, threadsPerBlockAddpT5asTrackCan, elementsPerThread); lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; - auto const addpT5asTrackCandidateInGPUTask(alpaka::createTaskKernel(addpT5asTrackCandidateInGPU_workDiv, - addpT5asTrackCandidateInGPU_kernel, - nLowerModules_, - *pixelQuintupletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addpT5asTrackCandidateInGPUTask); + alpaka::exec(queue, + addpT5asTrackCandidateInGPU_workDiv, + addpT5asTrackCandidateInGPU_kernel, + nLowerModules_, + *pixelQuintupletsInGPU, + *trackCandidatesInGPU, + *segmentsInGPU, + *rangesInGPU); #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); From a7d73c09a9c7a7ecbf25c45d56f70ebcadddb285 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 06:37:00 -0700 Subject: [PATCH 5/8] replace alpaka::getPtrNative(buf) with buf.data() --- .../LSTCore/interface/EndcapGeometryBuffer.h | 4 +- RecoTracker/LSTCore/interface/Module.h | 52 +++++++------- RecoTracker/LSTCore/src/LSTESData.cc | 4 +- RecoTracker/LSTCore/src/ModuleMethods.h | 50 ++++++------- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 40 +++++------ RecoTracker/LSTCore/src/alpaka/Hit.h | 38 +++++----- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 72 +++++++++---------- RecoTracker/LSTCore/src/alpaka/ObjectRanges.h | 56 +++++++-------- .../LSTCore/src/alpaka/PixelQuintuplet.h | 36 +++++----- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 44 ++++++------ RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 48 ++++++------- RecoTracker/LSTCore/src/alpaka/Segment.h | 68 +++++++++--------- .../LSTCore/src/alpaka/TrackCandidate.h | 34 ++++----- RecoTracker/LSTCore/src/alpaka/Triplet.h | 34 ++++----- 14 files changed, 290 insertions(+), 290 deletions(-) diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h index 6a787a5ed95eb..2c6df9ab2773c 100644 --- a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h +++ b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h @@ -19,8 +19,8 @@ namespace lst { template void setData(TBuff const& buf) { - geoMapDetId = alpaka::getPtrNative(buf.geoMapDetId_buf); - geoMapPhi = alpaka::getPtrNative(buf.geoMapPhi_buf); + geoMapDetId = buf.geoMapDetId_buf.data(); + geoMapPhi = buf.geoMapPhi_buf.data(); } }; diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h index eca086b91850f..7266ebd7bc49b 100644 --- a/RecoTracker/LSTCore/interface/Module.h +++ b/RecoTracker/LSTCore/interface/Module.h @@ -84,32 +84,32 @@ namespace lst { template void setData(TBuff const& buf) { - detIds = alpaka::getPtrNative(buf.detIds_buf); - moduleMap = alpaka::getPtrNative(buf.moduleMap_buf); - mapdetId = alpaka::getPtrNative(buf.mapdetId_buf); - mapIdx = alpaka::getPtrNative(buf.mapIdx_buf); - nConnectedModules = alpaka::getPtrNative(buf.nConnectedModules_buf); - drdzs = alpaka::getPtrNative(buf.drdzs_buf); - dxdys = alpaka::getPtrNative(buf.dxdys_buf); - nModules = alpaka::getPtrNative(buf.nModules_buf); - nLowerModules = alpaka::getPtrNative(buf.nLowerModules_buf); - partnerModuleIndices = alpaka::getPtrNative(buf.partnerModuleIndices_buf); - - layers = alpaka::getPtrNative(buf.layers_buf); - rings = alpaka::getPtrNative(buf.rings_buf); - modules = alpaka::getPtrNative(buf.modules_buf); - rods = alpaka::getPtrNative(buf.rods_buf); - subdets = alpaka::getPtrNative(buf.subdets_buf); - sides = alpaka::getPtrNative(buf.sides_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - r = alpaka::getPtrNative(buf.r_buf); - isInverted = alpaka::getPtrNative(buf.isInverted_buf); - isLower = alpaka::getPtrNative(buf.isLower_buf); - isAnchor = alpaka::getPtrNative(buf.isAnchor_buf); - moduleType = alpaka::getPtrNative(buf.moduleType_buf); - moduleLayerType = alpaka::getPtrNative(buf.moduleLayerType_buf); - lstLayers = alpaka::getPtrNative(buf.lstLayers_buf); - connectedPixels = alpaka::getPtrNative(buf.connectedPixels_buf); + detIds = buf.detIds_buf.data(); + moduleMap = buf.moduleMap_buf.data(); + mapdetId = buf.mapdetId_buf.data(); + mapIdx = buf.mapIdx_buf.data(); + nConnectedModules = buf.nConnectedModules_buf.data(); + drdzs = buf.drdzs_buf.data(); + dxdys = buf.dxdys_buf.data(); + nModules = buf.nModules_buf.data(); + nLowerModules = buf.nLowerModules_buf.data(); + partnerModuleIndices = buf.partnerModuleIndices_buf.data(); + + layers = buf.layers_buf.data(); + rings = buf.rings_buf.data(); + modules = buf.modules_buf.data(); + rods = buf.rods_buf.data(); + subdets = buf.subdets_buf.data(); + sides = buf.sides_buf.data(); + eta = buf.eta_buf.data(); + r = buf.r_buf.data(); + isInverted = buf.isInverted_buf.data(); + isLower = buf.isLower_buf.data(); + isAnchor = buf.isAnchor_buf.data(); + moduleType = buf.moduleType_buf.data(); + moduleLayerType = buf.moduleLayerType_buf.data(); + lstLayers = buf.lstLayers_buf.data(); + connectedPixels = buf.connectedPixels_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc index 9079d0d229216..1acf085a0f491 100644 --- a/RecoTracker/LSTCore/src/LSTESData.cc +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -89,10 +89,10 @@ std::unique_ptr> lst::loadAndFillESHost() auto endcapGeometryBuffers = EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); - std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapDetId_buf), + std::memcpy(endcapGeometryBuffers.geoMapDetId_buf.data(), endcapGeometry.geoMapDetId_buf.data(), endcapGeometry.nEndCapMap * sizeof(unsigned int)); - std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapPhi_buf), + std::memcpy(endcapGeometryBuffers.geoMapPhi_buf.data(), endcapGeometry.geoMapPhi_buf.data(), endcapGeometry.nEndCapMap * sizeof(float)); diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index 54514cccf2b54..196212defdfa6 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -83,7 +83,7 @@ namespace lst { modulesBuf.connectedPixels_buf = allocBufWrapper(cms::alpakatools::host(), nPixels); modulesBuf.data_.setData(modulesBuf); - unsigned int* connectedPixels = alpaka::getPtrNative(modulesBuf.connectedPixels_buf); + unsigned int* connectedPixels = modulesBuf.connectedPixels_buf.data(); for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); @@ -99,8 +99,8 @@ namespace lst { inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd, ModuleConnectionMap const& moduleConnectionMap) { - uint16_t* moduleMap = alpaka::getPtrNative(modulesBuf.moduleMap_buf); - uint16_t* nConnectedModules = alpaka::getPtrNative(modulesBuf.nConnectedModules_buf); + uint16_t* moduleMap = modulesBuf.moduleMap_buf.data(); + uint16_t* nConnectedModules = modulesBuf.nConnectedModules_buf.data(); for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { unsigned int detId = it->first; @@ -114,8 +114,8 @@ namespace lst { } inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd) { - uint16_t* mapIdx = alpaka::getPtrNative(modulesBuf.mapIdx_buf); - unsigned int* mapdetId = alpaka::getPtrNative(modulesBuf.mapdetId_buf); + uint16_t* mapIdx = modulesBuf.mapIdx_buf.data(); + unsigned int* mapdetId = modulesBuf.mapdetId_buf.data(); unsigned int counter = 0; for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { @@ -205,26 +205,26 @@ namespace lst { ModulesBuffer modulesBuf(cms::alpakatools::host(), nModules, 0); // Getting the underlying data pointers - unsigned int* host_detIds = alpaka::getPtrNative(modulesBuf.detIds_buf); - short* host_layers = alpaka::getPtrNative(modulesBuf.layers_buf); - short* host_rings = alpaka::getPtrNative(modulesBuf.rings_buf); - short* host_rods = alpaka::getPtrNative(modulesBuf.rods_buf); - short* host_modules = alpaka::getPtrNative(modulesBuf.modules_buf); - short* host_subdets = alpaka::getPtrNative(modulesBuf.subdets_buf); - short* host_sides = alpaka::getPtrNative(modulesBuf.sides_buf); - float* host_eta = alpaka::getPtrNative(modulesBuf.eta_buf); - float* host_r = alpaka::getPtrNative(modulesBuf.r_buf); - bool* host_isInverted = alpaka::getPtrNative(modulesBuf.isInverted_buf); - bool* host_isLower = alpaka::getPtrNative(modulesBuf.isLower_buf); - bool* host_isAnchor = alpaka::getPtrNative(modulesBuf.isAnchor_buf); - ModuleType* host_moduleType = alpaka::getPtrNative(modulesBuf.moduleType_buf); - ModuleLayerType* host_moduleLayerType = alpaka::getPtrNative(modulesBuf.moduleLayerType_buf); - float* host_dxdys = alpaka::getPtrNative(modulesBuf.dxdys_buf); - float* host_drdzs = alpaka::getPtrNative(modulesBuf.drdzs_buf); - uint16_t* host_nModules = alpaka::getPtrNative(modulesBuf.nModules_buf); - uint16_t* host_nLowerModules = alpaka::getPtrNative(modulesBuf.nLowerModules_buf); - uint16_t* host_partnerModuleIndices = alpaka::getPtrNative(modulesBuf.partnerModuleIndices_buf); - int* host_lstLayers = alpaka::getPtrNative(modulesBuf.lstLayers_buf); + unsigned int* host_detIds = modulesBuf.detIds_buf.data(); + short* host_layers = modulesBuf.layers_buf.data(); + short* host_rings = modulesBuf.rings_buf.data(); + short* host_rods = modulesBuf.rods_buf.data(); + short* host_modules = modulesBuf.modules_buf.data(); + short* host_subdets = modulesBuf.subdets_buf.data(); + short* host_sides = modulesBuf.sides_buf.data(); + float* host_eta = modulesBuf.eta_buf.data(); + float* host_r = modulesBuf.r_buf.data(); + bool* host_isInverted = modulesBuf.isInverted_buf.data(); + bool* host_isLower = modulesBuf.isLower_buf.data(); + bool* host_isAnchor = modulesBuf.isAnchor_buf.data(); + ModuleType* host_moduleType = modulesBuf.moduleType_buf.data(); + ModuleLayerType* host_moduleLayerType = modulesBuf.moduleLayerType_buf.data(); + float* host_dxdys = modulesBuf.dxdys_buf.data(); + float* host_drdzs = modulesBuf.drdzs_buf.data(); + uint16_t* host_nModules = modulesBuf.nModules_buf.data(); + uint16_t* host_nLowerModules = modulesBuf.nLowerModules_buf.data(); + uint16_t* host_partnerModuleIndices = modulesBuf.partnerModuleIndices_buf.data(); + int* host_lstLayers = modulesBuf.lstLayers_buf.data(); //reassign detIdToIndex indices here nLowerModules = (nModules - 1) / 2; diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 82a7f44a268b9..9e46c96a4488c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -197,8 +197,8 @@ void lst::Event::addHitToEvent(std::vector const& x, TwoS, nModules_, nEndCapMap_, - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), + endcapGeometryBuffers_.geoMapDetId_buf.data(), + endcapGeometryBuffers_.geoMapPhi_buf.data(), *modulesBuffers_.data(), *hitsInGPU, nHits); @@ -368,11 +368,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& *hitsInGPU, *mdsInGPU, *segmentsInGPU, - alpaka::getPtrNative(hitIndices0_dev), - alpaka::getPtrNative(hitIndices1_dev), - alpaka::getPtrNative(hitIndices2_dev), - alpaka::getPtrNative(hitIndices3_dev), - alpaka::getPtrNative(dPhiChange_dev), + hitIndices0_dev.data(), + hitIndices1_dev.data(), + hitIndices2_dev.data(), + hitIndices3_dev.data(), + dPhiChange_dev.data(), pixelModuleIndex, size); } @@ -560,7 +560,7 @@ void lst::Event::createTriplets() { *segmentsInGPU, *tripletsInGPU, *rangesInGPU, - alpaka::getPtrNative(index_gpu_buf), + index_gpu_buf.data(), nonZeroModules); Vec3D const threadsPerBlockAddTrip{1, 1, 1024}; @@ -720,10 +720,10 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::memcpy(queue, nTrackCanT5Host_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); alpaka::wait(queue); // wait to get the values before using them - auto nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); - auto nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); - auto nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); - auto nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); + auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); + auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); + auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); + auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { printf( @@ -764,8 +764,8 @@ void lst::Event::createPixelTriplets() { auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); - unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); + unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -821,8 +821,8 @@ void lst::Event::createPixelTriplets() { *segmentsInGPU, *tripletsInGPU, *pixelTripletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), nInnerSegments); #ifdef WARNINGS @@ -831,7 +831,7 @@ void lst::Event::createPixelTriplets() { alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); alpaka::wait(queue); // wait to get the value before using it - std::cout << "number of pixel triplets = " << *alpaka::getPtrNative(nPixelTriplets_buf) << std::endl; + std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; #endif //pT3s can be cleaned here because they're not used in making pT5s! @@ -1028,8 +1028,8 @@ void lst::Event::createPixelQuintuplets() { *tripletsInGPU, *quintupletsInGPU, *pixelQuintupletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), nInnerSegments, *rangesInGPU); @@ -1065,7 +1065,7 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); alpaka::wait(queue); // wait to get the value before using it - std::cout << "number of pixel quintuplets = " << *alpaka::getPtrNative(nPixelQuintuplets_buf) << std::endl; + std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl; #endif } diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index 7f3412ce4694a..253b0860c7068 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -28,25 +28,25 @@ namespace lst { template void setData(TBuff& buf) { - nHits = alpaka::getPtrNative(buf.nHits_buf); - xs = alpaka::getPtrNative(buf.xs_buf); - ys = alpaka::getPtrNative(buf.ys_buf); - zs = alpaka::getPtrNative(buf.zs_buf); - moduleIndices = alpaka::getPtrNative(buf.moduleIndices_buf); - idxs = alpaka::getPtrNative(buf.idxs_buf); - detid = alpaka::getPtrNative(buf.detid_buf); - rts = alpaka::getPtrNative(buf.rts_buf); - phis = alpaka::getPtrNative(buf.phis_buf); - etas = alpaka::getPtrNative(buf.etas_buf); - highEdgeXs = alpaka::getPtrNative(buf.highEdgeXs_buf); - highEdgeYs = alpaka::getPtrNative(buf.highEdgeYs_buf); - lowEdgeXs = alpaka::getPtrNative(buf.lowEdgeXs_buf); - lowEdgeYs = alpaka::getPtrNative(buf.lowEdgeYs_buf); - hitRanges = alpaka::getPtrNative(buf.hitRanges_buf); - hitRangesLower = alpaka::getPtrNative(buf.hitRangesLower_buf); - hitRangesUpper = alpaka::getPtrNative(buf.hitRangesUpper_buf); - hitRangesnLower = alpaka::getPtrNative(buf.hitRangesnLower_buf); - hitRangesnUpper = alpaka::getPtrNative(buf.hitRangesnUpper_buf); + nHits = buf.nHits_buf.data(); + xs = buf.xs_buf.data(); + ys = buf.ys_buf.data(); + zs = buf.zs_buf.data(); + moduleIndices = buf.moduleIndices_buf.data(); + idxs = buf.idxs_buf.data(); + detid = buf.detid_buf.data(); + rts = buf.rts_buf.data(); + phis = buf.phis_buf.data(); + etas = buf.etas_buf.data(); + highEdgeXs = buf.highEdgeXs_buf.data(); + highEdgeYs = buf.highEdgeYs_buf.data(); + lowEdgeXs = buf.lowEdgeXs_buf.data(); + lowEdgeYs = buf.lowEdgeYs_buf.data(); + hitRanges = buf.hitRanges_buf.data(); + hitRangesLower = buf.hitRangesLower_buf.data(); + hitRangesUpper = buf.hitRangesUpper_buf.data(); + hitRangesnLower = buf.hitRangesnLower_buf.data(); + hitRangesnUpper = buf.hitRangesnUpper_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index bda334b31afc1..b4cbd500c7bf8 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -56,42 +56,42 @@ namespace lst { template void setData(TBuf& buf) { - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - anchorHitIndices = alpaka::getPtrNative(buf.anchorHitIndices_buf); - outerHitIndices = alpaka::getPtrNative(buf.outerHitIndices_buf); - moduleIndices = alpaka::getPtrNative(buf.moduleIndices_buf); - nMDs = alpaka::getPtrNative(buf.nMDs_buf); - totOccupancyMDs = alpaka::getPtrNative(buf.totOccupancyMDs_buf); - dphichanges = alpaka::getPtrNative(buf.dphichanges_buf); - dzs = alpaka::getPtrNative(buf.dzs_buf); - dphis = alpaka::getPtrNative(buf.dphis_buf); - shiftedXs = alpaka::getPtrNative(buf.shiftedXs_buf); - shiftedYs = alpaka::getPtrNative(buf.shiftedYs_buf); - shiftedZs = alpaka::getPtrNative(buf.shiftedZs_buf); - noShiftedDphis = alpaka::getPtrNative(buf.noShiftedDphis_buf); - noShiftedDphiChanges = alpaka::getPtrNative(buf.noShiftedDphiChanges_buf); - anchorX = alpaka::getPtrNative(buf.anchorX_buf); - anchorY = alpaka::getPtrNative(buf.anchorY_buf); - anchorZ = alpaka::getPtrNative(buf.anchorZ_buf); - anchorRt = alpaka::getPtrNative(buf.anchorRt_buf); - anchorPhi = alpaka::getPtrNative(buf.anchorPhi_buf); - anchorEta = alpaka::getPtrNative(buf.anchorEta_buf); - anchorHighEdgeX = alpaka::getPtrNative(buf.anchorHighEdgeX_buf); - anchorHighEdgeY = alpaka::getPtrNative(buf.anchorHighEdgeY_buf); - anchorLowEdgeX = alpaka::getPtrNative(buf.anchorLowEdgeX_buf); - anchorLowEdgeY = alpaka::getPtrNative(buf.anchorLowEdgeY_buf); - outerX = alpaka::getPtrNative(buf.outerX_buf); - outerY = alpaka::getPtrNative(buf.outerY_buf); - outerZ = alpaka::getPtrNative(buf.outerZ_buf); - outerRt = alpaka::getPtrNative(buf.outerRt_buf); - outerPhi = alpaka::getPtrNative(buf.outerPhi_buf); - outerEta = alpaka::getPtrNative(buf.outerEta_buf); - outerHighEdgeX = alpaka::getPtrNative(buf.outerHighEdgeX_buf); - outerHighEdgeY = alpaka::getPtrNative(buf.outerHighEdgeY_buf); - outerLowEdgeX = alpaka::getPtrNative(buf.outerLowEdgeX_buf); - outerLowEdgeY = alpaka::getPtrNative(buf.outerLowEdgeY_buf); - anchorLowEdgePhi = alpaka::getPtrNative(buf.anchorLowEdgePhi_buf); - anchorHighEdgePhi = alpaka::getPtrNative(buf.anchorHighEdgePhi_buf); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + anchorHitIndices = buf.anchorHitIndices_buf.data(); + outerHitIndices = buf.outerHitIndices_buf.data(); + moduleIndices = buf.moduleIndices_buf.data(); + nMDs = buf.nMDs_buf.data(); + totOccupancyMDs = buf.totOccupancyMDs_buf.data(); + dphichanges = buf.dphichanges_buf.data(); + dzs = buf.dzs_buf.data(); + dphis = buf.dphis_buf.data(); + shiftedXs = buf.shiftedXs_buf.data(); + shiftedYs = buf.shiftedYs_buf.data(); + shiftedZs = buf.shiftedZs_buf.data(); + noShiftedDphis = buf.noShiftedDphis_buf.data(); + noShiftedDphiChanges = buf.noShiftedDphiChanges_buf.data(); + anchorX = buf.anchorX_buf.data(); + anchorY = buf.anchorY_buf.data(); + anchorZ = buf.anchorZ_buf.data(); + anchorRt = buf.anchorRt_buf.data(); + anchorPhi = buf.anchorPhi_buf.data(); + anchorEta = buf.anchorEta_buf.data(); + anchorHighEdgeX = buf.anchorHighEdgeX_buf.data(); + anchorHighEdgeY = buf.anchorHighEdgeY_buf.data(); + anchorLowEdgeX = buf.anchorLowEdgeX_buf.data(); + anchorLowEdgeY = buf.anchorLowEdgeY_buf.data(); + outerX = buf.outerX_buf.data(); + outerY = buf.outerY_buf.data(); + outerZ = buf.outerZ_buf.data(); + outerRt = buf.outerRt_buf.data(); + outerPhi = buf.outerPhi_buf.data(); + outerEta = buf.outerEta_buf.data(); + outerHighEdgeX = buf.outerHighEdgeX_buf.data(); + outerHighEdgeY = buf.outerHighEdgeY_buf.data(); + outerLowEdgeX = buf.outerLowEdgeX_buf.data(); + outerLowEdgeY = buf.outerLowEdgeY_buf.data(); + anchorLowEdgePhi = buf.anchorLowEdgePhi_buf.data(); + anchorHighEdgePhi = buf.anchorHighEdgePhi_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h index 09aac58bc8eb4..0e17185104c74 100644 --- a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -40,34 +40,34 @@ namespace lst { template void setData(TBuff& buf) { - hitRanges = alpaka::getPtrNative(buf.hitRanges_buf); - hitRangesLower = alpaka::getPtrNative(buf.hitRangesLower_buf); - hitRangesUpper = alpaka::getPtrNative(buf.hitRangesUpper_buf); - hitRangesnLower = alpaka::getPtrNative(buf.hitRangesnLower_buf); - hitRangesnUpper = alpaka::getPtrNative(buf.hitRangesnUpper_buf); - mdRanges = alpaka::getPtrNative(buf.mdRanges_buf); - segmentRanges = alpaka::getPtrNative(buf.segmentRanges_buf); - trackletRanges = alpaka::getPtrNative(buf.trackletRanges_buf); - tripletRanges = alpaka::getPtrNative(buf.tripletRanges_buf); - trackCandidateRanges = alpaka::getPtrNative(buf.trackCandidateRanges_buf); - quintupletRanges = alpaka::getPtrNative(buf.quintupletRanges_buf); - - nEligibleT5Modules = alpaka::getPtrNative(buf.nEligibleT5Modules_buf); - indicesOfEligibleT5Modules = alpaka::getPtrNative(buf.indicesOfEligibleT5Modules_buf); - - quintupletModuleIndices = alpaka::getPtrNative(buf.quintupletModuleIndices_buf); - quintupletModuleOccupancy = alpaka::getPtrNative(buf.quintupletModuleOccupancy_buf); - miniDoubletModuleIndices = alpaka::getPtrNative(buf.miniDoubletModuleIndices_buf); - miniDoubletModuleOccupancy = alpaka::getPtrNative(buf.miniDoubletModuleOccupancy_buf); - segmentModuleIndices = alpaka::getPtrNative(buf.segmentModuleIndices_buf); - segmentModuleOccupancy = alpaka::getPtrNative(buf.segmentModuleOccupancy_buf); - tripletModuleIndices = alpaka::getPtrNative(buf.tripletModuleIndices_buf); - tripletModuleOccupancy = alpaka::getPtrNative(buf.tripletModuleOccupancy_buf); - - device_nTotalMDs = alpaka::getPtrNative(buf.device_nTotalMDs_buf); - device_nTotalSegs = alpaka::getPtrNative(buf.device_nTotalSegs_buf); - device_nTotalTrips = alpaka::getPtrNative(buf.device_nTotalTrips_buf); - device_nTotalQuints = alpaka::getPtrNative(buf.device_nTotalQuints_buf); + hitRanges = buf.hitRanges_buf.data(); + hitRangesLower = buf.hitRangesLower_buf.data(); + hitRangesUpper = buf.hitRangesUpper_buf.data(); + hitRangesnLower = buf.hitRangesnLower_buf.data(); + hitRangesnUpper = buf.hitRangesnUpper_buf.data(); + mdRanges = buf.mdRanges_buf.data(); + segmentRanges = buf.segmentRanges_buf.data(); + trackletRanges = buf.trackletRanges_buf.data(); + tripletRanges = buf.tripletRanges_buf.data(); + trackCandidateRanges = buf.trackCandidateRanges_buf.data(); + quintupletRanges = buf.quintupletRanges_buf.data(); + + nEligibleT5Modules = buf.nEligibleT5Modules_buf.data(); + indicesOfEligibleT5Modules = buf.indicesOfEligibleT5Modules_buf.data(); + + quintupletModuleIndices = buf.quintupletModuleIndices_buf.data(); + quintupletModuleOccupancy = buf.quintupletModuleOccupancy_buf.data(); + miniDoubletModuleIndices = buf.miniDoubletModuleIndices_buf.data(); + miniDoubletModuleOccupancy = buf.miniDoubletModuleOccupancy_buf.data(); + segmentModuleIndices = buf.segmentModuleIndices_buf.data(); + segmentModuleOccupancy = buf.segmentModuleOccupancy_buf.data(); + tripletModuleIndices = buf.tripletModuleIndices_buf.data(); + tripletModuleOccupancy = buf.tripletModuleOccupancy_buf.data(); + + device_nTotalMDs = buf.device_nTotalMDs_buf.data(); + device_nTotalSegs = buf.device_nTotalSegs_buf.data(); + device_nTotalTrips = buf.device_nTotalTrips_buf.data(); + device_nTotalQuints = buf.device_nTotalQuints_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index 2c0b143a6d913..0a14f2cbbd112 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -34,24 +34,24 @@ namespace lst { template void setData(TBuff& buf) { - pixelIndices = alpaka::getPtrNative(buf.pixelIndices_buf); - T5Indices = alpaka::getPtrNative(buf.T5Indices_buf); - nPixelQuintuplets = alpaka::getPtrNative(buf.nPixelQuintuplets_buf); - totOccupancyPixelQuintuplets = alpaka::getPtrNative(buf.totOccupancyPixelQuintuplets_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - score = alpaka::getPtrNative(buf.score_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - pixelRadius = alpaka::getPtrNative(buf.pixelRadius_buf); - quintupletRadius = alpaka::getPtrNative(buf.quintupletRadius_buf); - centerX = alpaka::getPtrNative(buf.centerX_buf); - centerY = alpaka::getPtrNative(buf.centerY_buf); - rzChiSquared = alpaka::getPtrNative(buf.rzChiSquared_buf); - rPhiChiSquared = alpaka::getPtrNative(buf.rPhiChiSquared_buf); - rPhiChiSquaredInwards = alpaka::getPtrNative(buf.rPhiChiSquaredInwards_buf); + pixelIndices = buf.pixelIndices_buf.data(); + T5Indices = buf.T5Indices_buf.data(); + nPixelQuintuplets = buf.nPixelQuintuplets_buf.data(); + totOccupancyPixelQuintuplets = buf.totOccupancyPixelQuintuplets_buf.data(); + isDup = buf.isDup_buf.data(); + score = buf.score_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + pixelRadius = buf.pixelRadius_buf.data(); + quintupletRadius = buf.quintupletRadius_buf.data(); + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); + rPhiChiSquared = buf.rPhiChiSquared_buf.data(); + rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 15e4456c21fc6..aa37b91ebb9da 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -42,28 +42,28 @@ namespace lst { template void setData(TBuff& buf) { - pixelSegmentIndices = alpaka::getPtrNative(buf.pixelSegmentIndices_buf); - tripletIndices = alpaka::getPtrNative(buf.tripletIndices_buf); - nPixelTriplets = alpaka::getPtrNative(buf.nPixelTriplets_buf); - totOccupancyPixelTriplets = alpaka::getPtrNative(buf.totOccupancyPixelTriplets_buf); - pixelRadius = alpaka::getPtrNative(buf.pixelRadius_buf); - tripletRadius = alpaka::getPtrNative(buf.tripletRadius_buf); - pt = alpaka::getPtrNative(buf.pt_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - eta_pix = alpaka::getPtrNative(buf.eta_pix_buf); - phi_pix = alpaka::getPtrNative(buf.phi_pix_buf); - score = alpaka::getPtrNative(buf.score_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - centerX = alpaka::getPtrNative(buf.centerX_buf); - centerY = alpaka::getPtrNative(buf.centerY_buf); - rPhiChiSquared = alpaka::getPtrNative(buf.rPhiChiSquared_buf); - rPhiChiSquaredInwards = alpaka::getPtrNative(buf.rPhiChiSquaredInwards_buf); - rzChiSquared = alpaka::getPtrNative(buf.rzChiSquared_buf); + pixelSegmentIndices = buf.pixelSegmentIndices_buf.data(); + tripletIndices = buf.tripletIndices_buf.data(); + nPixelTriplets = buf.nPixelTriplets_buf.data(); + totOccupancyPixelTriplets = buf.totOccupancyPixelTriplets_buf.data(); + pixelRadius = buf.pixelRadius_buf.data(); + tripletRadius = buf.tripletRadius_buf.data(); + pt = buf.pt_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + eta_pix = buf.eta_pix_buf.data(); + phi_pix = buf.phi_pix_buf.data(); + score = buf.score_buf.data(); + isDup = buf.isDup_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + rPhiChiSquared = buf.rPhiChiSquared_buf.data(); + rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 3b700dbb94793..49eb3b1902c9a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -46,30 +46,30 @@ namespace lst { template void setData(TBuff& buf) { - tripletIndices = alpaka::getPtrNative(buf.tripletIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - nQuintuplets = alpaka::getPtrNative(buf.nQuintuplets_buf); - totOccupancyQuintuplets = alpaka::getPtrNative(buf.totOccupancyQuintuplets_buf); - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - innerRadius = alpaka::getPtrNative(buf.innerRadius_buf); - bridgeRadius = alpaka::getPtrNative(buf.bridgeRadius_buf); - outerRadius = alpaka::getPtrNative(buf.outerRadius_buf); - pt = alpaka::getPtrNative(buf.pt_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - score_rphisum = alpaka::getPtrNative(buf.score_rphisum_buf); - layer = alpaka::getPtrNative(buf.layer_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - TightCutFlag = alpaka::getPtrNative(buf.TightCutFlag_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - regressionRadius = alpaka::getPtrNative(buf.regressionRadius_buf); - regressionG = alpaka::getPtrNative(buf.regressionG_buf); - regressionF = alpaka::getPtrNative(buf.regressionF_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - rzChiSquared = alpaka::getPtrNative(buf.rzChiSquared_buf); - chiSquared = alpaka::getPtrNative(buf.chiSquared_buf); - nonAnchorChiSquared = alpaka::getPtrNative(buf.nonAnchorChiSquared_buf); + tripletIndices = buf.tripletIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + nQuintuplets = buf.nQuintuplets_buf.data(); + totOccupancyQuintuplets = buf.totOccupancyQuintuplets_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + innerRadius = buf.innerRadius_buf.data(); + bridgeRadius = buf.bridgeRadius_buf.data(); + outerRadius = buf.outerRadius_buf.data(); + pt = buf.pt_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + score_rphisum = buf.score_rphisum_buf.data(); + layer = buf.layer_buf.data(); + isDup = buf.isDup_buf.data(); + TightCutFlag = buf.TightCutFlag_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + regressionRadius = buf.regressionRadius_buf.data(); + regressionG = buf.regressionG_buf.data(); + regressionF = buf.regressionF_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); + chiSquared = buf.chiSquared_buf.data(); + nonAnchorChiSquared = buf.nonAnchorChiSquared_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 76436778802b1..cee59e316064a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -50,40 +50,40 @@ namespace lst { template void setData(TBuff& buf) { - dPhis = alpaka::getPtrNative(buf.dPhis_buf); - dPhiMins = alpaka::getPtrNative(buf.dPhiMins_buf); - dPhiMaxs = alpaka::getPtrNative(buf.dPhiMaxs_buf); - dPhiChanges = alpaka::getPtrNative(buf.dPhiChanges_buf); - dPhiChangeMins = alpaka::getPtrNative(buf.dPhiChangeMins_buf); - dPhiChangeMaxs = alpaka::getPtrNative(buf.dPhiChangeMaxs_buf); - innerLowerModuleIndices = alpaka::getPtrNative(buf.innerLowerModuleIndices_buf); - outerLowerModuleIndices = alpaka::getPtrNative(buf.outerLowerModuleIndices_buf); - seedIdx = alpaka::getPtrNative(buf.seedIdx_buf); - mdIndices = alpaka::getPtrNative(buf.mdIndices_buf); - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - innerMiniDoubletAnchorHitIndices = alpaka::getPtrNative(buf.innerMiniDoubletAnchorHitIndices_buf); - outerMiniDoubletAnchorHitIndices = alpaka::getPtrNative(buf.outerMiniDoubletAnchorHitIndices_buf); - charge = alpaka::getPtrNative(buf.charge_buf); - superbin = alpaka::getPtrNative(buf.superbin_buf); - nSegments = alpaka::getPtrNative(buf.nSegments_buf); - totOccupancySegments = alpaka::getPtrNative(buf.totOccupancySegments_buf); - pLSHitsIdxs = alpaka::getPtrNative(buf.pLSHitsIdxs_buf); - pixelType = alpaka::getPtrNative(buf.pixelType_buf); - isQuad = alpaka::getPtrNative(buf.isQuad_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - ptIn = alpaka::getPtrNative(buf.ptIn_buf); - ptErr = alpaka::getPtrNative(buf.ptErr_buf); - px = alpaka::getPtrNative(buf.px_buf); - py = alpaka::getPtrNative(buf.py_buf); - pz = alpaka::getPtrNative(buf.pz_buf); - etaErr = alpaka::getPtrNative(buf.etaErr_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - score = alpaka::getPtrNative(buf.score_buf); - circleCenterX = alpaka::getPtrNative(buf.circleCenterX_buf); - circleCenterY = alpaka::getPtrNative(buf.circleCenterY_buf); - circleRadius = alpaka::getPtrNative(buf.circleRadius_buf); + dPhis = buf.dPhis_buf.data(); + dPhiMins = buf.dPhiMins_buf.data(); + dPhiMaxs = buf.dPhiMaxs_buf.data(); + dPhiChanges = buf.dPhiChanges_buf.data(); + dPhiChangeMins = buf.dPhiChangeMins_buf.data(); + dPhiChangeMaxs = buf.dPhiChangeMaxs_buf.data(); + innerLowerModuleIndices = buf.innerLowerModuleIndices_buf.data(); + outerLowerModuleIndices = buf.outerLowerModuleIndices_buf.data(); + seedIdx = buf.seedIdx_buf.data(); + mdIndices = buf.mdIndices_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + innerMiniDoubletAnchorHitIndices = buf.innerMiniDoubletAnchorHitIndices_buf.data(); + outerMiniDoubletAnchorHitIndices = buf.outerMiniDoubletAnchorHitIndices_buf.data(); + charge = buf.charge_buf.data(); + superbin = buf.superbin_buf.data(); + nSegments = buf.nSegments_buf.data(); + totOccupancySegments = buf.totOccupancySegments_buf.data(); + pLSHitsIdxs = buf.pLSHitsIdxs_buf.data(); + pixelType = buf.pixelType_buf.data(); + isQuad = buf.isQuad_buf.data(); + isDup = buf.isDup_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + ptIn = buf.ptIn_buf.data(); + ptErr = buf.ptErr_buf.data(); + px = buf.px_buf.data(); + py = buf.py_buf.data(); + pz = buf.pz_buf.data(); + etaErr = buf.etaErr_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + score = buf.score_buf.data(); + circleCenterX = buf.circleCenterX_buf.data(); + circleCenterY = buf.circleCenterY_buf.data(); + circleRadius = buf.circleRadius_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 835647c65e4bd..03e853cea7d7b 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -34,23 +34,23 @@ namespace lst { template void setData(TBuff& buf) { - trackCandidateType = alpaka::getPtrNative(buf.trackCandidateType_buf); - directObjectIndices = alpaka::getPtrNative(buf.directObjectIndices_buf); - objectIndices = alpaka::getPtrNative(buf.objectIndices_buf); - nTrackCandidates = alpaka::getPtrNative(buf.nTrackCandidates_buf); - nTrackCandidatespT3 = alpaka::getPtrNative(buf.nTrackCandidatespT3_buf); - nTrackCandidatespT5 = alpaka::getPtrNative(buf.nTrackCandidatespT5_buf); - nTrackCandidatespLS = alpaka::getPtrNative(buf.nTrackCandidatespLS_buf); - nTrackCandidatesT5 = alpaka::getPtrNative(buf.nTrackCandidatesT5_buf); - - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - pixelSeedIndex = alpaka::getPtrNative(buf.pixelSeedIndex_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - - centerX = alpaka::getPtrNative(buf.centerX_buf); - centerY = alpaka::getPtrNative(buf.centerY_buf); - radius = alpaka::getPtrNative(buf.radius_buf); + trackCandidateType = buf.trackCandidateType_buf.data(); + directObjectIndices = buf.directObjectIndices_buf.data(); + objectIndices = buf.objectIndices_buf.data(); + nTrackCandidates = buf.nTrackCandidates_buf.data(); + nTrackCandidatespT3 = buf.nTrackCandidatespT3_buf.data(); + nTrackCandidatespT5 = buf.nTrackCandidatespT5_buf.data(); + nTrackCandidatespLS = buf.nTrackCandidatespLS_buf.data(); + nTrackCandidatesT5 = buf.nTrackCandidatesT5_buf.data(); + + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + pixelSeedIndex = buf.pixelSeedIndex_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + radius = buf.radius_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 9f3521e712ed6..3744dfb69e262 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -36,24 +36,24 @@ namespace lst { #endif template void setData(TBuff& buf) { - segmentIndices = alpaka::getPtrNative(buf.segmentIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - nTriplets = alpaka::getPtrNative(buf.nTriplets_buf); - totOccupancyTriplets = alpaka::getPtrNative(buf.totOccupancyTriplets_buf); - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - betaIn = alpaka::getPtrNative(buf.betaIn_buf); - circleRadius = alpaka::getPtrNative(buf.circleRadius_buf); - circleCenterX = alpaka::getPtrNative(buf.circleCenterX_buf); - circleCenterY = alpaka::getPtrNative(buf.circleCenterY_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - partOfT5 = alpaka::getPtrNative(buf.partOfT5_buf); - partOfPT3 = alpaka::getPtrNative(buf.partOfPT3_buf); + segmentIndices = buf.segmentIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + nTriplets = buf.nTriplets_buf.data(); + totOccupancyTriplets = buf.totOccupancyTriplets_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + betaIn = buf.betaIn_buf.data(); + circleRadius = buf.circleRadius_buf.data(); + circleCenterX = buf.circleCenterX_buf.data(); + circleCenterY = buf.circleCenterY_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + partOfT5 = buf.partOfT5_buf.data(); + partOfPT3 = buf.partOfPT3_buf.data(); #ifdef CUT_VALUE_DEBUG - zOut = alpaka::getPtrNative(buf.zOut_buf); - rtOut = alpaka::getPtrNative(buf.rtOut_buf); - betaInCut = alpaka::getPtrNative(buf.betaInCut_buf); + zOut = buf.zOut_buf.data(); + rtOut = buf.rtOut_buf.data(); + betaInCut = buf.betaInCut_buf.data(); #endif } }; From c6a246874122bc22055440db295f3bc370a5f053 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 15:35:06 -0700 Subject: [PATCH 6/8] lst::createWorkDiv now depends on Acc to avoid ODR; allocBufWrapper correct parameter is TDev --- RecoTracker/LSTCore/interface/Constants.h | 16 +++++------ .../LSTCore/interface/alpaka/Constants.h | 27 +++++++++---------- RecoTracker/LSTCore/src/ModuleMethods.h | 3 ++- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h index 725cf5f46b224..c0c342b6ad8a0 100644 --- a/RecoTracker/LSTCore/interface/Constants.h +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -14,23 +14,21 @@ namespace lst { using Buf = alpaka::Buf; // Allocation wrapper function to make integration of the caching allocator easier and reduce code boilerplate. - template - ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf, T> allocBufWrapper(TAcc const& devAccIn, - TSize nElements, - TQueue queue) { + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TDev const& dev, TSize nElements, TQueue queue) { #ifdef CACHE_ALLOC return cms::alpakatools::allocCachedBuf( - devAccIn, queue, alpaka_common::Vec1D(static_cast(nElements))); + dev, queue, alpaka_common::Vec1D(static_cast(nElements))); #else - return alpaka::allocBuf(devAccIn, + return alpaka::allocBuf(dev, alpaka_common::Vec1D(static_cast(nElements))); #endif } // Second allocation wrapper function when queue is not given. Reduces code boilerplate. - template - ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf, T> allocBufWrapper(TAcc const& devAccIn, TSize nElements) { - return alpaka::allocBuf(devAccIn, + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TDev const& dev, TSize nElements) { + return alpaka::allocBuf(dev, alpaka_common::Vec1D(static_cast(nElements))); } diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index e2ebd979a59a3..029d5ebcb80b7 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -36,25 +36,24 @@ namespace lst { #endif // Adjust grid and block sizes based on backend configuration - template - ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv3D createWorkDiv(const Vec& blocksPerGrid, - const Vec& threadsPerBlock, - const Vec& elementsPerThreadArg) { + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { Vec adjustedBlocks = blocksPerGrid; Vec adjustedThreads = threadsPerBlock; - // Serial execution, so all launch parameters set to 1. -#if defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) - adjustedBlocks = Vec::all(static_cast(1)); - adjustedThreads = Vec::all(static_cast(1)); -#endif + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { + adjustedBlocks = Vec::all(static_cast(1)); - // Threads enabled, set number of blocks to 1. -#if defined(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED) - adjustedBlocks = Vec::all(static_cast(1)); -#endif + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } + } - return WorkDiv3D(adjustedBlocks, adjustedThreads, elementsPerThreadArg); + return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); } // The constants below are usually used in functions like alpaka::math::min(), diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index 196212defdfa6..bf51e262f69e5 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -12,6 +12,7 @@ #include "RecoTracker/LSTCore/interface/PixelMap.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" namespace lst { struct ModuleMetaData { @@ -80,7 +81,7 @@ namespace lst { nPixels = connectedPix_size; // Now we re-initialize connectedPixels_buf since nPixels is now known - modulesBuf.connectedPixels_buf = allocBufWrapper(cms::alpakatools::host(), nPixels); + modulesBuf.connectedPixels_buf = cms::alpakatools::make_host_buffer(nPixels); modulesBuf.data_.setData(modulesBuf); unsigned int* connectedPixels = modulesBuf.connectedPixels_buf.data(); From 43ce20eee979ffc8b41d38629e91605d7cce3c54 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Mon, 12 Aug 2024 15:26:08 -0700 Subject: [PATCH 7/8] explicitly require 1D single block kernels to use Acc1D and have one block with asserts --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 77 ++++++------------- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 14 +++- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 16 +++- RecoTracker/LSTCore/src/alpaka/Segment.h | 16 +++- .../LSTCore/src/alpaka/TrackCandidate.h | 16 +++- RecoTracker/LSTCore/src/alpaka/Triplet.h | 16 +++- 6 files changed, 81 insertions(+), 74 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 9e46c96a4488c..cc8872438dfe7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -255,13 +255,10 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; - Vec3D const blocksPerGridCreateMD{1, 1, 1}; - WorkDiv3D const createMDArrayRangesGPU_workDiv = - createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - alpaka::exec( + alpaka::exec( queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); @@ -281,13 +278,10 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them - Vec3D const threadsPerBlockCreateSeg{1, 1, 1024}; - Vec3D const blocksPerGridCreateSeg{1, 1, 1}; - WorkDiv3D const createSegmentArrayRanges_workDiv = - createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; - alpaka::exec(queue, + alpaka::exec(queue, createSegmentArrayRanges_workDiv, createSegmentArrayRanges_kernel, *modulesBuffers_.data(), @@ -388,13 +382,10 @@ void lst::Event::createMiniDoublets() { alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; - Vec3D const blocksPerGridCreateMD{1, 1, 1}; - WorkDiv3D const createMDArrayRangesGPU_workDiv = - createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - alpaka::exec( + alpaka::exec( queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); @@ -424,13 +415,10 @@ void lst::Event::createMiniDoublets() { *mdsInGPU, *rangesInGPU); - Vec3D const threadsPerBlockAddMD{1, 1, 1024}; - Vec3D const blocksPerGridAddMD{1, 1, 1}; - WorkDiv3D const addMiniDoubletRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddMD, threadsPerBlockAddMD, elementsPerThread); + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, addMiniDoubletRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -465,13 +453,10 @@ void lst::Event::createSegmentsWithModuleMap() { *segmentsInGPU, *rangesInGPU); - Vec3D const threadsPerBlockAddSeg{1, 1, 1024}; - Vec3D const blocksPerGridAddSeg{1, 1, 1}; - WorkDiv3D const addSegmentRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddSeg, threadsPerBlockAddSeg, elementsPerThread); + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, addSegmentRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -485,13 +470,10 @@ void lst::Event::createSegmentsWithModuleMap() { void lst::Event::createTriplets() { if (tripletsInGPU == nullptr) { - Vec3D const threadsPerBlockCreateTrip{1, 1, 1024}; - Vec3D const blocksPerGridCreateTrip{1, 1, 1}; - WorkDiv3D const createTripletArrayRanges_workDiv = - createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createTripletArrayRanges createTripletArrayRanges_kernel; - alpaka::exec(queue, + alpaka::exec(queue, createTripletArrayRanges_workDiv, createTripletArrayRanges_kernel, *modulesBuffers_.data(), @@ -563,13 +545,10 @@ void lst::Event::createTriplets() { index_gpu_buf.data(), nonZeroModules); - Vec3D const threadsPerBlockAddTrip{1, 1, 1024}; - Vec3D const blocksPerGridAddTrip{1, 1, 1}; - WorkDiv3D const addTripletRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddTrip, threadsPerBlockAddTrip, elementsPerThread); + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, addTripletRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -604,13 +583,10 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ *segmentsInGPU, *pixelQuintupletsInGPU); - Vec3D const threadsPerBlock_addpT3asTrackCandidatesInGPU{1, 1, 512}; - Vec3D const blocksPerGrid_addpT3asTrackCandidatesInGPU{1, 1, 1}; - WorkDiv3D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv( - blocksPerGrid_addpT3asTrackCandidatesInGPU, threadsPerBlock_addpT3asTrackCandidatesInGPU, elementsPerThread); + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, addpT3asTrackCandidatesInGPU_kernel, nLowerModules_, @@ -849,13 +825,10 @@ void lst::Event::createPixelTriplets() { } void lst::Event::createQuintuplets() { - Vec3D const threadsPerBlockCreateQuints{1, 1, 1024}; - Vec3D const blocksPerGridCreateQuints{1, 1, 1}; - WorkDiv3D const createEligibleModulesListForQuintupletsGPU_workDiv = - createWorkDiv(blocksPerGridCreateQuints, threadsPerBlockCreateQuints, elementsPerThread); + WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; - alpaka::exec(queue, + alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, createEligibleModulesListForQuintupletsGPU_kernel, *modulesBuffers_.data(), @@ -910,13 +883,10 @@ void lst::Event::createQuintuplets() { *quintupletsInGPU, *rangesInGPU); - Vec3D const threadsPerBlockAddQuint{1, 1, 1024}; - Vec3D const blocksPerGridAddQuint{1, 1, 1}; - WorkDiv3D const addQuintupletRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddQuint, threadsPerBlockAddQuint, elementsPerThread); + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, addQuintupletRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -1044,13 +1014,10 @@ void lst::Event::createPixelQuintuplets() { removeDupPixelQuintupletsInGPUFromMap_kernel, *pixelQuintupletsInGPU); - Vec3D const threadsPerBlockAddpT5asTrackCan{1, 1, 256}; - Vec3D const blocksPerGridAddpT5asTrackCan{1, 1, 1}; - WorkDiv3D const addpT5asTrackCandidateInGPU_workDiv = - createWorkDiv(blocksPerGridAddpT5asTrackCan, threadsPerBlockAddpT5asTrackCan, elementsPerThread); + WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, addpT5asTrackCandidateInGPU_kernel, nLowerModules_, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index b4cbd500c7bf8..c00015384b77b 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -968,6 +968,10 @@ namespace lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, struct lst::Modules modulesInGPU, struct lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -978,10 +982,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { short module_rings = modulesInGPU.rings[i]; short module_layers = modulesInGPU.layers[i]; short module_subdets = modulesInGPU.subdets[i]; @@ -1062,10 +1066,14 @@ namespace lst { struct lst::MiniDoublets mdsInGPU, struct lst::ObjectRanges rangesInGPU, struct lst::Hits hitsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (mdsInGPU.nMDs[i] == 0 or hitsInGPU.hitRanges[i * 2] == -1) { rangesInGPU.mdRanges[i * 2] = -1; rangesInGPU.mdRanges[i * 2 + 1] = -1; diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 49eb3b1902c9a..07b5f50dd57de 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -2669,6 +2669,10 @@ namespace lst { lst::Modules modulesInGPU, lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -2681,10 +2685,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (int i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (int i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { // Condition for a quintuple to exist for a module // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap short module_rings = modulesInGPU.rings[i]; @@ -2756,7 +2760,7 @@ namespace lst { // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); - if (globalThreadIdx[2] == 0) { + if (cms::alpakatools::once_per_block(acc)) { *rangesInGPU.nEligibleT5Modules = static_cast(nEligibleT5Modulesx); *rangesInGPU.device_nTotalQuints = static_cast(nTotalQuintupletsx); } @@ -2769,10 +2773,14 @@ namespace lst { lst::Modules modulesInGPU, lst::Quintuplets quintupletsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (quintupletsInGPU.nQuintuplets[i] == 0 or rangesInGPU.quintupletModuleIndices[i] == -1) { rangesInGPU.quintupletRanges[i * 2] = -1; rangesInGPU.quintupletRanges[i * 2 + 1] = -1; diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index cee59e316064a..cc8470f911a8b 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -801,6 +801,10 @@ namespace lst { lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU, lst::MiniDoublets mdsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -811,10 +815,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (modulesInGPU.nConnectedModules[i] == 0) { rangesInGPU.segmentModuleIndices[i] = nTotalSegments; rangesInGPU.segmentModuleOccupancy[i] = 0; @@ -888,7 +892,7 @@ namespace lst { // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); - if (globalThreadIdx[2] == 0) { + if (cms::alpakatools::once_per_block(acc)) { rangesInGPU.segmentModuleIndices[*modulesInGPU.nLowerModules] = nTotalSegments; *rangesInGPU.device_nTotalSegs = nTotalSegments; } @@ -901,10 +905,14 @@ namespace lst { lst::Modules modulesInGPU, lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (segmentsInGPU.nSegments[i] == 0) { rangesInGPU.segmentRanges[i * 2] = -1; rangesInGPU.segmentRanges[i * 2 + 1] = -1; diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 03e853cea7d7b..24ef4b94de0f2 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -389,13 +389,17 @@ namespace lst { lst::TrackCandidates trackCandidatesInGPU, lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; - for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets; - pixelTripletIndex += gridThreadExtent[2]) { + for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[0]) { if ((pixelTripletsInGPU.isDup[pixelTripletIndex])) continue; @@ -534,13 +538,17 @@ namespace lst { lst::TrackCandidates trackCandidatesInGPU, lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; - for (int pixelQuintupletIndex = globalThreadIdx[2]; pixelQuintupletIndex < nPixelQuintuplets; - pixelQuintupletIndex += gridThreadExtent[2]) { + for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[0]) { if (pixelQuintupletsInGPU.isDup[pixelQuintupletIndex]) continue; diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 3744dfb69e262..9fab052e6531f 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -928,6 +928,10 @@ namespace lst { lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU, lst::Segments segmentsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -938,10 +942,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (segmentsInGPU.nSegments[i] == 0) { rangesInGPU.tripletModuleIndices[i] = nTotalTriplets; rangesInGPU.tripletModuleOccupancy[i] = 0; @@ -1015,7 +1019,7 @@ namespace lst { // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); - if (globalThreadIdx[2] == 0) { + if (cms::alpakatools::once_per_block(acc)) { *rangesInGPU.device_nTotalTrips = nTotalTriplets; } } @@ -1027,10 +1031,14 @@ namespace lst { lst::Modules modulesInGPU, lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (tripletsInGPU.nTriplets[i] == 0) { rangesInGPU.tripletRanges[i * 2] = -1; rangesInGPU.tripletRanges[i * 2 + 1] = -1; From 7889093ac6bd4e1f2e01ee5452fca01e776d6f0c Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 15 Aug 2024 16:43:14 -0700 Subject: [PATCH 8/8] add synchronizations in callers of the event methods where it matters; make synchronization more explicit/flexible in names or function arguments --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 55 ++++++++++---- RecoTracker/LSTCore/src/alpaka/Event.h | 72 ++++++++++--------- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 18 +++-- RecoTracker/LSTCore/standalone/bin/lst.cc | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 10 +++ 5 files changed, 102 insertions(+), 55 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index cc8872438dfe7..f9757b0659691 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -4,7 +4,8 @@ using namespace ALPAKA_ACCELERATOR_NAMESPACE; -void lst::Event::init(bool verbose) { +void lst::Event::initSync(bool verbose) { + alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; mdsInGPU = nullptr; @@ -46,7 +47,8 @@ void lst::Event::init(bool verbose) { } } -void lst::Event::resetEvent() { +void lst::Event::resetEventSync() { + alpaka::wait(queue); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { n_hits_by_layer_barrel_[i] = 0; @@ -1358,7 +1360,7 @@ int lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should take care of garbage collection +lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1376,11 +1378,13 @@ lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return hitsInCPU; } -lst::HitsBuffer* lst::Event::getHitsInCMSSW() { +lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); @@ -1392,11 +1396,13 @@ lst::HitsBuffer* lst::Event::getHitsInCMSSW() { *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return hitsInCPU; } -lst::ObjectRangesBuffer* lst::Event::getRanges() { +lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { rangesInCPU = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); @@ -1406,12 +1412,13 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); - alpaka::wait(queue); // wait to get completed host data + if (sync) + alpaka::wait(queue); // wait to get completed host data } return rangesInCPU; } -lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { +lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1428,11 +1435,13 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return mdsInCPU; } -lst::SegmentsBuffer* lst::Event::getSegments() { +lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1463,11 +1472,13 @@ lst::SegmentsBuffer* lst::Event::getSegments() { alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return segmentsInCPU; } -lst::TripletsBuffer* lst::Event::getTriplets() { +lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1498,11 +1509,13 @@ lst::TripletsBuffer* lst::Event::getTriplets() { alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return tripletsInCPU; } -lst::QuintupletsBuffer* lst::Event::getQuintuplets() { +lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1533,11 +1546,13 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); alpaka::memcpy( queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return quintupletsInCPU; } -lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { +lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1571,11 +1586,13 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { +lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1606,11 +1623,13 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { +lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1643,11 +1662,13 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW() { +lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1670,16 +1691,20 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return trackCandidatesInCPU; } -lst::ModulesBuffer* lst::Event::getModules(bool isFull) { +lst::ModulesBuffer* lst::Event::getModules(bool isFull, bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. modulesInCPU = new lst::ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return modulesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 7e2a351a8b699..64365bb58bfa8 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -78,7 +78,7 @@ namespace lst { PixelTripletsBuffer* pixelTripletsInCPU; PixelQuintupletsBuffer* pixelQuintupletsInCPU; - void init(bool verbose); + void initSync(bool verbose); int* superbinCPU; int8_t* pixelTypeCPU; @@ -105,9 +105,10 @@ namespace lst { modulesBuffers_(deviceESData->modulesBuffers), pixelMapping_(*deviceESData->pixelMapping), endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { - init(verbose); + initSync(verbose); } - void resetEvent(); + void resetEventSync(); // synchronizes + void wait() const { alpaka::wait(queue); } // Calls the appropriate hit function, then increments the counter void addHitToEvent(std::vector const& x, @@ -134,24 +135,21 @@ namespace lst { std::vector const& pixelType, std::vector const& isQuad); - // functions that map the objects to the appropriate modules - void addMiniDoubletsToEventExplicit(); - void addSegmentsToEventExplicit(); - void addTripletsToEventExplicit(); - void addQuintupletsToEventExplicit(); - void resetObjectsInModule(); - void createMiniDoublets(); void createSegmentsWithModuleMap(); void createTriplets(); - void createPixelTracklets(); - void createPixelTrackletsWithMap(); void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); - void createExtendedTracks(); - void createQuintuplets(); void createPixelTriplets(); - void createPixelQuintuplets(); + void createQuintuplets(); void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); unsigned int getNumberOfHits(); unsigned int getNumberOfHitsByLayer(unsigned int layer); @@ -173,33 +171,37 @@ namespace lst { unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); - int getNumberOfTrackCandidates(); - int getNumberOfPixelTrackCandidates(); - int getNumberOfPT5TrackCandidates(); - int getNumberOfPT3TrackCandidates(); - int getNumberOfT5TrackCandidates(); - int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); unsigned int getNumberOfQuintuplets(); unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); - int getNumberOfPixelTriplets(); - int getNumberOfPixelQuintuplets(); + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); - ObjectRangesBuffer* getRanges(); - HitsBuffer* getHits(); - HitsBuffer* getHitsInCMSSW(); - MiniDoubletsBuffer* getMiniDoublets(); - SegmentsBuffer* getSegments(); - TripletsBuffer* getTriplets(); - QuintupletsBuffer* getQuintuplets(); - TrackCandidatesBuffer* getTrackCandidates(); - TrackCandidatesBuffer* getTrackCandidatesInCMSSW(); - PixelTripletsBuffer* getPixelTriplets(); - PixelQuintupletsBuffer* getPixelQuintuplets(); - ModulesBuffer* getModules(bool isFull = false); + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + HitsBuffer* getHits(bool sync = true); + HitsBuffer* getHitsInCMSSW(bool sync = true); + ObjectRangesBuffer* getRanges(bool sync = true); + MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + SegmentsBuffer* getSegments(bool sync = true); + TripletsBuffer* getTriplets(bool sync = true); + QuintupletsBuffer* getQuintuplets(bool sync = true); + PixelTripletsBuffer* getPixelTriplets(bool sync = true); + PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ModulesBuffer* getModules(bool isFull = false, bool sync = true); }; } // namespace lst diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 940469e8682a2..f5ee7d7f52add 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -255,10 +255,11 @@ void lst::LST::getOutput(lst::Event& event) { std::vector tc_seedIdx; std::vector tc_trackCandidateType; - lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW()); + lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { short trackCandidateType = trackCandidates->trackCandidateType[idx]; std::vector hit_idx = @@ -344,6 +345,7 @@ void lst::LST::run(Queue& queue, in_isQuad_vec_); event.createMiniDoublets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets()); printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0)); printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1)); @@ -360,6 +362,7 @@ void lst::LST::run(Queue& queue, event.createSegmentsWithModuleMap(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Segments produced: %d\n", event.getNumberOfSegments()); printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0)); printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1)); @@ -375,6 +378,7 @@ void lst::LST::run(Queue& queue, event.createTriplets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of T3s produced: %d\n", event.getNumberOfTriplets()); printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0)); printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1)); @@ -392,6 +396,7 @@ void lst::LST::run(Queue& queue, event.createQuintuplets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets()); printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0)); printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1)); @@ -409,15 +414,20 @@ void lst::LST::run(Queue& queue, event.pixelLineSegmentCleaning(no_pls_dupclean); event.createPixelQuintuplets(); - if (verbose) + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets()); + } event.createPixelTriplets(); - if (verbose) + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets()); + } event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates()); printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates()); printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates()); @@ -428,5 +438,5 @@ void lst::LST::run(Queue& queue, getOutput(event); - event.resetEvent(); + event.resetEventSync(); } diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index e67fe5b62d269..89bb43a3bcd4b 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -478,7 +478,7 @@ void run_lst() { // Clear this event TStopwatch my_timer; my_timer.Start(); - events.at(omp_get_thread_num())->resetEvent(); + events.at(omp_get_thread_num())->resetEventSync(); float timing_resetEvent = my_timer.RealTime(); timing_information.push_back({timing_input_loading, diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index d6657c5e512f6..9277b60253a64 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -28,6 +28,7 @@ float runMiniDoublet(lst::Event *event, int evt) { std::cout << "Reco Mini-Doublet start " << evt << std::endl; my_timer.Start(); event->createMiniDoublets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float md_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) @@ -80,6 +81,7 @@ float runSegment(lst::Event *event) { std::cout << "Reco Segment start" << std::endl; my_timer.Start(); event->createSegmentsWithModuleMap(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float sg_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Segment processing time: " << sg_elapsed << " secs" << std::endl; @@ -117,6 +119,7 @@ float runT3(lst::Event *event) { std::cout << "Reco T3 start" << std::endl; my_timer.Start(); event->createTriplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float t3_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco T3 processing time: " << t3_elapsed << " secs" << std::endl; @@ -158,6 +161,7 @@ float runpT3(lst::Event *event) { std::cout << "Reco Pixel Triplet pT3 start" << std::endl; my_timer.Start(); event->createPixelTriplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pt3_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco pT3 processing time: " << pt3_elapsed << " secs" << std::endl; @@ -174,6 +178,7 @@ float runQuintuplet(lst::Event *event) { std::cout << "Reco Quintuplet start" << std::endl; my_timer.Start(); event->createQuintuplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float t5_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Quintuplet processing time: " << t5_elapsed << " secs" << std::endl; @@ -219,6 +224,7 @@ float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { std::cout << "Reco Pixel Line Segment start" << std::endl; my_timer.Start(); event->pixelLineSegmentCleaning(no_pls_dupclean); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pls_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment processing time: " << pls_elapsed << " secs" << std::endl; @@ -233,6 +239,7 @@ float runPixelQuintuplet(lst::Event *event) { std::cout << "Reco Pixel Quintuplet start" << std::endl; my_timer.Start(); event->createPixelQuintuplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pt5_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet processing time: " << pt5_elapsed << " secs" << std::endl; @@ -249,6 +256,7 @@ float runTrackCandidate(lst::Event *event, bool no_pls_dupclean, bool tc_ std::cout << "Reco TrackCandidate start" << std::endl; my_timer.Start(); event->createTrackCandidates(no_pls_dupclean, tc_pls_triplets); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float tc_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco TrackCandidate processing time: " << tc_elapsed << " secs" << std::endl; @@ -892,6 +900,7 @@ float addInputsToEventPreLoad(lst::Event *event, superbin_vec, pixelType_vec, isQuad_vec); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float hit_loading_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) @@ -1331,6 +1340,7 @@ void writeMetaData() { pixelType_vec, isQuad_vec); + event.wait(); // device side event calls are asynchronous: wait to measure time or print float hit_loading_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Loading inputs processing time: " << hit_loading_elapsed << " secs" << std::endl;