From 02ea183c3e559ce1206da7ad36ef378606b3208a Mon Sep 17 00:00:00 2001 From: Dan Riley Date: Wed, 7 Feb 2024 10:36:46 -0500 Subject: [PATCH] move simd pragmas to inner loops, add constexpr to some config conditionals --- RecoTracker/MkFitCore/src/MkBuilder.cc | 10 +++++----- RecoTracker/MkFitCore/src/MkFinder.cc | 6 +++--- RecoTracker/MkFitCore/src/PropagationMPlex.cc | 10 +++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/RecoTracker/MkFitCore/src/MkBuilder.cc b/RecoTracker/MkFitCore/src/MkBuilder.cc index 0bedff5de99f1..19a1adcc9b9c6 100644 --- a/RecoTracker/MkFitCore/src/MkBuilder.cc +++ b/RecoTracker/MkFitCore/src/MkBuilder.cc @@ -412,7 +412,7 @@ namespace mkfit { //------------------------------------------------------------------------------ void MkBuilder::seed_post_cleaning(TrackVec &tv) { - if (Const::nan_n_silly_check_seeds) { + if constexpr (Const::nan_n_silly_check_seeds) { int count = 0; for (int i = 0; i < (int)tv.size(); ++i) { @@ -421,7 +421,7 @@ namespace mkfit { "Post-cleaning seed silly value check and fix"); if (silly) { ++count; - if (Const::nan_n_silly_remove_bad_seeds) { + if constexpr (Const::nan_n_silly_remove_bad_seeds) { // XXXX MT // Could do somethin smarter here: set as Stopped ? check in seed cleaning ? 
tv.erase(tv.begin() + i); @@ -669,7 +669,7 @@ namespace mkfit { seed_cand_vec.push_back(std::pair(iseed, ic)); ccand[ic].resetOverlaps(); - if (Const::nan_n_silly_check_cands_every_layer) { + if constexpr (Const::nan_n_silly_check_cands_every_layer) { if (ccand[ic].hasSillyValues(Const::nan_n_silly_print_bad_cands_every_layer, Const::nan_n_silly_fixup_bad_cands_every_layer, "Per layer silly check")) @@ -683,7 +683,10 @@ namespace mkfit { } } - if (Const::nan_n_silly_check_cands_every_layer && silly_count > 0) { - m_nan_n_silly_per_layer_count += silly_count; + // silly_count is a runtime value: keep it out of the if constexpr condition so this still compiles when the flag is enabled. + if constexpr (Const::nan_n_silly_check_cands_every_layer) { + if (silly_count > 0) { + m_nan_n_silly_per_layer_count += silly_count; + } } @@ -1110,7 +1113,7 @@ namespace mkfit { // mkfndr->copyOutParErr(eoccs.refCandidates_nc(), end - itrack, true); // For prop-to-plane propagate from the last hit, not layer center. - if (Config::usePropToPlane) { + if constexpr (Config::usePropToPlane) { mkfndr->inputTracksAndHitIdx(eoccs.refCandidates(), seed_cand_idx, itrack, end, false); } diff --git a/RecoTracker/MkFitCore/src/MkFinder.cc b/RecoTracker/MkFitCore/src/MkFinder.cc index 725e5095e9545..dd9ba93bd9e35 100644 --- a/RecoTracker/MkFitCore/src/MkFinder.cc +++ b/RecoTracker/MkFitCore/src/MkFinder.cc @@ -1313,7 +1313,7 @@ namespace mkfit { MPlexLV propPar; clearFailFlag(); - if (Config::usePropToPlane) { + if constexpr (Config::usePropToPlane) { // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint MPlexHV norm, dir; packModuleNormDir(layer_of_hits, hit_cnt, norm, dir, N_proc); @@ -1582,7 +1582,7 @@ namespace mkfit { MPlexLV propPar; clearFailFlag(); - if (Config::usePropToPlane) { + if constexpr (Config::usePropToPlane) { // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint MPlexHV norm, dir; packModuleNormDir(layer_of_hits, hit_cnt, norm, dir, N_proc); @@ -1759,7 +1759,7 @@ namespace mkfit { // See comment in MkBuilder::find_tracks_in_layer() about intra / inter flags used here // for propagation to the hit. 
clearFailFlag(); - if (Config::usePropToPlane) { + if constexpr (Config::usePropToPlane) { MPlexHV norm, dir; packModuleNormDir(layer_of_hits, 0, norm, dir, N_proc); kalmanPropagateAndUpdatePlane(m_Err[iP], diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.cc b/RecoTracker/MkFitCore/src/PropagationMPlex.cc index 129e92f929235..00f0ae09c742c 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.cc +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.cc @@ -228,9 +228,11 @@ namespace { // this version does not assume to know which elements are 0 or 1, so it does the full multiplication void MultHelixPropFull(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) { -#pragma omp simd for (int n = 0; n < NN; ++n) { for (int i = 0; i < 6; ++i) { +// optimization reports indicate only the inner two loops are good +// candidates for vectorization +#pragma omp simd for (int j = 0; j < 6; ++j) { C(n, i, j) = 0.; for (int k = 0; k < 6; ++k) @@ -242,9 +244,11 @@ namespace { // this version does not assume to know which elements are 0 or 1, so it does the full mupltiplication void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) { -#pragma omp simd for (int n = 0; n < NN; ++n) { for (int i = 0; i < 6; ++i) { +// optimization reports indicate only the inner two loops are good +// candidates for vectorization +#pragma omp simd for (int j = 0; j < 6; ++j) { C(n, i, j) = 0.; for (int k = 0; k < 6; ++k) @@ -1280,7 +1284,7 @@ namespace mkfit { // const float thetaMSC2 = thetaMSC*thetaMSC; const float thetaMSC = 0.0136f * (1.f + 0.038f * std::log(radL)) / (beta * p); // eq 32.15 const float thetaMSC2 = thetaMSC * thetaMSC * radL; - if (Config::usePtMultScat) { + if constexpr (Config::usePtMultScat) { outErr.At(n, 3, 3) += thetaMSC2 * pz * pz * ipt2 * ipt2; outErr.At(n, 3, 5) -= thetaMSC2 * pz * ipt2; outErr.At(n, 4, 4) += thetaMSC2 * p2 * ipt2;