# Review notes. This is free text ahead of the first file header; patch tools skip it.
#
# What the patch does
#   * PropagationMPlex.icc: splits the single per-track loop of helixAtRFromIterativeCCS_impl
#     into several "#pragma omp simd" loops; values that used to be loop-local scalars are
#     carried between the loops in arrays of asize = nmax - nmin elements.
#   * PropagationMPlex.cc: tags propagateHelixToZMPlex with the GCC "optimize" attribute.
#   * Makefile.config: adds -ffast-math for g++ and some commented-out flags.
#
# Layout
#   The patch had been stored with its line breaks collapsed. The hunks are re-split so that
#   every hunk matches the line counts in its header. Leading whitespace and the exact position
#   of blank context lines could not be recovered and are reconstructed -- regenerate the patch
#   from a checkout (or apply with whitespace tolerance) instead of applying it as-is.
#   The "index" lines are kept from the original and no longer describe the fixed content.
#
# Fixes made to PropagationMPlex.icc (hunk headers are unchanged: every fix keeps line counts)
#   * aindex was "n-nmax", which is negative for every n in [nmin, nmax); it is now "n - nmin".
#   * "float float theta[asize]" -> "float theta[asize]"; r[] is sized with asize like the rest.
#   * kinv[]/pt[] were re-declared ("const float kinv[aindex] = ...") inside their loop; they
#     are now plain element assignments.
#   * D/cosa/sina/cosah/sinah/id were zeroed at [asize] (one past the end); now at [aindex].
#   * id, pxin, pyin, alpha, dadx, dady, dadipt, dadphi were used or assigned without [aindex],
#     and the scalar oor0 was indexed; both are corrected.
#   * Context lines that still used ipt/kinv/pxin/pyin/dDdy as scalars became -/+ pairs.
#   * The DEBUG-only loop was opened inside "#ifdef DEBUG". It is now opened just before the
#     #ifdef. NOTE(review): this assumes the loop's closing brace sits after the matching
#     #endif, which is outside the visible hunk -- confirm.
#   * The copy-pasted "initialize erroProp" comment on the r0 loop was replaced.
#   * PropagationMPlex.cc: the whitespace-only change to the blank line above the function
#     is dropped (the line is plain context again).
#
# Still open
#   * NOTE(review): old lines 132-135 lie between the 4th and 5th hunk of the .icc and are not
#     shown. If they use k, dDdx or dDdy as scalars (as the neighbouring lines did), they need
#     the same [aindex] treatment before this compiles -- confirm against the file.
#   * NOTE(review): float x[asize] with a run-time asize is a variable-length array, a compiler
#     extension in C++ (the Makefile passes -Wno-vla). Confirm every build of this file accepts it.
#   * NOTE(review): GCC documents the "optimize" function attribute as a debugging aid, and
#     -ffast-math changes floating-point semantics for everything built with these flags.
#     Both look like experiment settings -- confirm before merging.
#
diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.cc b/RecoTracker/MkFitCore/src/PropagationMPlex.cc
index 806c4767402b1..cfd9ca9c0fb90 100644
--- a/RecoTracker/MkFitCore/src/PropagationMPlex.cc
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.cc
@@ -588,8 +588,8 @@ namespace mkfit {
   }
 
   //==============================================================================
 
-  void propagateHelixToZMPlex(const MPlexLS& inErr,
+  void __attribute__ ((optimize("no-inline"))) propagateHelixToZMPlex(const MPlexLS& inErr,
                               const MPlexLV& inPar,
                               const MPlexQI& inChg,
                               const MPlexQF& msZ,
diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.icc b/RecoTracker/MkFitCore/src/PropagationMPlex.icc
index a66083319f61c..be8438adb862d 100644
--- a/RecoTracker/MkFitCore/src/PropagationMPlex.icc
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.icc
@@ -15,6 +15,8 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
                                                  const PropagationFlags pf) {
   // bool debug = true;
 
+  const int asize = nmax - nmin;  // one scratch slot per n in [nmin, nmax)
+
 #pragma omp simd
   for (int n = nmin; n < nmax; ++n) {
     //initialize erroProp to identity matrix
@@ -24,24 +26,50 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
     errorProp(n, 3, 3) = 1.f;
     errorProp(n, 4, 4) = 1.f;
     errorProp(n, 5, 5) = 1.f;
-
-    float r0 = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
-    const float k = inChg(n, 0, 0) * 100.f /
-                    (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0) : Config::Bfield));
-    const float r = msRad(n, 0, 0);
-
+  }
+  float r0[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    // r0: hipo() of the input x and y (inPar rows 0 and 1)
+    const int aindex = n - nmin;  // 0-based slot in the scratch arrays; n - nmax would be negative
+    r0[aindex] = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
+  }
+  float k[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    k[aindex] = inChg(n, 0, 0) * 100.f /
+                (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0[aindex]) : Config::Bfield));
+  }
+  float r[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    r[aindex] = msRad(n, 0, 0);
+  }
+  float xin[asize];
+  float yin[asize];
+  float ipt[asize];
+  float phiin[asize];
+  float theta[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
     // if (std::abs(r-r0)<0.0001f) {
     //   dprint("distance less than 1mum, skip");
     //   continue;
     // }
 
-    const float xin = inPar(n, 0, 0);
-    const float yin = inPar(n, 1, 0);
-    const float ipt = inPar(n, 3, 0);
-    const float phiin = inPar(n, 4, 0);
-    const float theta = inPar(n, 5, 0);
+    const int aindex = n - nmin;
+    xin[aindex] = inPar(n, 0, 0);
+    yin[aindex] = inPar(n, 1, 0);
+    ipt[aindex] = inPar(n, 3, 0);
+    phiin[aindex] = inPar(n, 4, 0);
+    theta[aindex] = inPar(n, 5, 0);
 
     dprint(std::endl);
+  }
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
     dprint_np(n,
               "input parameters"
                   << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)="
@@ -49,30 +77,71 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
                   << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0)
                   << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0)
                   << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0));
-
-    const float kinv = 1.f / k;
-    const float pt = 1.f / ipt;
-
-    float D = 0., cosa = 0., sina = 0., cosah = 0., sinah = 0., id = 0.;
+  }
+  float kinv[asize];
+  float pt[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    kinv[aindex] = 1.f / k[aindex];  // assign the element; the arrays are declared above the loop
+    pt[aindex] = 1.f / ipt[aindex];
+  }
+  float D[asize];
+  float cosa[asize];
+  float sina[asize];
+  float cosah[asize];
+  float sinah[asize];
+  float id[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    D[aindex] = 0.; cosa[aindex] = 0.; sina[aindex] = 0.; cosah[aindex] = 0.; sinah[aindex] = 0.; id[aindex] = 0.;
+  }
     //no trig approx here, phi can be large
-    float cosPorT = std::cos(phiin), sinPorT = std::sin(phiin);
-    float pxin = cosPorT * pt;
-    float pyin = sinPorT * pt;
-
+  float cosPorT[asize];
+  float sinPorT[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    cosPorT[aindex] = std::cos(phiin[aindex]);
+    sinPorT[aindex] = std::sin(phiin[aindex]);
+  }
+  float pxin[asize];
+  float pyin[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    pxin[aindex] = cosPorT[aindex] * pt[aindex];
+    pyin[aindex] = sinPorT[aindex] * pt[aindex];
+  }
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
     dprint_np(n,
-              "k=" << std::setprecision(9) << k << " pxin=" << std::setprecision(9) << pxin
-                   << " pyin=" << std::setprecision(9) << pyin << " cosPorT=" << std::setprecision(9) << cosPorT
-                   << " sinPorT=" << std::setprecision(9) << sinPorT << " pt=" << std::setprecision(9) << pt);
-
+              "k=" << std::setprecision(9) << k[aindex] << " pxin=" << std::setprecision(9) << pxin[aindex]
+                   << " pyin=" << std::setprecision(9) << pyin[aindex] << " cosPorT=" << std::setprecision(9) << cosPorT[aindex]
+                   << " sinPorT=" << std::setprecision(9) << sinPorT[aindex] << " pt=" << std::setprecision(9) << pt[aindex]);
+  }
+  float dDdx[asize];
+  float dDdy[asize];
+  float dDdipt[asize];
+  float dDdphi[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
     //derivatives initialized to value for first iteration, i.e. distance = r-r0in
-    float dDdx = r0 > 0.f ? -xin / r0 : 0.f;
-    float dDdy = r0 > 0.f ? -yin / r0 : 0.f;
-    float dDdipt = 0.;
-    float dDdphi = 0.;
-
+    dDdx[aindex] = r0[aindex] > 0.f ? -xin[aindex] / r0[aindex] : 0.f;
+    dDdy[aindex] = r0[aindex] > 0.f ? -yin[aindex] / r0[aindex] : 0.f;
+    dDdipt[aindex] = 0.;
+    dDdphi[aindex] = 0.;
+  }
+  float oodotp[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
     for (int i = 0; i < Config::Niter; ++i) {
       //compute distance and path for the current iteration
-      r0 = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
+      r0[aindex] = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
 
       // Use one over dot produce of transverse momentum and radial
       // direction to scale the step. Propagation is prevented from reaching
@@ -80,52 +149,52 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
       // - Can / should we come up with a better approximation?
       // - Can / should take +/- curvature into account?
 
-      const float oodotp = r0 * pt / (pxin * outPar(n, 0, 0) + pyin * outPar(n, 1, 0));
+      oodotp[aindex] = r0[aindex] * pt[aindex] / (pxin[aindex] * outPar(n, 0, 0) + pyin[aindex] * outPar(n, 1, 0));
 
-      if (oodotp > 5.0f || oodotp < 0)  // 0.2 is 78.5 deg
+      if (oodotp[aindex] > 5.0f || oodotp[aindex] < 0)  // 0.2 is 78.5 deg
       {
-        id = 0.0f;
+        id[aindex] = 0.0f;
         outFailFlag(n, 0, 0) = 1;
       } else {
         // Can we come up with a better approximation?
         // Should take +/- curvature into account.
-        id = (r - r0) * oodotp;
+        id[aindex] = (r[aindex] - r0[aindex]) * oodotp[aindex];
       }
-      D += id;
+      D[aindex] += id[aindex];
 
       if (Config::useTrigApprox) {
-        sincos4(id * ipt * kinv * 0.5f, sinah, cosah);
+        sincos4(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f, sinah[aindex], cosah[aindex]);
       } else {
-        cosah = std::cos(id * ipt * kinv * 0.5f);
-        sinah = std::sin(id * ipt * kinv * 0.5f);
+        cosah[aindex] = std::cos(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f);
+        sinah[aindex] = std::sin(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f);
       }
-      cosa = 1.f - 2.f * sinah * sinah;
-      sina = 2.f * sinah * cosah;
+      cosa[aindex] = 1.f - 2.f * sinah[aindex] * sinah[aindex];
+      sina[aindex] = 2.f * sinah[aindex] * cosah[aindex];
 
       dprint_np(n,
                 "Attempt propagation from r="
-                    << r0 << " to r=" << r << std::endl
-                    << "   x=" << xin << " y=" << yin << " z=" << inPar(n, 2, 0) << " px=" << pxin << " py=" << pyin
-                    << " pz=" << pt * std::tan(theta) << " q=" << inChg(n, 0, 0) << std::endl
-                    << "   r=" << std::setprecision(9) << r << " r0=" << std::setprecision(9) << r0
-                    << " id=" << std::setprecision(9) << id << " dr=" << std::setprecision(9) << r - r0
-                    << " cosa=" << cosa << " sina=" << sina);
+                    << r0[aindex] << " to r=" << r[aindex] << std::endl
+                    << "   x=" << xin[aindex] << " y=" << yin[aindex] << " z=" << inPar(n, 2, 0) << " px=" << pxin[aindex] << " py=" << pyin[aindex]
+                    << " pz=" << pt[aindex] * std::tan(theta[aindex]) << " q=" << inChg(n, 0, 0) << std::endl
+                    << "   r=" << std::setprecision(9) << r[aindex] << " r0=" << std::setprecision(9) << r0[aindex]
+                    << " id=" << std::setprecision(9) << id[aindex] << " dr=" << std::setprecision(9) << r[aindex] - r0[aindex]
+                    << " cosa=" << cosa[aindex] << " sina=" << sina[aindex]);
 
       //update derivatives on total distance
       if (i + 1 != Config::Niter) {
         const float x = outPar(n, 0, 0);
         const float y = outPar(n, 1, 0);
-        const float oor0 = (r0 > 0.f && std::abs(r - r0) < 0.0001f) ? 1.f / r0 : 0.f;
+        const float oor0 = (r0[aindex] > 0.f && std::abs(r[aindex] - r0[aindex]) < 0.0001f) ? 1.f / r0[aindex] : 0.f;
 
-        const float dadipt = id * kinv;
+        const float dadipt = id[aindex] * kinv[aindex];
 
-        const float dadx = -x * ipt * kinv * oor0;
-        const float dady = -y * ipt * kinv * oor0;
+        const float dadx = -x * ipt[aindex] * kinv[aindex] * oor0;  // oor0 is a per-iteration scalar
+        const float dady = -y * ipt[aindex] * kinv[aindex] * oor0;
 
-        const float pxca = pxin * cosa;
-        const float pxsa = pxin * sina;
-        const float pyca = pyin * cosa;
-        const float pysa = pyin * sina;
+        const float pxca = pxin[aindex] * cosa[aindex];
+        const float pxsa = pxin[aindex] * sina[aindex];
+        const float pyca = pyin[aindex] * cosa[aindex];
+        const float pysa = pyin[aindex] * sina[aindex];
 
         float tmp;
 
@@ -136,73 +205,81 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
-        dDdy -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0;
+        dDdy[aindex] -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0;
 
         //now r0 depends on ipt and phi as well
-        tmp = dadipt * ipt;
-        dDdipt -=
-            k *
-            (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin) +
-             y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin)) *
-            pt * oor0;
-        dDdphi += k * (x * (pysa - pxin + pxca) - y * (pxsa - pyin + pyca)) * oor0;
+        tmp = dadipt * ipt[aindex];
+        dDdipt[aindex] -=
+            k[aindex] *
+            (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin[aindex]) +
+             y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin[aindex])) *
+            pt[aindex] * oor0;
+        dDdphi[aindex] += k[aindex] * (x * (pysa - pxin[aindex] + pxca) - y * (pxsa - pyin[aindex] + pyca)) * oor0;
       }
 
       //update parameters
-      outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
-      outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
-      const float pxinold = pxin;  //copy before overwriting
-      pxin = pxin * cosa - pyin * sina;
-      pyin = pyin * cosa + pxinold * sina;
+      outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k[aindex] * sinah[aindex] * (pxin[aindex] * cosah[aindex] - pyin[aindex] * sinah[aindex]);
+      outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k[aindex] * sinah[aindex] * (pyin[aindex] * cosah[aindex] + pxin[aindex] * sinah[aindex]);
+      const float pxinold = pxin[aindex];  //copy before overwriting
+      pxin[aindex] = pxin[aindex] * cosa[aindex] - pyin[aindex] * sina[aindex];
+      pyin[aindex] = pyin[aindex] * cosa[aindex] + pxinold * sina[aindex];
 
       dprint_np(n,
-                "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin
-                                   << " pyin=" << pyin);
-    }
-
-    const float alpha = D * ipt * kinv;
-    const float dadx = dDdx * ipt * kinv;
-    const float dady = dDdy * ipt * kinv;
-    const float dadipt = (ipt * dDdipt + D) * kinv;
-    const float dadphi = dDdphi * ipt * kinv;
+                "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin[aindex]
+                                   << " pyin=" << pyin[aindex]);
+    }  // iteration loop
+  }
+  float alpha[asize];
+  float dadx[asize];
+  float dady[asize];
+  float dadipt[asize];
+  float dadphi[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    alpha[aindex] = D[aindex] * ipt[aindex] * kinv[aindex];
+    dadx[aindex] = dDdx[aindex] * ipt[aindex] * kinv[aindex];
+    dady[aindex] = dDdy[aindex] * ipt[aindex] * kinv[aindex];
+    dadipt[aindex] = (ipt[aindex] * dDdipt[aindex] + D[aindex]) * kinv[aindex];
+    dadphi[aindex] = dDdphi[aindex] * ipt[aindex] * kinv[aindex];
 
     if (Config::useTrigApprox) {
-      sincos4(alpha, sina, cosa);
+      sincos4(alpha[aindex], sina[aindex], cosa[aindex]);
     } else {
-      cosa = std::cos(alpha);
-      sina = std::sin(alpha);
+      cosa[aindex] = std::cos(alpha[aindex]);
+      sina[aindex] = std::sin(alpha[aindex]);
     }
 
-    errorProp(n, 0, 0) = 1.f + k * dadx * (cosPorT * cosa - sinPorT * sina) * pt;
-    errorProp(n, 0, 1) = k * dady * (cosPorT * cosa - sinPorT * sina) * pt;
+    errorProp(n, 0, 0) = 1.f + k[aindex] * dadx[aindex] * (cosPorT[aindex] * cosa[aindex] - sinPorT[aindex] * sina[aindex]) * pt[aindex];
+    errorProp(n, 0, 1) = k[aindex] * dady[aindex] * (cosPorT[aindex] * cosa[aindex] - sinPorT[aindex] * sina[aindex]) * pt[aindex];
     errorProp(n, 0, 2) = 0.f;
     errorProp(n, 0, 3) =
-        k * (cosPorT * (ipt * dadipt * cosa - sina) + sinPorT * ((1.f - cosa) - ipt * dadipt * sina)) * pt * pt;
+        k[aindex] * (cosPorT[aindex] * (ipt[aindex] * dadipt[aindex] * cosa[aindex] - sina[aindex]) + sinPorT[aindex] * ((1.f - cosa[aindex]) - ipt[aindex] * dadipt[aindex] * sina[aindex])) * pt[aindex] * pt[aindex];
     errorProp(n, 0, 4) =
-        k * (cosPorT * dadphi * cosa - sinPorT * dadphi * sina - sinPorT * sina + cosPorT * cosa - cosPorT) * pt;
+        k[aindex] * (cosPorT[aindex] * dadphi[aindex] * cosa[aindex] - sinPorT[aindex] * dadphi[aindex] * sina[aindex] - sinPorT[aindex] * sina[aindex] + cosPorT[aindex] * cosa[aindex] - cosPorT[aindex]) * pt[aindex];
     errorProp(n, 0, 5) = 0.f;
 
-    errorProp(n, 1, 0) = k * dadx * (sinPorT * cosa + cosPorT * sina) * pt;
-    errorProp(n, 1, 1) = 1.f + k * dady * (sinPorT * cosa + cosPorT * sina) * pt;
+    errorProp(n, 1, 0) = k[aindex] * dadx[aindex] * (sinPorT[aindex] * cosa[aindex] + cosPorT[aindex] * sina[aindex]) * pt[aindex];
+    errorProp(n, 1, 1) = 1.f + k[aindex] * dady[aindex] * (sinPorT[aindex] * cosa[aindex] + cosPorT[aindex] * sina[aindex]) * pt[aindex];
     errorProp(n, 1, 2) = 0.f;
     errorProp(n, 1, 3) =
-        k * (sinPorT * (ipt * dadipt * cosa - sina) + cosPorT * (ipt * dadipt * sina - (1.f - cosa))) * pt * pt;
+        k[aindex] * (sinPorT[aindex] * (ipt[aindex] * dadipt[aindex] * cosa[aindex] - sina[aindex]) + cosPorT[aindex] * (ipt[aindex] * dadipt[aindex] * sina[aindex] - (1.f - cosa[aindex]))) * pt[aindex] * pt[aindex];
     errorProp(n, 1, 4) =
-        k * (sinPorT * dadphi * cosa + cosPorT * dadphi * sina + sinPorT * cosa + cosPorT * sina - sinPorT) * pt;
+        k[aindex] * (sinPorT[aindex] * dadphi[aindex] * cosa[aindex] + cosPorT[aindex] * dadphi[aindex] * sina[aindex] + sinPorT[aindex] * cosa[aindex] + cosPorT[aindex] * sina[aindex] - sinPorT[aindex]) * pt[aindex];
     errorProp(n, 1, 5) = 0.f;
     //no trig approx here, theta can be large
-    cosPorT = std::cos(theta);
-    sinPorT = std::sin(theta);
+    cosPorT[aindex] = std::cos(theta[aindex]);
+    sinPorT[aindex] = std::sin(theta[aindex]);
     //redefine sinPorT as 1./sinPorT to reduce the number of temporaries
-    sinPorT = 1.f / sinPorT;
+    sinPorT[aindex] = 1.f / sinPorT[aindex];
 
-    outPar(n, 2, 0) = inPar(n, 2, 0) + k * alpha * cosPorT * pt * sinPorT;
+    outPar(n, 2, 0) = inPar(n, 2, 0) + k[aindex] * alpha[aindex] * cosPorT[aindex] * pt[aindex] * sinPorT[aindex];
 
-    errorProp(n, 2, 0) = k * cosPorT * dadx * pt * sinPorT;
-    errorProp(n, 2, 1) = k * cosPorT * dady * pt * sinPorT;
+    errorProp(n, 2, 0) = k[aindex] * cosPorT[aindex] * dadx[aindex] * pt[aindex] * sinPorT[aindex];
+    errorProp(n, 2, 1) = k[aindex] * cosPorT[aindex] * dady[aindex] * pt[aindex] * sinPorT[aindex];
     errorProp(n, 2, 2) = 1.f;
-    errorProp(n, 2, 3) = k * cosPorT * (ipt * dadipt - alpha) * pt * pt * sinPorT;
-    errorProp(n, 2, 4) = k * dadphi * cosPorT * pt * sinPorT;
-    errorProp(n, 2, 5) = -k * alpha * pt * sinPorT * sinPorT;
+    errorProp(n, 2, 3) = k[aindex] * cosPorT[aindex] * (ipt[aindex] * dadipt[aindex] - alpha[aindex]) * pt[aindex] * pt[aindex] * sinPorT[aindex];
+    errorProp(n, 2, 4) = k[aindex] * dadphi[aindex] * cosPorT[aindex] * pt[aindex] * sinPorT[aindex];
+    errorProp(n, 2, 5) = -k[aindex] * alpha[aindex] * pt[aindex] * sinPorT[aindex] * sinPorT[aindex];
 
-    outPar(n, 3, 0) = ipt;
+    outPar(n, 3, 0) = ipt[aindex];
 
     errorProp(n, 3, 0) = 0.f;
     errorProp(n, 3, 1) = 0.f;
@@ -211,16 +288,16 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
     errorProp(n, 3, 4) = 0.f;
     errorProp(n, 3, 5) = 0.f;
 
-    outPar(n, 4, 0) = inPar(n, 4, 0) + alpha;
+    outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[aindex];
 
-    errorProp(n, 4, 0) = dadx;
-    errorProp(n, 4, 1) = dady;
+    errorProp(n, 4, 0) = dadx[aindex];
+    errorProp(n, 4, 1) = dady[aindex];
     errorProp(n, 4, 2) = 0.f;
-    errorProp(n, 4, 3) = dadipt;
-    errorProp(n, 4, 4) = 1.f + dadphi;
+    errorProp(n, 4, 3) = dadipt[aindex];
+    errorProp(n, 4, 4) = 1.f + dadphi[aindex];
     errorProp(n, 4, 5) = 0.f;
 
-    outPar(n, 5, 0) = theta;
+    outPar(n, 5, 0) = theta[aindex];
 
     errorProp(n, 5, 0) = 0.f;
     errorProp(n, 5, 1) = 0.f;
@@ -237,8 +314,10 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
                   << " mom = " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
                   << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0)))
                   << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl);
-
+  }
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {  // opened outside the #ifdef so braces balance without DEBUG
 #ifdef DEBUG
     if (n < N_proc) {
       dmutex_guard;
       std::cout << n << ": jacobian" << std::endl;
diff --git a/RecoTracker/MkFitCore/standalone/Makefile.config b/RecoTracker/MkFitCore/standalone/Makefile.config
index 5f964cb4fd6b6..0b828755f026d 100644
--- a/RecoTracker/MkFitCore/standalone/Makefile.config
+++ b/RecoTracker/MkFitCore/standalone/Makefile.config
@@ -51,13 +51,13 @@ OPT := -g -O3
 
 # 4. Vectorization settings
 ifdef AVX_512
-VEC_GCC := -march=native # -fopt-info-vec -mavx512f -mavx512cd
+VEC_GCC := -march=native # -fopt-info-vec -mavx512f -mavx512cd -fopt-info-vec-all
 VEC_ICC := -xHost -qopt-zmm-usage=high # -xcore-avx512
 else ifdef AVX2
-VEC_GCC := -mavx2 -mfma
+VEC_GCC := -mavx2 -mfma #-fopt-info-vec-all
 VEC_ICC := -mavx2 -mfma
 else ifdef SSE3
-VEC_GCC := -msse3
+VEC_GCC := -msse3 # -fopt-info-vec-all
 VEC_ICC := -msse3
 else
 VEC_GCC := -mavx # -fopt-info-vec-all
@@ -129,7 +129,8 @@ endif
 
 ifeq ($(CXX), g++)
   CXXFLAGS += -std=c++1z -ftree-vectorize -Werror=main -Werror=pointer-arith -Werror=overlength-strings -Wno-vla -Werror=overflow -Wstrict-overflow -Werror=array-bounds -Werror=format-contains-nul -Werror=type-limits -fvisibility-inlines-hidden -fno-math-errno --param vect-max-version-for-alias-checks=50 -Xassembler --compress-debug-sections -felide-constructors -fmessage-length=0 -Wall -Wno-non-template-friend -Wno-long-long -Wreturn-type -Wunused -Wparentheses -Wno-deprecated -Werror=return-type -Werror=missing-braces -Werror=unused-value -Werror=address -Werror=format -Werror=sign-compare -Werror=write-strings -Werror=delete-non-virtual-dtor -Wstrict-aliasing -Werror=narrowing -Werror=unused-but-set-variable -Werror=reorder -Werror=unused-variable -Werror=conversion-null -Werror=return-local-addr -Wnon-virtual-dtor -Werror=switch -fdiagnostics-show-option -Wno-unused-local-typedefs -Wno-attributes -Wno-psabi
-  CXXFLAGS += -fdiagnostics-color=auto -fdiagnostics-show-option -pthread -pipe -fopenmp-simd
+  CXXFLAGS += -fdiagnostics-color=auto -fdiagnostics-show-option -pthread -pipe -fopenmp-simd -ffast-math
+#CXXFLAGS += -mveclibabi=svml -lsvml -L/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2022/compiler/latest/linux/compiler/lib/intel64 -Wl,-rpath=/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2022/compiler/latest/linux/compiler/lib/intel64 -funsafe-math-optimizations
 endif
 
 # Try to find a new enough TBB