diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.cc b/RecoTracker/MkFitCore/src/PropagationMPlex.cc index 806c4767402b1..13d57565072c7 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.cc +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.cc @@ -1,3 +1,5 @@ +#include "FWCore/Utilities/interface/CMSUnrollLoop.h" + #include "MaterialEffects.h" #include "PropagationMPlex.h" @@ -331,6 +333,7 @@ namespace mkfit { float pxin = cosP / ipt; float pyin = sinP / ipt; + CMS_UNROLL_LOOP_COUNT(Config::Niter) for (int i = 0; i < Config::Niter; ++i) { dprint_np(n, std::endl @@ -343,7 +346,7 @@ namespace mkfit { const float ialpha = (r - r0) * ipt / k; //alpha+=ialpha; - if (Config::useTrigApprox) { + if constexpr (Config::useTrigApprox) { sincos4(ialpha * 0.5f, sinah, cosah); } else { cosah = std::cos(ialpha * 0.5f); @@ -708,21 +711,44 @@ namespace mkfit { errorProp(n, 3, 3) = 1.f; errorProp(n, 4, 4) = 1.f; errorProp(n, 5, 5) = 1.f; + } + float zout[NN]; + float zin[NN]; + float ipt[NN]; + float phiin[NN]; + float theta[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + //initialize erroProp to identity matrix, except element 2,2 which is zero + zout[n] = msZ.constAt(n, 0, 0); + zin[n] = inPar.constAt(n, 2, 0); + ipt[n] = inPar.constAt(n, 3, 0); + phiin[n] = inPar.constAt(n, 4, 0); + theta[n] = inPar.constAt(n, 5, 0); + } - const float zout = msZ.constAt(n, 0, 0); - - const float zin = inPar.constAt(n, 2, 0); - const float ipt = inPar.constAt(n, 3, 0); - const float phiin = inPar.constAt(n, 4, 0); - const float theta = inPar.constAt(n, 5, 0); + float k[NN]; + if (pflags.use_param_b_field) { +#pragma omp simd + for (int n = 0; n < NN; ++n) { + k[n] = inChg.constAt(n, 0, 0) * 100.f / + (-Const::sol * Config::bFieldFromZR(zin[n], hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0)))); + } + } else { +#pragma omp simd + for (int n = 0; n < NN; ++n) { + k[n] = inChg.constAt(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield); + } + } - const float k = - inChg.constAt(n, 0, 0) * 100.f / - (-Const::sol * (pflags.use_param_b_field - ? Config::bFieldFromZR(zin, hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0))) - : Config::Bfield)); - const float kinv = 1.f / k; + float kinv[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + kinv[n] = 1.f / k[n]; + } +#pragma omp simd + for (int n = 0; n < NN; ++n) { dprint_np(n, std::endl << "input parameters" @@ -732,66 +758,160 @@ namespace mkfit { << " inPar.constAt(n, 3, 0)=" << std::setprecision(9) << inPar.constAt(n, 3, 0) << " inPar.constAt(n, 4, 0)=" << std::setprecision(9) << inPar.constAt(n, 4, 0) << " inPar.constAt(n, 5, 0)=" << std::setprecision(9) << inPar.constAt(n, 5, 0)); + } + + float pt[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + pt[n] = 1.f / ipt[n]; + } - const float pt = 1.f / ipt; + //no trig approx here, phi can be large + float cosP[NN]; + float sinP[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + cosP[n] = std::cos(phiin[n]); + } - float cosahTmp = 0., sinahTmp = 0.; - //no trig approx here, phi can be large - const float cosP = std::cos(phiin), sinP = std::sin(phiin); - const float cosT = std::cos(theta), sinT = std::sin(theta); - const float tanT = sinT / cosT; - const float icos2T = 1.f / (cosT * cosT); - const float pxin = cosP * pt; - const float pyin = sinP * pt; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + sinP[n] = std::sin(phiin[n]); + } + float cosT[NN]; + float sinT[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + cosT[n] = std::cos(theta[n]); + } + +#pragma omp simd + for (int n = 0; n < NN; ++n) { + sinT[n] = std::sin(theta[n]); + } + + float tanT[NN]; + float icos2T[NN]; + float pxin[NN]; + float pyin[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + tanT[n] = sinT[n] / cosT[n]; + icos2T[n] = 1.f / (cosT[n] * cosT[n]); + pxin[n] = cosP[n] * pt[n]; + pyin[n] = sinP[n] * pt[n]; + } +#pragma omp simd + for (int n = 0; n < NN; ++n) { //fixme, make this printout useful for propagation to z dprint_np(n, std::endl - << "k=" << std::setprecision(9) << k << " pxin=" << std::setprecision(9) << pxin - << " pyin=" << std::setprecision(9) << pyin << " cosP=" << std::setprecision(9) << cosP - << " sinP=" << std::setprecision(9) << sinP << " pt=" << std::setprecision(9) << pt); - - const float deltaZ = zout - zin; - const float alpha = deltaZ * tanT * ipt * kinv; - - if (Config::useTrigApprox) { - sincos4(alpha * 0.5f, sinahTmp, cosahTmp); - } else { - cosahTmp = std::cos(alpha * 0.5f); - sinahTmp = std::sin(alpha * 0.5f); + << "k=" << std::setprecision(9) << k[n] << " pxin=" << std::setprecision(9) << pxin[n] + << " pyin=" << std::setprecision(9) << pyin[n] << " cosP=" << std::setprecision(9) << cosP[n] + << " sinP=" << std::setprecision(9) << sinP[n] << " pt=" << std::setprecision(9) << pt[n]); + } + float deltaZ[NN]; + float alpha[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + deltaZ[n] = zout[n] - zin[n]; + alpha[n] = deltaZ[n] * tanT[n] * ipt[n] * kinv[n]; + } + + float cosahTmp[NN]; + float sinahTmp[NN]; + if constexpr (Config::useTrigApprox) { +#ifndef __INTEL_COMPILER +#pragma omp simd +#endif + for (int n = 0; n < NN; ++n) { + sincos4(alpha[n] * 0.5f, sinahTmp[n], cosahTmp[n]); + } + } else { +#ifndef __INTEL_COMPILER +#pragma omp simd +#endif + for (int n = 0; n < NN; ++n) { + cosahTmp[n] = std::cos(alpha[n] * 0.5f); + } +#ifndef __INTEL_COMPILER +#pragma omp simd +#endif + for (int n = 0; n < NN; ++n) { + sinahTmp[n] = std::sin(alpha[n] * 0.5f); } - const float cosah = cosahTmp; - const float sinah = sinahTmp; - const float cosa = 1.f - 2.f * sinah * sinah; - const float sina = 2.f * sinah * cosah; + } - //update parameters - outPar.At(n, 0, 0) = outPar.At(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah); - outPar.At(n, 1, 0) = outPar.At(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah); - outPar.At(n, 2, 0) = zout; - outPar.At(n, 4, 0) = phiin + alpha; + float cosah[NN]; + float sinah[NN]; + float cosa[NN]; + float sina[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + cosah[n] = cosahTmp[n]; + sinah[n] = sinahTmp[n]; + cosa[n] = 1.f - 2.f * sinah[n] * sinah[n]; + sina[n] = 2.f * sinah[n] * cosah[n]; + } + //update parameters +#pragma omp simd + for (int n = 0; n < NN; ++n) { + outPar.At(n, 0, 0) = outPar.At(n, 0, 0) + 2.f * k[n] * sinah[n] * (pxin[n] * cosah[n] - pyin[n] * sinah[n]); + outPar.At(n, 1, 0) = outPar.At(n, 1, 0) + 2.f * k[n] * sinah[n] * (pyin[n] * cosah[n] + pxin[n] * sinah[n]); + outPar.At(n, 2, 0) = zout[n]; + outPar.At(n, 4, 0) = phiin[n] + alpha[n]; + } +#pragma omp simd + for (int n = 0; n < NN; ++n) { dprint_np(n, std::endl << "outPar.At(n, 0, 0)=" << outPar.At(n, 0, 0) << " outPar.At(n, 1, 0)=" << outPar.At(n, 1, 0) - << " pxin=" << pxin << " pyin=" << pyin); + << " pxin=" << pxin[n] << " pyin=" << pyin[n]); + } + + float pxcaMpysa[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + pxcaMpysa[n] = pxin[n] * cosa[n] - pyin[n] * sina[n]; + } - const float pxcaMpysa = pxin * cosa - pyin * sina; - errorProp(n, 0, 2) = -tanT * ipt * pxcaMpysa; - errorProp(n, 0, 3) = k * pt * pt * (cosP * (alpha * cosa - sina) + sinP * 2.f * sinah * (sinah - alpha * cosah)); - errorProp(n, 0, 4) = -2 * k * pt * sinah * (sinP * cosah + cosP * sinah); - errorProp(n, 0, 5) = deltaZ * ipt * pxcaMpysa * icos2T; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + errorProp(n, 0, 2) = -tanT[n] * ipt[n] * pxcaMpysa[n]; + errorProp(n, 0, 3) = + k[n] * pt[n] * pt[n] * + (cosP[n] * (alpha[n] * cosa[n] - sina[n]) + sinP[n] * 2.f * sinah[n] * (sinah[n] - alpha[n] * cosah[n])); + errorProp(n, 0, 4) = -2.f * k[n] * pt[n] * sinah[n] * (sinP[n] * cosah[n] + cosP[n] * sinah[n]); + errorProp(n, 0, 5) = deltaZ[n] * ipt[n] * pxcaMpysa[n] * icos2T[n]; + } - const float pycaPpxsa = pyin * cosa + pxin * sina; - errorProp(n, 1, 2) = -tanT * ipt * pycaPpxsa; - errorProp(n, 1, 3) = k * pt * pt * (sinP * (alpha * cosa - sina) - cosP * 2.f * sinah * (sinah - alpha * cosah)); - errorProp(n, 1, 4) = 2 * k * pt * sinah * (cosP * cosah - sinP * sinah); - errorProp(n, 1, 5) = deltaZ * ipt * pycaPpxsa * icos2T; + float pycaPpxsa[NN]; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + pycaPpxsa[n] = pyin[n] * cosa[n] + pxin[n] * sina[n]; + } - errorProp(n, 4, 2) = -ipt * tanT * kinv; - errorProp(n, 4, 3) = tanT * deltaZ * kinv; - errorProp(n, 4, 5) = ipt * deltaZ * kinv * icos2T; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + errorProp(n, 1, 2) = -tanT[n] * ipt[n] * pycaPpxsa[n]; + errorProp(n, 1, 3) = + k[n] * pt[n] * pt[n] * + (sinP[n] * (alpha[n] * cosa[n] - sina[n]) - cosP[n] * 2.f * sinah[n] * (sinah[n] - alpha[n] * cosah[n])); + errorProp(n, 1, 4) = 2.f * k[n] * pt[n] * sinah[n] * (cosP[n] * cosah[n] - sinP[n] * sinah[n]); + errorProp(n, 1, 5) = deltaZ[n] * ipt[n] * pycaPpxsa[n] * icos2T[n]; + } +#pragma omp simd + for (int n = 0; n < NN; ++n) { + errorProp(n, 4, 2) = -ipt[n] * tanT[n] * kinv[n]; + errorProp(n, 4, 3) = tanT[n] * deltaZ[n] * kinv[n]; + errorProp(n, 4, 5) = ipt[n] * deltaZ[n] * kinv[n] * icos2T[n]; + } + +#pragma omp simd + for (int n = 0; n < NN; ++n) { dprint_np( n, "propagation end, dump parameters" @@ -802,8 +922,11 @@ namespace mkfit { << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0))) << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0)) << " pT=" << 1. / std::abs(outPar.At(n, 3, 0)) << std::endl); + } #ifdef DEBUG +#pragma omp simd + for (int n = 0; n < NN; ++n) { if (n < N_proc) { dmutex_guard; std::cout << n << ": jacobian" << std::endl; @@ -850,8 +973,8 @@ namespace mkfit { errorProp(n, 5, 4), errorProp(n, 5, 5)); } -#endif } +#endif } //============================================================================== diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.icc b/RecoTracker/MkFitCore/src/PropagationMPlex.icc index a66083319f61c..b8ae0fe482981 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.icc +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.icc @@ -24,24 +24,52 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, errorProp(n, 3, 3) = 1.f; errorProp(n, 4, 4) = 1.f; errorProp(n, 5, 5) = 1.f; - - float r0 = hipo(inPar(n, 0, 0), inPar(n, 1, 0)); - const float k = inChg(n, 0, 0) * 100.f / - (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0) : Config::Bfield)); - const float r = msRad(n, 0, 0); - + } + float r0[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //initialize erroProp to identity matrix + r0[n - nmin] = hipo(inPar(n, 0, 0), inPar(n, 1, 0)); + } + float k[nmax - nmin]; + if (pf.use_param_b_field) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), r0[n - nmin])); + } + } else { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield); + } + } + float r[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + r[n - nmin] = msRad(n, 0, 0); + } + float xin[nmax - nmin]; + float yin[nmax - nmin]; + float ipt[nmax - nmin]; + float phiin[nmax - nmin]; + float theta[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { // if (std::abs(r-r0)<0.0001f) { // dprint("distance less than 1mum, skip"); // continue; // } - const float xin = inPar(n, 0, 0); - const float yin = inPar(n, 1, 0); - const float ipt = inPar(n, 3, 0); - const float phiin = inPar(n, 4, 0); - const float theta = inPar(n, 5, 0); + xin[n - nmin] = inPar(n, 0, 0); + yin[n - nmin] = inPar(n, 1, 0); + ipt[n - nmin] = inPar(n, 3, 0); + phiin[n - nmin] = inPar(n, 4, 0); + theta[n - nmin] = inPar(n, 5, 0); + + //dprint(std::endl); + } - dprint(std::endl); + for (int n = nmin; n < nmax; ++n) { dprint_np(n, "input parameters" << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)=" @@ -49,186 +77,376 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0) << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0) << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0)); + } - const float kinv = 1.f / k; - const float pt = 1.f / ipt; + float kinv[nmax - nmin]; + float pt[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + kinv[n - nmin] = 1.f / k[n - nmin]; + pt[n - nmin] = 1.f / ipt[n - nmin]; + } + float D[nmax - nmin]; + float cosa[nmax - nmin]; + float sina[nmax - nmin]; + float cosah[nmax - nmin]; + float sinah[nmax - nmin]; + float id[nmax - nmin]; + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + D[n - nmin] = 0.; + } - float D = 0., cosa = 0., sina = 0., cosah = 0., sinah = 0., id = 0.; - //no trig approx here, phi can be large - float cosPorT = std::cos(phiin), sinPorT = std::sin(phiin); - float pxin = cosPorT * pt; - float pyin = sinPorT * pt; + //no trig approx here, phi can be large + float cosPorT[nmax - nmin]; + float sinPorT[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosPorT[n - nmin] = std::cos(phiin[n - nmin]); + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sinPorT[n - nmin] = std::sin(phiin[n - nmin]); + } + float pxin[nmax - nmin]; + float pyin[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + pxin[n - nmin] = cosPorT[n - nmin] * pt[n - nmin]; + pyin[n - nmin] = sinPorT[n - nmin] * pt[n - nmin]; + } + + for (int n = nmin; n < nmax; ++n) { dprint_np(n, - "k=" << std::setprecision(9) << k << " pxin=" << std::setprecision(9) << pxin - << " pyin=" << std::setprecision(9) << pyin << " cosPorT=" << std::setprecision(9) << cosPorT - << " sinPorT=" << std::setprecision(9) << sinPorT << " pt=" << std::setprecision(9) << pt); + "k=" << std::setprecision(9) << k[n - nmin] << " pxin=" << std::setprecision(9) << pxin[n - nmin] + << " pyin=" << std::setprecision(9) << pyin[n - nmin] << " cosPorT=" << std::setprecision(9) + << cosPorT[n - nmin] << " sinPorT=" << std::setprecision(9) << sinPorT[n - nmin] + << " pt=" << std::setprecision(9) << pt[n - nmin]); + } + float dDdx[nmax - nmin]; + float dDdy[nmax - nmin]; + float dDdipt[nmax - nmin]; + float dDdphi[nmax - nmin]; + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dDdipt[n - nmin] = 0.; + dDdphi[n - nmin] = 0.; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { //derivatives initialized to value for first iteration, i.e. distance = r-r0in - float dDdx = r0 > 0.f ? -xin / r0 : 0.f; - float dDdy = r0 > 0.f ? -yin / r0 : 0.f; - float dDdipt = 0.; - float dDdphi = 0.; + dDdx[n - nmin] = r0[n - nmin] > 0.f ? -xin[n - nmin] / r0[n - nmin] : 0.f; + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dDdy[n - nmin] = r0[n - nmin] > 0.f ? -yin[n - nmin] / r0[n - nmin] : 0.f; + } - for (int i = 0; i < Config::Niter; ++i) { + float oodotp[nmax - nmin]; + float x[nmax - nmin]; + float y[nmax - nmin]; + float oor0[nmax - nmin]; + float dadipt[nmax - nmin]; + float dadx[nmax - nmin]; + float dady[nmax - nmin]; + float pxca[nmax - nmin]; + float pxsa[nmax - nmin]; + float pyca[nmax - nmin]; + float pysa[nmax - nmin]; + float tmp[nmax - nmin]; + float tmpx[nmax - nmin]; + float tmpy[nmax - nmin]; + float pxinold[nmax - nmin]; + + CMS_UNROLL_LOOP_COUNT(Config::Niter) + for (int i = 0; i < Config::Niter; ++i) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { //compute distance and path for the current iteration - r0 = hipo(outPar(n, 0, 0), outPar(n, 1, 0)); + r0[n - nmin] = hipo(outPar(n, 0, 0), outPar(n, 1, 0)); + } - // Use one over dot produce of transverse momentum and radial - // direction to scale the step. Propagation is prevented from reaching - // too close to the apex (dotp > 0.2). - // - Can / should we come up with a better approximation? - // - Can / should take +/- curvature into account? + // Use one over dot produce of transverse momentum and radial + // direction to scale the step. Propagation is prevented from reaching + // too close to the apex (dotp > 0.2). + // - Can / should we come up with a better approximation? + // - Can / should take +/- curvature into account? - const float oodotp = r0 * pt / (pxin * outPar(n, 0, 0) + pyin * outPar(n, 1, 0)); +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + oodotp[n - nmin] = + r0[n - nmin] * pt[n - nmin] / (pxin[n - nmin] * outPar(n, 0, 0) + pyin[n - nmin] * outPar(n, 1, 0)); + } - if (oodotp > 5.0f || oodotp < 0) // 0.2 is 78.5 deg +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + if (oodotp[n - nmin] > 5.0f || oodotp[n - nmin] < 0) // 0.2 is 78.5 deg { - id = 0.0f; outFailFlag(n, 0, 0) = 1; - } else { - // Can we come up with a better approximation? - // Should take +/- curvature into account. - id = (r - r0) * oodotp; - } - D += id; - - if (Config::useTrigApprox) { - sincos4(id * ipt * kinv * 0.5f, sinah, cosah); - } else { - cosah = std::cos(id * ipt * kinv * 0.5f); - sinah = std::sin(id * ipt * kinv * 0.5f); } - cosa = 1.f - 2.f * sinah * sinah; - sina = 2.f * sinah * cosah; - - dprint_np(n, - "Attempt propagation from r=" - << r0 << " to r=" << r << std::endl - << " x=" << xin << " y=" << yin << " z=" << inPar(n, 2, 0) << " px=" << pxin << " py=" << pyin - << " pz=" << pt * std::tan(theta) << " q=" << inChg(n, 0, 0) << std::endl - << " r=" << std::setprecision(9) << r << " r0=" << std::setprecision(9) << r0 - << " id=" << std::setprecision(9) << id << " dr=" << std::setprecision(9) << r - r0 - << " cosa=" << cosa << " sina=" << sina); + } - //update derivatives on total distance - if (i + 1 != Config::Niter) { - const float x = outPar(n, 0, 0); - const float y = outPar(n, 1, 0); - const float oor0 = (r0 > 0.f && std::abs(r - r0) < 0.0001f) ? 1.f / r0 : 0.f; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + // Can we come up with a better approximation? + // Should take +/- curvature into account. + id[n - nmin] = + (oodotp[n - nmin] > 5.0f || oodotp[n - nmin] < 0) ? 0.0f : (r[n - nmin] - r0[n - nmin]) * oodotp[n - nmin]; + } - const float dadipt = id * kinv; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + D[n - nmin] += id[n - nmin]; + } - const float dadx = -x * ipt * kinv * oor0; - const float dady = -y * ipt * kinv * oor0; + if constexpr (Config::useTrigApprox) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sincos4(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f, sinah[n - nmin], cosah[n - nmin]); + } + } else { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosah[n - nmin] = std::cos(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f); + sinah[n - nmin] = std::sin(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f); + } + } - const float pxca = pxin * cosa; - const float pxsa = pxin * sina; - const float pyca = pyin * cosa; - const float pysa = pyin * sina; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosa[n - nmin] = 1.f - 2.f * sinah[n - nmin] * sinah[n - nmin]; + sina[n - nmin] = 2.f * sinah[n - nmin] * cosah[n - nmin]; + } - float tmp; + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "Attempt propagation from r=" + << r0[n - nmin] << " to r=" << r[n - nmin] << std::endl + << " x=" << xin[n - nmin] << " y=" << yin[n - nmin] << " z=" << inPar(n, 2, 0) + << " px=" << pxin[n - nmin] << " py=" << pyin[n - nmin] + << " pz=" << pt[n - nmin] * std::tan(theta[n - nmin]) << " q=" << inChg(n, 0, 0) << std::endl + << " r=" << std::setprecision(9) << r[n - nmin] << " r0=" << std::setprecision(9) << r0[n - nmin] + << " id=" << std::setprecision(9) << id[n - nmin] << " dr=" << std::setprecision(9) + << r[n - nmin] - r0[n - nmin] << " cosa=" << cosa[n - nmin] << " sina=" << sina[n - nmin]); + } - tmp = k * dadx; - dDdx -= (x * (1.f + tmp * (pxca - pysa)) + y * tmp * (pyca + pxsa)) * oor0; + //update derivatives on total distance + if (i + 1 != Config::Niter) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + x[n - nmin] = outPar(n, 0, 0); + y[n - nmin] = outPar(n, 1, 0); + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + oor0[n - nmin] = + (r0[n - nmin] > 0.f && std::abs(r[n - nmin] - r0[n - nmin]) < 0.0001f) ? 1.f / r0[n - nmin] : 0.f; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dadipt[n - nmin] = id[n - nmin] * kinv[n - nmin]; + dadx[n - nmin] = -x[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * oor0[n - nmin]; + dady[n - nmin] = -y[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * oor0[n - nmin]; + pxca[n - nmin] = pxin[n - nmin] * cosa[n - nmin]; + pxsa[n - nmin] = pxin[n - nmin] * sina[n - nmin]; + pyca[n - nmin] = pyin[n - nmin] * cosa[n - nmin]; + pysa[n - nmin] = pyin[n - nmin] * sina[n - nmin]; + tmpx[n - nmin] = k[n - nmin] * dadx[n - nmin]; + } - tmp = k * dady; - dDdy -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dDdx[n - nmin] -= (x[n - nmin] * (1.f + tmpx[n - nmin] * (pxca[n - nmin] - pysa[n - nmin])) + + y[n - nmin] * tmpx[n - nmin] * (pyca[n - nmin] + pxsa[n - nmin])) * + oor0[n - nmin]; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + tmpy[n - nmin] = k[n - nmin] * dady[n - nmin]; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dDdy[n - nmin] -= (x[n - nmin] * tmpy[n - nmin] * (pxca[n - nmin] - pysa[n - nmin]) + + y[n - nmin] * (1.f + tmpy[n - nmin] * (pyca[n - nmin] + pxsa[n - nmin]))) * + oor0[n - nmin]; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { //now r0 depends on ipt and phi as well - tmp = dadipt * ipt; - dDdipt -= - k * - (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin) + y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin)) * - pt * oor0; - dDdphi += k * (x * (pysa - pxin + pxca) - y * (pxsa - pyin + pyca)) * oor0; + tmp[n - nmin] = dadipt[n - nmin] * ipt[n - nmin]; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dDdipt[n - nmin] -= k[n - nmin] * + (x[n - nmin] * (pxca[n - nmin] * tmp[n - nmin] - pysa[n - nmin] * tmp[n - nmin] - + pyca[n - nmin] - pxsa[n - nmin] + pyin[n - nmin]) + + y[n - nmin] * (pyca[n - nmin] * tmp[n - nmin] + pxsa[n - nmin] * tmp[n - nmin] - + pysa[n - nmin] + pxca[n - nmin] - pxin[n - nmin])) * + pt[n - nmin] * oor0[n - nmin]; } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + dDdphi[n - nmin] += k[n - nmin] * + (x[n - nmin] * (pysa[n - nmin] - pxin[n - nmin] + pxca[n - nmin]) - + y[n - nmin] * (pxsa[n - nmin] - pyin[n - nmin] + pyca[n - nmin])) * + oor0[n - nmin]; + } + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { //update parameters - outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah); - outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah); - const float pxinold = pxin; //copy before overwriting - pxin = pxin * cosa - pyin * sina; - pyin = pyin * cosa + pxinold * sina; - + outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k[n - nmin] * sinah[n - nmin] * + (pxin[n - nmin] * cosah[n - nmin] - pyin[n - nmin] * sinah[n - nmin]); + outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k[n - nmin] * sinah[n - nmin] * + (pyin[n - nmin] * cosah[n - nmin] + pxin[n - nmin] * sinah[n - nmin]); + pxinold[n - nmin] = pxin[n - nmin]; //copy before overwriting + pxin[n - nmin] = pxin[n - nmin] * cosa[n - nmin] - pyin[n - nmin] * sina[n - nmin]; + pyin[n - nmin] = pyin[n - nmin] * cosa[n - nmin] + pxinold[n - nmin] * sina[n - nmin]; + } + for (int n = nmin; n < nmax; ++n) { dprint_np(n, - "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin - << " pyin=" << pyin); + "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) + << " pxin=" << pxin[n - nmin] << " pyin=" << pyin[n - nmin]); } + } // iteration loop - const float alpha = D * ipt * kinv; - const float dadx = dDdx * ipt * kinv; - const float dady = dDdy * ipt * kinv; - const float dadipt = (ipt * dDdipt + D) * kinv; - const float dadphi = dDdphi * ipt * kinv; + float alpha[nmax - nmin]; + float dadphi[nmax - nmin]; - if (Config::useTrigApprox) { - sincos4(alpha, sina, cosa); - } else { - cosa = std::cos(alpha); - sina = std::sin(alpha); - } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + alpha[n - nmin] = D[n - nmin] * ipt[n - nmin] * kinv[n - nmin]; + dadx[n - nmin] = dDdx[n - nmin] * ipt[n - nmin] * kinv[n - nmin]; + dady[n - nmin] = dDdy[n - nmin] * ipt[n - nmin] * kinv[n - nmin]; + dadipt[n - nmin] = (ipt[n - nmin] * dDdipt[n - nmin] + D[n - nmin]) * kinv[n - nmin]; + dadphi[n - nmin] = dDdphi[n - nmin] * ipt[n - nmin] * kinv[n - nmin]; + } - errorProp(n, 0, 0) = 1.f + k * dadx * (cosPorT * cosa - sinPorT * sina) * pt; - errorProp(n, 0, 1) = k * dady * (cosPorT * cosa - sinPorT * sina) * pt; + if constexpr (Config::useTrigApprox) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sincos4(alpha[n - nmin], sina[n - nmin], cosa[n - nmin]); + } + } else { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosa[n - nmin] = std::cos(alpha[n - nmin]); + sina[n - nmin] = std::sin(alpha[n - nmin]); + } + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + errorProp(n, 0, 0) = 1.f + k[n - nmin] * dadx[n - nmin] * + (cosPorT[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * sina[n - nmin]) * + pt[n - nmin]; + errorProp(n, 0, 1) = k[n - nmin] * dady[n - nmin] * + (cosPorT[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * sina[n - nmin]) * pt[n - nmin]; errorProp(n, 0, 2) = 0.f; errorProp(n, 0, 3) = - k * (cosPorT * (ipt * dadipt * cosa - sina) + sinPorT * ((1.f - cosa) - ipt * dadipt * sina)) * pt * pt; + k[n - nmin] * + (cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * cosa[n - nmin] - sina[n - nmin]) + + sinPorT[n - nmin] * ((1.f - cosa[n - nmin]) - ipt[n - nmin] * dadipt[n - nmin] * sina[n - nmin])) * + pt[n - nmin] * pt[n - nmin]; errorProp(n, 0, 4) = - k * (cosPorT * dadphi * cosa - sinPorT * dadphi * sina - sinPorT * sina + cosPorT * cosa - cosPorT) * pt; + k[n - nmin] * + (cosPorT[n - nmin] * dadphi[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * dadphi[n - nmin] * sina[n - nmin] - + sinPorT[n - nmin] * sina[n - nmin] + cosPorT[n - nmin] * cosa[n - nmin] - cosPorT[n - nmin]) * + pt[n - nmin]; errorProp(n, 0, 5) = 0.f; + } - errorProp(n, 1, 0) = k * dadx * (sinPorT * cosa + cosPorT * sina) * pt; - errorProp(n, 1, 1) = 1.f + k * dady * (sinPorT * cosa + cosPorT * sina) * pt; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + errorProp(n, 1, 0) = k[n - nmin] * dadx[n - nmin] * + (sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin]) * pt[n - nmin]; + errorProp(n, 1, 1) = 1.f + k[n - nmin] * dady[n - nmin] * + (sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin]) * + pt[n - nmin]; errorProp(n, 1, 2) = 0.f; errorProp(n, 1, 3) = - k * (sinPorT * (ipt * dadipt * cosa - sina) + cosPorT * (ipt * dadipt * sina - (1.f - cosa))) * pt * pt; + k[n - nmin] * + (sinPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * cosa[n - nmin] - sina[n - nmin]) + + cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * sina[n - nmin] - (1.f - cosa[n - nmin]))) * + pt[n - nmin] * pt[n - nmin]; errorProp(n, 1, 4) = - k * (sinPorT * dadphi * cosa + cosPorT * dadphi * sina + sinPorT * cosa + cosPorT * sina - sinPorT) * pt; + k[n - nmin] * + (sinPorT[n - nmin] * dadphi[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * dadphi[n - nmin] * sina[n - nmin] + + sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin] - sinPorT[n - nmin]) * + pt[n - nmin]; errorProp(n, 1, 5) = 0.f; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { //no trig approx here, theta can be large - cosPorT = std::cos(theta); - sinPorT = std::sin(theta); - //redefine sinPorT as 1./sinPorT to reduce the number of temporaries - sinPorT = 1.f / sinPorT; + cosPorT[n - nmin] = std::cos(theta[n - nmin]); + } - outPar(n, 2, 0) = inPar(n, 2, 0) + k * alpha * cosPorT * pt * sinPorT; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sinPorT[n - nmin] = std::sin(theta[n - nmin]); + } - errorProp(n, 2, 0) = k * cosPorT * dadx * pt * sinPorT; - errorProp(n, 2, 1) = k * cosPorT * dady * pt * sinPorT; - errorProp(n, 2, 2) = 1.f; - errorProp(n, 2, 3) = k * cosPorT * (ipt * dadipt - alpha) * pt * pt * sinPorT; - errorProp(n, 2, 4) = k * dadphi * cosPorT * pt * sinPorT; - errorProp(n, 2, 5) = -k * alpha * pt * sinPorT * sinPorT; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //redefine sinPorT as 1./sinPorT to reduce the number of temporaries + sinPorT[n - nmin] = 1.f / sinPorT[n - nmin]; + } - outPar(n, 3, 0) = ipt; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + outPar(n, 2, 0) = + inPar(n, 2, 0) + k[n - nmin] * alpha[n - nmin] * cosPorT[n - nmin] * pt[n - nmin] * sinPorT[n - nmin]; + errorProp(n, 2, 0) = k[n - nmin] * cosPorT[n - nmin] * dadx[n - nmin] * pt[n - nmin] * sinPorT[n - nmin]; + errorProp(n, 2, 1) = k[n - nmin] * cosPorT[n - nmin] * dady[n - nmin] * pt[n - nmin] * sinPorT[n - nmin]; + errorProp(n, 2, 2) = 1.f; + errorProp(n, 2, 3) = k[n - nmin] * cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] - alpha[n - nmin]) * + pt[n - nmin] * pt[n - nmin] * sinPorT[n - nmin]; + errorProp(n, 2, 4) = k[n - nmin] * dadphi[n - nmin] * cosPorT[n - nmin] * pt[n - nmin] * sinPorT[n - nmin]; + errorProp(n, 2, 5) = -k[n - nmin] * alpha[n - nmin] * pt[n - nmin] * sinPorT[n - nmin] * sinPorT[n - nmin]; + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + outPar(n, 3, 0) = ipt[n - nmin]; errorProp(n, 3, 0) = 0.f; errorProp(n, 3, 1) = 0.f; errorProp(n, 3, 2) = 0.f; errorProp(n, 3, 3) = 1.f; errorProp(n, 3, 4) = 0.f; errorProp(n, 3, 5) = 0.f; + } - outPar(n, 4, 0) = inPar(n, 4, 0) + alpha; - - errorProp(n, 4, 0) = dadx; - errorProp(n, 4, 1) = dady; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[n - nmin]; + errorProp(n, 4, 0) = dadx[n - nmin]; + errorProp(n, 4, 1) = dady[n - nmin]; errorProp(n, 4, 2) = 0.f; - errorProp(n, 4, 3) = dadipt; - errorProp(n, 4, 4) = 1.f + dadphi; + errorProp(n, 4, 3) = dadipt[n - nmin]; + errorProp(n, 4, 4) = 1.f + dadphi[n - nmin]; errorProp(n, 4, 5) = 0.f; + } - outPar(n, 5, 0) = theta; - +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + outPar(n, 5, 0) = theta[n - nmin]; errorProp(n, 5, 0) = 0.f; errorProp(n, 5, 1) = 0.f; errorProp(n, 5, 2) = 0.f; errorProp(n, 5, 3) = 0.f; errorProp(n, 5, 4) = 0.f; errorProp(n, 5, 5) = 1.f; + } + for (int n = nmin; n < nmax; ++n) { dprint_np(n, "propagation end, dump parameters" << std::endl @@ -237,8 +455,10 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, << " mom = " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0))) << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl); + } #ifdef DEBUG + for (int n = nmin; n < nmax; ++n) { if (n < N_proc) { dmutex_guard; std::cout << n << ": jacobian" << std::endl; @@ -286,6 +506,6 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, errorProp(n, 5, 5)); printf("\n"); } -#endif } +#endif }