diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.icc b/RecoTracker/MkFitCore/src/PropagationMPlex.icc index be8438adb862d..ee5911d55080e 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.icc +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.icc @@ -31,27 +31,27 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, #pragma omp simd for (int n = nmin; n < nmax; ++n) { //initialize erroProp to identity matrix - const int aindex = n-nmax; + const int aindex = n-nmin; r0[aindex] = hipo(inPar(n, 0, 0), inPar(n, 1, 0)); } float k[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; + const int aindex = n-nmin; k[aindex] = inChg(n, 0, 0) * 100.f / (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0[aindex]) : Config::Bfield)); } float r[nmax-nmin]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; + const int aindex = n-nmin; r[aindex] = msRad(n, 0, 0); } float xin[asize]; float yin[asize]; float ipt[asize]; float phiin[asize]; - float float theta[asize]; + float theta[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { // if (std::abs(r-r0)<0.0001f) { @@ -59,7 +59,7 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, // continue; // } - const int aindex = n-nmax; + const int aindex = n-nmin; xin[aindex] = inPar(n, 0, 0); yin[aindex] = inPar(n, 1, 0); ipt[aindex] = inPar(n, 3, 0); @@ -82,9 +82,9 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, float pt[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; - const float kinv[aindex] = 1.f / k[aindex]; - const float pt[aindex] = 1.f / ipt[aindex]; + const int aindex = n-nmin; + kinv[aindex] = 1.f / k[aindex]; + pt[aindex] = 1.f / ipt[aindex]; } float D[asize]; float cosa[asize]; @@ -94,15 +94,15 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, float id[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; - D[asize] = 0.; cosa[asize] = 0.; sina[asize] = 0.; cosah[asize] = 0.; sinah[asize] = 0.; id[asize] = 0.; + const int aindex = n-nmin; + D[aindex] = 0.; cosa[aindex] = 0.; sina[aindex] = 0.; cosah[aindex] = 0.; sinah[aindex] = 0.; id[aindex] = 0.; } //no trig approx here, phi can be large float cosPorT[asize]; float sinPorT[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; + const int aindex = n-nmin; cosPorT[aindex] = std::cos(phiin[aindex]); sinPorT[aindex] = std::sin(phiin[aindex]); } @@ -110,13 +110,13 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, float pyin[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; + const int aindex = n-nmin; pxin[aindex] = cosPorT[aindex] * pt[aindex]; pyin[aindex] = sinPorT[aindex] * pt[aindex]; } #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; +// const int aindex = n-nmin; dprint_np(n, "k=" << std::setprecision(9) << k[aindex] << " pxin=" << std::setprecision(9) << pxin[aindex] << " pyin=" << std::setprecision(9) << pyin[aindex] << " cosPorT=" << std::setprecision(9) << cosPorT[aindex] @@ -128,7 +128,7 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, float dDdphi[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; + const int aindex = n-nmin; //derivatives initialized to value for first iteration, i.e. distance = r-r0in dDdx[aindex] = r0[aindex] > 0.f ? -xin[aindex] / r0[aindex] : 0.f; dDdy[aindex] = r0[aindex] > 0.f ? -yin[aindex] / r0[aindex] : 0.f; @@ -136,9 +136,21 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, dDdphi[aindex] = 0.; } float oodotp[asize]; + float x[asize]; + float y[asize]; + float oor0[asize]; + float dadipt[asize]; + float dadx[asize]; + float dady[asize]; + float pxca[asize]; + float pxsa[asize]; + float pyca[asize]; + float pysa[asize]; + float tmp[asize]; + float pxinold[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; + const int aindex = n-nmin; for (int i = 0; i < Config::Niter; ++i) { //compute distance and path for the current iteration r0[aindex] = hipo(outPar(n, 0, 0), outPar(n, 1, 0)); @@ -163,10 +175,10 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, D[aindex] += id[aindex]; if (Config::useTrigApprox) { - sincos4(id * ipt[aindex] * kinv[aindex] * 0.5f, sinah[aindex], cosah[aindex]); + sincos4(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f, sinah[aindex], cosah[aindex]); } else { - cosah[aindex] = std::cos(id * ipt[aindex] * kinv[aindex] * 0.5f); - sinah[aindex] = std::sin(id * ipt[aindex] * kinv[aindex] * 0.5f); + cosah[aindex] = std::cos(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f); + sinah[aindex] = std::sin(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f); } cosa[aindex] = 1.f - 2.f * sinah[aindex] * sinah[aindex]; sina[aindex] = 2.f * sinah[aindex] * cosah[aindex]; @@ -182,43 +194,42 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, //update derivatives on total distance if (i + 1 != Config::Niter) { - const float x = outPar(n, 0, 0); - const float y = outPar(n, 1, 0); - const float oor0 = (r0[aindex] > 0.f && std::abs(r[aindex] - r0[aindex]) < 0.0001f) ? 1.f / r0[aindex] : 0.f; + x[aindex] = outPar(n, 0, 0); + y[aindex] = outPar(n, 1, 0); + oor0[aindex] = (r0[aindex] > 0.f && std::abs(r[aindex] - r0[aindex]) < 0.0001f) ? 1.f / r0[aindex] : 0.f; - const float dadipt = id[aindex] * kinv[aindex]; + dadipt[aindex] = id[aindex] * kinv[aindex]; - const float dadx = -x * ipt[aindex] * kinv[aindex] * oor0[aindex]; - const float dady = -y * ipt * kinv * oor0; + dadx[aindex] = -x[aindex] * ipt[aindex] * kinv[aindex] * oor0[aindex]; + dady[aindex] = -y[aindex] * ipt[aindex] * kinv[aindex] * oor0[aindex]; - const float pxca = pxin[aindex] * cosa[aindex]; - const float pxsa = pxin[aindex] * sina[aindex]; - const float pyca = pyin[aindex] * cosa[aindex]; - const float pysa = pyin[aindex] * sina[aindex]; + pxca[aindex] = pxin[aindex] * cosa[aindex]; + pxsa[aindex] = pxin[aindex] * sina[aindex]; + pyca[aindex] = pyin[aindex] * cosa[aindex]; + pysa[aindex] = pyin[aindex] * sina[aindex]; - float tmp; - tmp = k * dadx; - dDdx -= (x * (1.f + tmp * (pxca - pysa)) + y * tmp * (pyca + pxsa)) * oor0; + tmp[aindex] = k[aindex] * dadx[aindex]; + dDdx[aindex] -= (x[aindex] * (1.f + tmp[aindex] * (pxca[aindex] - pysa[aindex])) + y[aindex] * tmp[aindex] * (pyca[aindex] + pxsa[aindex])) * oor0[aindex]; - tmp = k * dady; - dDdy -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0; + tmp[aindex] = k[aindex] * dady[aindex]; + dDdy[aindex] -= (x[aindex] * tmp[aindex] * (pxca[aindex] - pysa[aindex]) + y[aindex] * (1.f + tmp[aindex] * (pyca[aindex] + pxsa[aindex]))) * oor0[aindex]; //now r0 depends on ipt and phi as well - tmp = dadipt * ipt[aindex]; + tmp[aindex] = dadipt[aindex] * ipt[aindex]; dDdipt[aindex] -= k[aindex] * - (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin) + y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin)) * - pt[aindex] * oor0; - dDdphi[aindex] += k[aindex] * (x * (pysa - pxin + pxca) - y * (pxsa - pyin + pyca)) * oor0; + (x[aindex] * (pxca[aindex] * tmp[aindex] - pysa[aindex] * tmp[aindex] - pyca[aindex] - pxsa[aindex] + pyin[aindex]) + y[aindex] * (pyca[aindex] * tmp[aindex] + pxsa[aindex] * tmp[aindex] - pysa[aindex] + pxca[aindex] - pxin[aindex])) * + pt[aindex] * oor0[aindex]; + dDdphi[aindex] += k[aindex] * (x[aindex] * (pysa[aindex] - pxin[aindex] + pxca[aindex]) - y[aindex] * (pxsa[aindex] - pyin[aindex] + pyca[aindex])) * oor0[aindex]; } //update parameters outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k[aindex] * sinah[aindex] * (pxin[aindex] * cosah[aindex] - pyin[aindex] * sinah[aindex]); outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k[aindex] * sinah[aindex] * (pyin[aindex] * cosah[aindex] + pxin[aindex] * sinah[aindex]); - const float pxinold = pxin[aindex]; //copy before overwriting + pxinold[aindex] = pxin[aindex]; //copy before overwriting pxin[aindex] = pxin[aindex] * cosa[aindex] - pyin[aindex] * sina[aindex]; - pyin[aindex] = pyin[aindex] * cosa[aindex] + pxinold * sina[aindex]; + pyin[aindex] = pyin[aindex] * cosa[aindex] + pxinold[aindex] * sina[aindex]; dprint_np(n, "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin[aindex] @@ -226,18 +237,15 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, } // iteration loop } float alpha[asize]; - float dadx[asize]; - float dady[asize]; - float dadipt[asize]; float dadphi[asize]; #pragma omp simd for (int n = nmin; n < nmax; ++n) { - const int aindex = n-nmax; - alpha = D[aindex] * ipt[aindex] * kinv[aindex]; - dadx = dDdx[aindex] * ipt[aindex] * kinv[aindex]; - dady = dDdy[aindex] * ipt[aindex] * kinv[aindex]; - dadipt = (ipt[aindex] * dDdipt[aindex] + D[aindex]) * kinv[aindex]; - dadphi = dDdphi[aindex] * ipt[aindex] * kinv[aindex]; + const int aindex = n-nmin; + alpha[aindex] = D[aindex] * ipt[aindex] * kinv[aindex]; + dadx[aindex] = dDdx[aindex] * ipt[aindex] * kinv[aindex]; + dady[aindex] = dDdy[aindex] * ipt[aindex] * kinv[aindex]; + dadipt[aindex] = (ipt[aindex] * dDdipt[aindex] + D[aindex]) * kinv[aindex]; + dadphi[aindex] = dDdphi[aindex] * ipt[aindex] * kinv[aindex]; if (Config::useTrigApprox) { sincos4(alpha[aindex], sina[aindex], cosa[aindex]); @@ -367,4 +375,4 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, } #endif } -} +