diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.cc b/RecoTracker/MkFitCore/src/PropagationMPlex.cc
index 806c4767402b1..cfd9ca9c0fb90 100644
--- a/RecoTracker/MkFitCore/src/PropagationMPlex.cc
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.cc
@@ -588,8 +588,8 @@ namespace mkfit {
   }
 
   //==============================================================================
-
-  void propagateHelixToZMPlex(const MPlexLS& inErr,
+  
+  void __attribute__ ((optimize("no-inline"))) propagateHelixToZMPlex(const MPlexLS& inErr,
                               const MPlexLV& inPar,
                               const MPlexQI& inChg,
                               const MPlexQF& msZ,
diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.icc b/RecoTracker/MkFitCore/src/PropagationMPlex.icc
index a66083319f61c..be8438adb862d 100644
--- a/RecoTracker/MkFitCore/src/PropagationMPlex.icc
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.icc
@@ -15,6 +15,8 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
                                                  const PropagationFlags pf) {
   // bool debug = true;
 
+  const int asize = nmax-nmin;  // number of n slots in [nmin, nmax); used as the length of the scratch arrays declared below
+
 #pragma omp simd
   for (int n = nmin; n < nmax; ++n) {
     //initialize erroProp to identity matrix
@@ -24,24 +26,50 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
     errorProp(n, 3, 3) = 1.f;
     errorProp(n, 4, 4) = 1.f;
     errorProp(n, 5, 5) = 1.f;
-
-    float r0 = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
-    const float k = inChg(n, 0, 0) * 100.f /
-                    (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0) : Config::Bfield));
-    const float r = msRad(n, 0, 0);
-
+  }
+  float r0[asize];  // scratch arrays have asize entries, so they are indexed by aindex = n - nmin
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    // offset from nmin: (n - nmax) is always negative and would write in front of the array
+    const int aindex = n - nmin;
+    r0[aindex] = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
+  }
+  float k[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    k[aindex] = inChg(n, 0, 0) * 100.f /
+                    (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0[aindex]) : Config::Bfield));
+  }
+  float r[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    r[aindex] = msRad(n, 0, 0);
+  }
+  float xin[asize];
+  float yin[asize];
+  float ipt[asize];
+  float phiin[asize];
+  float theta[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
     // if (std::abs(r-r0)<0.0001f) {
     // 	dprint("distance less than 1mum, skip");
     // 	continue;
     // }
 
-    const float xin = inPar(n, 0, 0);
-    const float yin = inPar(n, 1, 0);
-    const float ipt = inPar(n, 3, 0);
-    const float phiin = inPar(n, 4, 0);
-    const float theta = inPar(n, 5, 0);
+    const int aindex = n - nmin;
+    xin[aindex] = inPar(n, 0, 0);
+    yin[aindex] = inPar(n, 1, 0);
+    ipt[aindex] = inPar(n, 3, 0);
+    phiin[aindex] = inPar(n, 4, 0);
+    theta[aindex] = inPar(n, 5, 0);
 
     dprint(std::endl);
+  }
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
     dprint_np(n,
               "input parameters"
                   << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)="
@@ -49,30 +77,71 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
                   << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0)
                   << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0)
                   << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0));
-
-    const float kinv = 1.f / k;
-    const float pt = 1.f / ipt;
-
-    float D = 0., cosa = 0., sina = 0., cosah = 0., sinah = 0., id = 0.;
+  }
+  float kinv[asize];
+  float pt[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    kinv[aindex] = 1.f / k[aindex];
+    pt[aindex] = 1.f / ipt[aindex];
+  }
+  float D[asize];
+  float cosa[asize];
+  float sina[asize];
+  float cosah[asize];
+  float sinah[asize];
+  float id[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    D[aindex] = 0.; cosa[aindex] = 0.; sina[aindex] = 0.; cosah[aindex] = 0.; sinah[aindex] = 0.; id[aindex] = 0.;
+  }
     //no trig approx here, phi can be large
-    float cosPorT = std::cos(phiin), sinPorT = std::sin(phiin);
-    float pxin = cosPorT * pt;
-    float pyin = sinPorT * pt;
-
+  float cosPorT[asize];
+  float sinPorT[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    cosPorT[aindex] = std::cos(phiin[aindex]);
+    sinPorT[aindex] = std::sin(phiin[aindex]);
+  }
+  float pxin[asize];
+  float pyin[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    pxin[aindex] = cosPorT[aindex] * pt[aindex];
+    pyin[aindex] = sinPorT[aindex] * pt[aindex];
+  }
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    [[maybe_unused]] const int aindex = n - nmin;  // only read by dprint_np; presumably unused when debug printing is compiled out
     dprint_np(n,
-              "k=" << std::setprecision(9) << k << " pxin=" << std::setprecision(9) << pxin
-                   << " pyin=" << std::setprecision(9) << pyin << " cosPorT=" << std::setprecision(9) << cosPorT
-                   << " sinPorT=" << std::setprecision(9) << sinPorT << " pt=" << std::setprecision(9) << pt);
-
+              "k=" << std::setprecision(9) << k[aindex] << " pxin=" << std::setprecision(9) << pxin[aindex]
+                   << " pyin=" << std::setprecision(9) << pyin[aindex] << " cosPorT=" << std::setprecision(9) << cosPorT[aindex]
+                   << " sinPorT=" << std::setprecision(9) << sinPorT[aindex] << " pt=" << std::setprecision(9) << pt[aindex]);
+  }
+  float dDdx[asize];
+  float dDdy[asize];
+  float dDdipt[asize];
+  float dDdphi[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
     //derivatives initialized to value for first iteration, i.e. distance = r-r0in
-    float dDdx = r0 > 0.f ? -xin / r0 : 0.f;
-    float dDdy = r0 > 0.f ? -yin / r0 : 0.f;
-    float dDdipt = 0.;
-    float dDdphi = 0.;
-
+    dDdx[aindex] = r0[aindex] > 0.f ? -xin[aindex] / r0[aindex] : 0.f;
+    dDdy[aindex] = r0[aindex] > 0.f ? -yin[aindex] / r0[aindex] : 0.f;
+    dDdipt[aindex] = 0.;
+    dDdphi[aindex] = 0.;
+  }
+  float oodotp[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
     for (int i = 0; i < Config::Niter; ++i) {
       //compute distance and path for the current iteration
-      r0 = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
+      r0[aindex] = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
 
       // Use one over dot produce of transverse momentum and radial
       // direction to scale the step. Propagation is prevented from reaching
@@ -80,52 +149,52 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
       // - Can / should we come up with a better approximation?
       // - Can / should take +/- curvature into account?
 
-      const float oodotp = r0 * pt / (pxin * outPar(n, 0, 0) + pyin * outPar(n, 1, 0));
+      oodotp[aindex] = r0[aindex] * pt[aindex] / (pxin[aindex] * outPar(n, 0, 0) + pyin[aindex] * outPar(n, 1, 0));
 
-      if (oodotp > 5.0f || oodotp < 0)  // 0.2 is 78.5 deg
+      if (oodotp[aindex] > 5.0f || oodotp[aindex] < 0)  // 0.2 is 78.5 deg
       {
-        id = 0.0f;
+        id[aindex] = 0.0f;
         outFailFlag(n, 0, 0) = 1;
       } else {
         // Can we come up with a better approximation?
         // Should take +/- curvature into account.
-        id = (r - r0) * oodotp;
+        id[aindex] = (r[aindex] - r0[aindex]) * oodotp[aindex];
       }
-      D += id;
+      D[aindex] += id[aindex];
 
       if (Config::useTrigApprox) {
-        sincos4(id * ipt * kinv * 0.5f, sinah, cosah);
+        sincos4(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f, sinah[aindex], cosah[aindex]);
       } else {
-        cosah = std::cos(id * ipt * kinv * 0.5f);
-        sinah = std::sin(id * ipt * kinv * 0.5f);
+        cosah[aindex] = std::cos(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f);
+        sinah[aindex] = std::sin(id[aindex] * ipt[aindex] * kinv[aindex] * 0.5f);
       }
-      cosa = 1.f - 2.f * sinah * sinah;
-      sina = 2.f * sinah * cosah;
+      cosa[aindex] = 1.f - 2.f * sinah[aindex] * sinah[aindex];
+      sina[aindex] = 2.f * sinah[aindex] * cosah[aindex];
 
       dprint_np(n,
                 "Attempt propagation from r="
-                    << r0 << " to r=" << r << std::endl
-                    << "   x=" << xin << " y=" << yin << " z=" << inPar(n, 2, 0) << " px=" << pxin << " py=" << pyin
-                    << " pz=" << pt * std::tan(theta) << " q=" << inChg(n, 0, 0) << std::endl
-                    << "   r=" << std::setprecision(9) << r << " r0=" << std::setprecision(9) << r0
-                    << " id=" << std::setprecision(9) << id << " dr=" << std::setprecision(9) << r - r0
-                    << " cosa=" << cosa << " sina=" << sina);
+                    << r0[aindex] << " to r=" << r[aindex] << std::endl
+                    << "   x=" << xin[aindex] << " y=" << yin[aindex] << " z=" << inPar(n, 2, 0) << " px=" << pxin[aindex] << " py=" << pyin[aindex]
+                    << " pz=" << pt[aindex] * std::tan(theta[aindex]) << " q=" << inChg(n, 0, 0) << std::endl
+                    << "   r=" << std::setprecision(9) << r[aindex] << " r0=" << std::setprecision(9) << r0[aindex]
+                    << " id=" << std::setprecision(9) << id[aindex] << " dr=" << std::setprecision(9) << r[aindex] - r0[aindex]
+                    << " cosa=" << cosa[aindex] << " sina=" << sina[aindex]);
 
       //update derivatives on total distance
       if (i + 1 != Config::Niter) {
         const float x = outPar(n, 0, 0);
         const float y = outPar(n, 1, 0);
-        const float oor0 = (r0 > 0.f && std::abs(r - r0) < 0.0001f) ? 1.f / r0 : 0.f;
+        const float oor0 = (r0[aindex] > 0.f && std::abs(r[aindex] - r0[aindex]) < 0.0001f) ? 1.f / r0[aindex] : 0.f;
 
-        const float dadipt = id * kinv;
+        const float dadipt = id[aindex] * kinv[aindex];
 
-        const float dadx = -x * ipt * kinv * oor0;
+        const float dadx = -x * ipt[aindex] * kinv[aindex] * oor0;
-        const float dady = -y * ipt * kinv * oor0;
+        const float dady = -y * ipt[aindex] * kinv[aindex] * oor0;
 
-        const float pxca = pxin * cosa;
-        const float pxsa = pxin * sina;
-        const float pyca = pyin * cosa;
-        const float pysa = pyin * sina;
+        const float pxca = pxin[aindex] * cosa[aindex];
+        const float pxsa = pxin[aindex] * sina[aindex];
+        const float pyca = pyin[aindex] * cosa[aindex];
+        const float pysa = pyin[aindex] * sina[aindex];
 
         float tmp;
 
@@ -136,73 +205,81 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
-        dDdy -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0;
+        dDdy[aindex] -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0;  // NOTE(review): the file lines just above this hunk are not in the diff; check they index dDdx/pt with [aindex] too
 
         //now r0 depends on ipt and phi as well
-        tmp = dadipt * ipt;
-        dDdipt -=
-            k *
+        tmp = dadipt * ipt[aindex];
+        dDdipt[aindex] -=
+            k[aindex] *
-            (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin) + y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin)) *
+            (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin[aindex]) + y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin[aindex])) *
-            pt * oor0;
-        dDdphi += k * (x * (pysa - pxin + pxca) - y * (pxsa - pyin + pyca)) * oor0;
+            pt[aindex] * oor0;
+        dDdphi[aindex] += k[aindex] * (x * (pysa - pxin[aindex] + pxca) - y * (pxsa - pyin[aindex] + pyca)) * oor0;
       }
 
       //update parameters
-      outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
-      outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
-      const float pxinold = pxin;  //copy before overwriting
-      pxin = pxin * cosa - pyin * sina;
-      pyin = pyin * cosa + pxinold * sina;
+      outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k[aindex] * sinah[aindex] * (pxin[aindex] * cosah[aindex] - pyin[aindex] * sinah[aindex]);
+      outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k[aindex] * sinah[aindex] * (pyin[aindex] * cosah[aindex] + pxin[aindex] * sinah[aindex]);
+      const float pxinold = pxin[aindex];  //copy before overwriting
+      pxin[aindex] = pxin[aindex] * cosa[aindex] - pyin[aindex] * sina[aindex];
+      pyin[aindex] = pyin[aindex] * cosa[aindex] + pxinold * sina[aindex];
 
       dprint_np(n,
-                "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin
-                                   << " pyin=" << pyin);
-    }
-
-    const float alpha = D * ipt * kinv;
-    const float dadx = dDdx * ipt * kinv;
-    const float dady = dDdy * ipt * kinv;
-    const float dadipt = (ipt * dDdipt + D) * kinv;
-    const float dadphi = dDdphi * ipt * kinv;
+                "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin[aindex]
+                                   << " pyin=" << pyin[aindex]);
+    } // iteration loop
+  }
+  float alpha[asize];
+  float dadx[asize];
+  float dady[asize];
+  float dadipt[asize];
+  float dadphi[asize];
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const int aindex = n - nmin;
+    alpha[aindex] = D[aindex] * ipt[aindex] * kinv[aindex];
+    dadx[aindex] = dDdx[aindex] * ipt[aindex] * kinv[aindex];
+    dady[aindex] = dDdy[aindex] * ipt[aindex] * kinv[aindex];
+    dadipt[aindex] = (ipt[aindex] * dDdipt[aindex] + D[aindex]) * kinv[aindex];
+    dadphi[aindex] = dDdphi[aindex] * ipt[aindex] * kinv[aindex];
 
     if (Config::useTrigApprox) {
-      sincos4(alpha, sina, cosa);
+      sincos4(alpha[aindex], sina[aindex], cosa[aindex]);
     } else {
-      cosa = std::cos(alpha);
-      sina = std::sin(alpha);
+      cosa[aindex] = std::cos(alpha[aindex]);
+      sina[aindex] = std::sin(alpha[aindex]);
     }
 
-    errorProp(n, 0, 0) = 1.f + k * dadx * (cosPorT * cosa - sinPorT * sina) * pt;
-    errorProp(n, 0, 1) = k * dady * (cosPorT * cosa - sinPorT * sina) * pt;
+    errorProp(n, 0, 0) = 1.f + k[aindex] * dadx[aindex] * (cosPorT[aindex] * cosa[aindex] - sinPorT[aindex] * sina[aindex]) * pt[aindex];
+    errorProp(n, 0, 1) = k[aindex] * dady[aindex] * (cosPorT[aindex] * cosa[aindex] - sinPorT[aindex] * sina[aindex]) * pt[aindex];
     errorProp(n, 0, 2) = 0.f;
     errorProp(n, 0, 3) =
-        k * (cosPorT * (ipt * dadipt * cosa - sina) + sinPorT * ((1.f - cosa) - ipt * dadipt * sina)) * pt * pt;
+        k[aindex] * (cosPorT[aindex] * (ipt[aindex] * dadipt[aindex] * cosa[aindex] - sina[aindex]) + sinPorT[aindex] * ((1.f - cosa[aindex]) - ipt[aindex] * dadipt[aindex] * sina[aindex])) * pt[aindex] * pt[aindex];
     errorProp(n, 0, 4) =
-        k * (cosPorT * dadphi * cosa - sinPorT * dadphi * sina - sinPorT * sina + cosPorT * cosa - cosPorT) * pt;
+        k[aindex] * (cosPorT[aindex] * dadphi[aindex] * cosa[aindex] - sinPorT[aindex] * dadphi[aindex] * sina[aindex] - sinPorT[aindex] * sina[aindex] + cosPorT[aindex] * cosa[aindex] - cosPorT[aindex]) * pt[aindex];
     errorProp(n, 0, 5) = 0.f;
 
-    errorProp(n, 1, 0) = k * dadx * (sinPorT * cosa + cosPorT * sina) * pt;
-    errorProp(n, 1, 1) = 1.f + k * dady * (sinPorT * cosa + cosPorT * sina) * pt;
+    errorProp(n, 1, 0) = k[aindex] * dadx[aindex] * (sinPorT[aindex] * cosa[aindex] + cosPorT[aindex] * sina[aindex]) * pt[aindex];
+    errorProp(n, 1, 1) = 1.f + k[aindex] * dady[aindex] * (sinPorT[aindex] * cosa[aindex] + cosPorT[aindex] * sina[aindex]) * pt[aindex];
     errorProp(n, 1, 2) = 0.f;
     errorProp(n, 1, 3) =
-        k * (sinPorT * (ipt * dadipt * cosa - sina) + cosPorT * (ipt * dadipt * sina - (1.f - cosa))) * pt * pt;
+        k[aindex] * (sinPorT[aindex] * (ipt[aindex] * dadipt[aindex] * cosa[aindex] - sina[aindex]) + cosPorT[aindex] * (ipt[aindex] * dadipt[aindex] * sina[aindex] - (1.f - cosa[aindex]))) * pt[aindex] * pt[aindex];
     errorProp(n, 1, 4) =
-        k * (sinPorT * dadphi * cosa + cosPorT * dadphi * sina + sinPorT * cosa + cosPorT * sina - sinPorT) * pt;
+        k[aindex] * (sinPorT[aindex] * dadphi[aindex] * cosa[aindex] + cosPorT[aindex] * dadphi[aindex] * sina[aindex] + sinPorT[aindex] * cosa[aindex] + cosPorT[aindex] * sina[aindex] - sinPorT[aindex]) * pt[aindex];
     errorProp(n, 1, 5) = 0.f;
 
     //no trig approx here, theta can be large
-    cosPorT = std::cos(theta);
-    sinPorT = std::sin(theta);
+    cosPorT[aindex] = std::cos(theta[aindex]);
+    sinPorT[aindex] = std::sin(theta[aindex]);
     //redefine sinPorT as 1./sinPorT to reduce the number of temporaries
-    sinPorT = 1.f / sinPorT;
+    sinPorT[aindex] = 1.f / sinPorT[aindex];
 
-    outPar(n, 2, 0) = inPar(n, 2, 0) + k * alpha * cosPorT * pt * sinPorT;
+    outPar(n, 2, 0) = inPar(n, 2, 0) + k[aindex] * alpha[aindex] * cosPorT[aindex] * pt[aindex] * sinPorT[aindex];
 
-    errorProp(n, 2, 0) = k * cosPorT * dadx * pt * sinPorT;
-    errorProp(n, 2, 1) = k * cosPorT * dady * pt * sinPorT;
+    errorProp(n, 2, 0) = k[aindex] * cosPorT[aindex] * dadx[aindex] * pt[aindex] * sinPorT[aindex];
+    errorProp(n, 2, 1) = k[aindex] * cosPorT[aindex] * dady[aindex] * pt[aindex] * sinPorT[aindex];
     errorProp(n, 2, 2) = 1.f;
-    errorProp(n, 2, 3) = k * cosPorT * (ipt * dadipt - alpha) * pt * pt * sinPorT;
-    errorProp(n, 2, 4) = k * dadphi * cosPorT * pt * sinPorT;
-    errorProp(n, 2, 5) = -k * alpha * pt * sinPorT * sinPorT;
+    errorProp(n, 2, 3) = k[aindex] * cosPorT[aindex] * (ipt[aindex] * dadipt[aindex] - alpha[aindex]) * pt[aindex] * pt[aindex] * sinPorT[aindex];
+    errorProp(n, 2, 4) = k[aindex] * dadphi[aindex] * cosPorT[aindex] * pt[aindex] * sinPorT[aindex];
+    errorProp(n, 2, 5) = -k[aindex] * alpha[aindex] * pt[aindex] * sinPorT[aindex] * sinPorT[aindex];
 
-    outPar(n, 3, 0) = ipt;
+    outPar(n, 3, 0) = ipt[aindex];
 
     errorProp(n, 3, 0) = 0.f;
     errorProp(n, 3, 1) = 0.f;
@@ -211,16 +288,16 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
     errorProp(n, 3, 4) = 0.f;
     errorProp(n, 3, 5) = 0.f;
 
-    outPar(n, 4, 0) = inPar(n, 4, 0) + alpha;
+    outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[aindex];
 
-    errorProp(n, 4, 0) = dadx;
-    errorProp(n, 4, 1) = dady;
+    errorProp(n, 4, 0) = dadx[aindex];
+    errorProp(n, 4, 1) = dady[aindex];
     errorProp(n, 4, 2) = 0.f;
-    errorProp(n, 4, 3) = dadipt;
-    errorProp(n, 4, 4) = 1.f + dadphi;
+    errorProp(n, 4, 3) = dadipt[aindex];
+    errorProp(n, 4, 4) = 1.f + dadphi[aindex];
     errorProp(n, 4, 5) = 0.f;
 
-    outPar(n, 5, 0) = theta;
+    outPar(n, 5, 0) = theta[aindex];
 
     errorProp(n, 5, 0) = 0.f;
     errorProp(n, 5, 1) = 0.f;
@@ -237,8 +314,10 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
                   << "   mom = " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
                   << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0)))
                   << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl);
-
 #ifdef DEBUG
+  }  // ends the main loop; kept inside the #ifdef so braces stay balanced when DEBUG is not defined
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
     if (n < N_proc) {
       dmutex_guard;
       std::cout << n << ": jacobian" << std::endl;
diff --git a/RecoTracker/MkFitCore/standalone/Makefile.config b/RecoTracker/MkFitCore/standalone/Makefile.config
index 5f964cb4fd6b6..0b828755f026d 100644
--- a/RecoTracker/MkFitCore/standalone/Makefile.config
+++ b/RecoTracker/MkFitCore/standalone/Makefile.config
@@ -51,13 +51,13 @@ OPT := -g -O3
 
 # 4. Vectorization settings
 ifdef AVX_512
-VEC_GCC  := -march=native # -fopt-info-vec -mavx512f -mavx512cd
+VEC_GCC  := -march=native # -fopt-info-vec -mavx512f -mavx512cd -fopt-info-vec-all
 VEC_ICC  := -xHost -qopt-zmm-usage=high # -xcore-avx512
 else ifdef AVX2
-VEC_GCC  := -mavx2 -mfma
+VEC_GCC  := -mavx2 -mfma # -fopt-info-vec-all
 VEC_ICC  := -mavx2 -mfma
 else ifdef SSE3
-VEC_GCC  := -msse3
+VEC_GCC  := -msse3 # -fopt-info-vec-all
 VEC_ICC  := -msse3
 else
 VEC_GCC  := -mavx # -fopt-info-vec-all
@@ -129,7 +129,8 @@ endif
 
 ifeq ($(CXX), g++)
   CXXFLAGS += -std=c++1z -ftree-vectorize -Werror=main -Werror=pointer-arith -Werror=overlength-strings -Wno-vla -Werror=overflow -Wstrict-overflow -Werror=array-bounds -Werror=format-contains-nul -Werror=type-limits -fvisibility-inlines-hidden -fno-math-errno --param vect-max-version-for-alias-checks=50 -Xassembler --compress-debug-sections -felide-constructors -fmessage-length=0 -Wall -Wno-non-template-friend -Wno-long-long -Wreturn-type -Wunused -Wparentheses -Wno-deprecated -Werror=return-type -Werror=missing-braces -Werror=unused-value -Werror=address -Werror=format -Werror=sign-compare -Werror=write-strings -Werror=delete-non-virtual-dtor -Wstrict-aliasing -Werror=narrowing -Werror=unused-but-set-variable -Werror=reorder -Werror=unused-variable -Werror=conversion-null -Werror=return-local-addr -Wnon-virtual-dtor -Werror=switch -fdiagnostics-show-option -Wno-unused-local-typedefs -Wno-attributes -Wno-psabi
-  CXXFLAGS += -fdiagnostics-color=auto -fdiagnostics-show-option -pthread -pipe -fopenmp-simd
+  CXXFLAGS += -fdiagnostics-color=auto -fdiagnostics-show-option -pthread -pipe -fopenmp-simd -ffast-math
+#CXXFLAGS += -mveclibabi=svml  -lsvml -L/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2022/compiler/latest/linux/compiler/lib/intel64 -Wl,-rpath=/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2022/compiler/latest/linux/compiler/lib/intel64 -funsafe-math-optimizations
 endif
 
 # Try to find a new enough TBB