diff --git a/Tracking/TrkExtrapolation/TrkExUtils/src/RungeKuttaUtils.cxx b/Tracking/TrkExtrapolation/TrkExUtils/src/RungeKuttaUtils.cxx
index 9d99cae54bef9af90d10c9b3d8cfadef7ce139da..10be54b5acbd4b00e307ec0d51bc4a6ac4a0df8e 100755
--- a/Tracking/TrkExtrapolation/TrkExUtils/src/RungeKuttaUtils.cxx
+++ b/Tracking/TrkExtrapolation/TrkExUtils/src/RungeKuttaUtils.cxx
@@ -49,9 +49,7 @@ globalToLocalVecHelper(double* ATH_RESTRICT P,
 {
   using namespace CxxUtils;
   using vec2 = CxxUtils::vec<double, 2>;
-  using vec4 = CxxUtils::vec<double, 4>;
-
-  /* Calculation
+  /* The calculation in its original (scalar) form:
       P[ 7]-=(s0*P[ 3]); P[ 8]-=(s0*P[ 4]); P[ 9]-=(s0*P[ 5]);
       P[10]-=(s0*P[42]); P[11]-=(s0*P[43]); P[12]-=(s0*P[44]);
       P[14]-=(s1*P[ 3]); P[15]-=(s1*P[ 4]); P[16]-=(s1*P[ 5]);
@@ -73,51 +71,73 @@ globalToLocalVecHelper(double* ATH_RESTRICT P,
    * --> {P[30],P[31]}
    */
   vec2 Pmult1 = { P[3], P[4] };
-  vec4 Pmult2 = { P[5], P[42], P[43], P[44] };
+  vec2 Pmult2 = { P[5], P[42] };
+  vec2 Pmult3 = { P[43], P[44] };
+
   vec2 dXdL0_dYdL0;
   vload(dXdL0_dYdL0, &P[7]);
-  vec4 dZdL0_dAxdL0_dAydL0_dAzdL0;
-  vload(dZdL0_dAxdL0_dAydL0_dAzdL0, &P[9]);
+  vec2 dZdL0_dAxdL0;
+  vload(dZdL0_dAxdL0, &P[9]);
+  vec2 dAydL0_dAzdL0;
+  vload(dAydL0_dAzdL0, &P[11]);
   dXdL0_dYdL0 -= s0 * Pmult1;
-  dZdL0_dAxdL0_dAydL0_dAzdL0 -= s0 * Pmult2;
+  dZdL0_dAxdL0 -= s0 * Pmult2;
+  dAydL0_dAzdL0 -= s0 * Pmult3;
   vstore(&P[7], dXdL0_dYdL0);
-  vstore(&P[9], dZdL0_dAxdL0_dAydL0_dAzdL0);
+  vstore(&P[9], dZdL0_dAxdL0);
+  vstore(&P[11], dAydL0_dAzdL0);
 
   vec2 dXdL1_dYdL1;
   vload(dXdL1_dYdL1, &P[14]);
-  vec4 dZdL1_dAxdL1_dAydL1_dAzdL1;
-  vload(dZdL1_dAxdL1_dAydL1_dAzdL1, &P[16]);
+  vec2 dZdL1_dAxdL1;
+  vload(dZdL1_dAxdL1, &P[16]);
+  vec2 dAydL1_dAzdL1;
+  vload(dAydL1_dAzdL1, &P[18]);
   dXdL1_dYdL1 -= s1 * Pmult1;
-  dZdL1_dAxdL1_dAydL1_dAzdL1 -= s1 * Pmult2;
+  dZdL1_dAxdL1 -= s1 * Pmult2;
+  dAydL1_dAzdL1 -= s1 * Pmult3;
   vstore(&P[14], dXdL1_dYdL1);
-  vstore(&P[16], dZdL1_dAxdL1_dAydL1_dAzdL1);
+  vstore(&P[16], dZdL1_dAxdL1);
+  vstore(&P[18], dAydL1_dAzdL1);
 
   vec2 dXdPhi_dYdPhi;
   vload(dXdPhi_dYdPhi, &P[21]);
-  vec4 dZdPhi_dAxdPhi_dAydPhi_dAzdPhi;
-  vload(dZdPhi_dAxdPhi_dAydPhi_dAzdPhi, &P[23]);
+  vec2 dZdPhi_dAxdPhi;
+  vload(dZdPhi_dAxdPhi, &P[23]);
+  vec2 dAydPhi_dAzdPhi;
+  vload(dAydPhi_dAzdPhi, &P[25]);
   dXdPhi_dYdPhi -= s2 * Pmult1;
-  dZdPhi_dAxdPhi_dAydPhi_dAzdPhi -= s2 * Pmult2;
+  dZdPhi_dAxdPhi -= s2 * Pmult2;
+  dAydPhi_dAzdPhi -= s2 * Pmult3;
   vstore(&P[21], dXdPhi_dYdPhi);
-  vstore(&P[23], dZdPhi_dAxdPhi_dAydPhi_dAzdPhi);
+  vstore(&P[23], dZdPhi_dAxdPhi);
+  vstore(&P[25], dAydPhi_dAzdPhi);
 
   vec2 dXdTheta_dYdTheta;
   vload(dXdTheta_dYdTheta, &P[28]);
-  vec4 dZdTheta_dAxdTheta_dAydTheta_dAzdTheta;
-  vload(dZdTheta_dAxdTheta_dAydTheta_dAzdTheta, &P[30]);
+  vec2 dZdTheta_dAxdTheta;
+  vload(dZdTheta_dAxdTheta, &P[30]);
+  vec2 dAydTheta_dAzdTheta;
+  vload(dAydTheta_dAzdTheta, &P[32]);
   dXdTheta_dYdTheta -= s3 * Pmult1;
-  dZdTheta_dAxdTheta_dAydTheta_dAzdTheta -= s3 * Pmult2;
+  dZdTheta_dAxdTheta -= s3 * Pmult2;
+  dAydTheta_dAzdTheta -= s3 * Pmult3;
   vstore(&P[28], dXdTheta_dYdTheta);
-  vstore(&P[30], dZdTheta_dAxdTheta_dAydTheta_dAzdTheta);
+  vstore(&P[30], dZdTheta_dAxdTheta);
+  vstore(&P[32], dAydTheta_dAzdTheta);
 
   vec2 dXdCM_dYdCM;
   vload(dXdCM_dYdCM, &P[35]);
-  vec4 dZdCM_dAxdCM_AydCM_dAzdCM;
-  vload(dZdCM_dAxdCM_AydCM_dAzdCM, &P[37]);
+  vec2 dZdCM_dAxdCM;
+  vload(dZdCM_dAxdCM, &P[37]);
+  vec2 dAydCM_dAzdCM;
+  vload(dAydCM_dAzdCM, &P[39]);
   dXdCM_dYdCM -= s4 * Pmult1;
-  dZdCM_dAxdCM_AydCM_dAzdCM -= s4 * Pmult2;
+  dZdCM_dAxdCM -= s4 * Pmult2;
+  dAydCM_dAzdCM -= s4 * Pmult3;
   vstore(&P[35], dXdCM_dYdCM);
-  vstore(&P[37], dZdCM_dAxdCM_AydCM_dAzdCM);
+  vstore(&P[37], dZdCM_dAxdCM);
+  vstore(&P[39], dAydCM_dAzdCM);
 }
 
 inline void
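
For reference, the update this hunk restructures can be sketched in standalone C++ as follows. The names (sketch::Mult, updateBlockPairwise, globalToLocalSketch) are illustrative only and do not exist in the real code, which works directly on the transport array P with CxxUtils::vec<double, 2> plus vload/vstore. The sketch only shows the arithmetic being vectorized: each 6-entry derivative block at offsets 7, 14, 21, 28 and 35 is reduced by its scale factor times the shared multipliers {P[3..5], P[42..44]}, processed as the three pairs {X,Y}, {Z,Ax}, {Ay,Az} used above.

// Standalone sketch (illustrative names, plain scalar pairs instead of SIMD).
#include <cstddef>

namespace sketch {

// Multipliers shared by every derivative block; they correspond to
// {P[3], P[4], P[5], P[42], P[43], P[44]} in the transport array.
struct Mult {
  double m[6];
};

// Subtract s * mult from one 6-entry derivative block starting at blk,
// processed as three independent pairs, mirroring the vec2 grouping
// {X,Y}, {Z,Ax}, {Ay,Az} in the patched code.
inline void updateBlockPairwise(double* blk, double s, const Mult& mult)
{
  for (std::size_t pair = 0; pair < 3; ++pair) {
    blk[2 * pair]     -= s * mult.m[2 * pair];
    blk[2 * pair + 1] -= s * mult.m[2 * pair + 1];
  }
}

// Apply the update to the five blocks (dL0, dL1, dPhi, dTheta, dCM)
// at offsets 7, 14, 21, 28, 35, as globalToLocalVecHelper does.
inline void globalToLocalSketch(double* P,
                                double s0, double s1, double s2,
                                double s3, double s4)
{
  const Mult mult{{P[3], P[4], P[5], P[42], P[43], P[44]}};
  const double s[5] = {s0, s1, s2, s3, s4};
  for (std::size_t i = 0; i < 5; ++i) {
    updateBlockPairwise(&P[7 + 7 * i], s[i], mult);
  }
}

} // namespace sketch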