diff --git a/MagneticField/MagFieldElements/MagFieldElements/BFieldCache.h b/MagneticField/MagFieldElements/MagFieldElements/BFieldCache.h
index 7ba7c5ffed866561d8220381989979750de07088..f18209c09a259b22ff5419509bc3f93a5bd2ce36 100644
--- a/MagneticField/MagFieldElements/MagFieldElements/BFieldCache.h
+++ b/MagneticField/MagFieldElements/MagFieldElements/BFieldCache.h
@@ -11,7 +11,7 @@
  *
  * Masahiro Morii, Harvard University
  *
- * RD Schaffer , Christos Anastopoulos
+ * AthenaMT : RD Schaffer , Christos Anastopoulos
  */
 
 #ifndef BFIELDCACHE_H
@@ -54,7 +54,7 @@ public:
             double* ATH_RESTRICT deriv = nullptr) const;
 
 private:
-  // bin range in z
+ // bin range in z
   double m_zmin = 0.0;
   double m_zmax = 0.0;
   // bin range in r
@@ -62,10 +62,13 @@ private:
   double m_rmax = 0.0;
   // bin range in phi
   double m_phimin = 0.0;
-  double m_phimax = -1.0;          // bin range in phi
-  double m_invz, m_invr, m_invphi; // 1/(bin size) in z, r, phi
-  double m_field[3][8];            // (Bz,Br,Bphi) at 8 corners of the bin
+  double m_phimax = -1.0;         
+  // 1/(bin size) in z, r, phi
+  double m_invz;
+  double m_invr;
+  double m_invphi; 
   double m_scale;                  // unit of m_field in kT
+  alignas(16) double m_field[3][8]; // (Bz,Br,Bphi) at 8 corners of the bin
 };
 
 #include "MagFieldElements/BFieldCache.icc"
diff --git a/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.h b/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.h
index 7424d87ab3de46217aa4186211d851693c20b545..b6dc1f34ffce2e01150893a173971af2c1929dbc 100644
--- a/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.h
+++ b/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.h
@@ -2,14 +2,16 @@
   Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
 */
 
-//
-// BFieldMesh.h
-//
-// Generic 3-d mesh representing a simple field map.
-// The field type is templated - it may be short (for the toroid) or double (for
-// the solenoid)
-//
-// Masahiro Morii, Harvard University
+/*
+ * BFieldMesh.h
+ * Generic 3-d mesh representing a simple field map.
+ * The field type is templated - it may be short (for the toroid) or double (for
+ * the solenoid)
+ *
+ * Masahiro Morii, Harvard University
+ *
+ * AthenaMT : RD Schaffer , Christos Anastopoulos
+ */
 //
 #ifndef BFIELDMESH_H
 #define BFIELDMESH_H
diff --git a/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.icc b/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.icc
index 114bffa4fdcebe9052d81b1141c4203aea1d765b..1abe3474d8d6176af368125686d235dd71f3c59f 100644
--- a/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.icc
+++ b/MagneticField/MagFieldElements/MagFieldElements/BFieldMesh.icc
@@ -72,58 +72,35 @@ BFieldMesh<T>::getCache(double z,
   // store the bin edges
   cache.setRange(mz[iz], mz[iz + 1], mr[ir], mr[ir + 1], mphi[iphi], mphi[iphi + 1]);
 
-  
-  // store the B field at the 8 corners
-  const int im0 = iz * m_zoff + ir * m_roff + iphi; // index of the first corner
-
-  
-  // ** The following field initialization is commented out, and replaced by subsequent matrix
-  // ** initialization as optimization test
-  
-
-  // std::array<BFieldVector<T>, 8> field = {
-  //   m_field[im0],
-  //   m_field[im0 + 1],
-  //   m_field[im0 + m_roff],
-  //   m_field[im0 + m_roff + 1],
-  //   m_field[im0 + m_zoff],
-  //   m_field[im0 + m_zoff + 1],
-  //   m_field[im0 + m_zoff + m_roff],
-  //   m_field[im0 + m_zoff + m_roff + 1]
-  // };
-
-  // cache.setField(field, scaleFactor);
 
+  // store the B field at the 8 corners in cache
+  const int im0 = iz * m_zoff + ir * m_roff + iphi; // index of the first corner
   const double sf = scaleFactor;
-
-  double field1[3][8] = {
-      { sf * m_field[im0][0],                        
-        sf * m_field[im0 + 1][0],                    
-        sf * m_field[im0 + m_roff][0],               
-        sf * m_field[im0 + m_roff + 1][0],           
-        sf * m_field[im0 + m_zoff][0],               
-        sf * m_field[im0 + m_zoff + 1][0],           
-        sf * m_field[im0 + m_zoff + m_roff][0],      
-        sf * m_field[im0 + m_zoff + m_roff + 1][0] },
-      { sf * m_field[im0][1],                      
-        sf * m_field[im0 + 1][1],                  
-        sf * m_field[im0 + m_roff][1],             
-        sf * m_field[im0 + m_roff + 1][1],         
-        sf * m_field[im0 + m_zoff][1],             
-        sf * m_field[im0 + m_zoff + 1][1],         
-        sf * m_field[im0 + m_zoff + m_roff][1],    
-        sf * m_field[im0 + m_zoff + m_roff + 1][1] },
-      { sf * m_field[im0][2],                     
-        sf * m_field[im0 + 1][2],                 
-        sf * m_field[im0 + m_roff][2],            
-        sf * m_field[im0 + m_roff + 1][2],        
-        sf * m_field[im0 + m_zoff][2],            
-        sf * m_field[im0 + m_zoff + 1][2],        
-        sf * m_field[im0 + m_zoff + m_roff][2],   
-        sf * m_field[im0 + m_zoff + m_roff + 1][2] }
-  };
+  double field1[3][8] = { { sf * m_field[im0][0],
+                            sf * m_field[im0 + m_roff][0],
+                            sf * m_field[im0 + m_zoff][0],
+                            sf * m_field[im0 + m_zoff + m_roff][0],
+                            sf * m_field[im0 + 1][0],
+                            sf * m_field[im0 + m_roff + 1][0],
+                            sf * m_field[im0 + m_zoff + 1][0],
+                            sf * m_field[im0 + m_zoff + m_roff + 1][0] },
+                          { sf * m_field[im0][1],
+                            sf * m_field[im0 + m_roff][1],
+                            sf * m_field[im0 + m_zoff][1],
+                            sf * m_field[im0 + m_zoff + m_roff][1],
+                            sf * m_field[im0 + 1][1],
+                            sf * m_field[im0 + m_roff + 1][1],
+                            sf * m_field[im0 + m_zoff + 1][1],
+                            sf * m_field[im0 + m_zoff + m_roff + 1][1] },
+                          { sf * m_field[im0][2],
+                            sf * m_field[im0 + m_roff][2],
+                            sf * m_field[im0 + m_zoff][2],
+                            sf * m_field[im0 + m_zoff + m_roff][2],
+                            sf * m_field[im0 + 1][2],
+                            sf * m_field[im0 + m_roff + 1][2],
+                            sf * m_field[im0 + m_zoff + 1][2],
+                            sf * m_field[im0 + m_zoff + m_roff + 1][2] } };
   cache.setField(field1);
-  
   // store the B scale
   cache.setBscale(m_scale);
 }
diff --git a/MagneticField/MagFieldElements/src/BFieldCache.cxx b/MagneticField/MagFieldElements/src/BFieldCache.cxx
index 4a24558c24e7e807f24d469a899a3e1ee3bc664c..332783e8c45d39c8a6f33c2295e5fedc4ab7a29c 100644
--- a/MagneticField/MagFieldElements/src/BFieldCache.cxx
+++ b/MagneticField/MagFieldElements/src/BFieldCache.cxx
@@ -3,6 +3,7 @@
 */
 
 #include "MagFieldElements/BFieldCache.h"
+#include "CxxUtils/vec.h"
 #include <cmath>
 
 void
@@ -29,15 +30,50 @@ BFieldCache::getB(const double* ATH_RESTRICT xyz,
   const double fphi = (phi - m_phimin) * m_invphi;
   const double gphi = 1.0 - fphi;
   const double scale = m_scale;
+
+  /*
+   The following implement this  calculation
+   const double* field = m_field[i];
+   Bzrphi[i] = scale * (gz * ( gr * (gphi * field[0] + fphi * field[4]) +
+                               fr * (gphi * field[1] + fphi * field[5])) +
+                         fz * (gr * (gphi * field[2] + fphi * field[6]) +
+                               fr * (gphi * field[3] + fphi * field[7])));
+  in SIMD fashion.
+  The "lanes"  are
+  ( field[0], field[1], field[2], field[3])
+  ( field[4], field[5], field[6], field[7])
+  aka the "vertical" part of the inermost parenthesis.
+
+  Then we work our way out. Following the formula in a "vertical"
+  manner.
+  */
+
   // interpolate field values in z, r, phi
-  double Bzrphi[3];
+  std::array<double, 3> Bzrphi{};
+  CxxUtils::vec<double, 4> rInterCoeff = { gr, fr, gr, fr };
+
   for (int i = 0; i < 3; ++i) { // z, r, phi components
-    const double* field = m_field[i];
-    Bzrphi[i] = scale * (gz * (gr * (gphi * field[0] + fphi * field[1]) +
-                               fr * (gphi * field[2] + fphi * field[3])) +
-                         fz * (gr * (gphi * field[4] + fphi * field[5]) +
-                               fr * (gphi * field[6] + fphi * field[7])));
+
+    CxxUtils::vec<double, 4> field1 = {
+      m_field[i][0], m_field[i][1], m_field[i][2], m_field[i][3]
+    };
+
+    CxxUtils::vec<double, 4> field2 = {
+      m_field[i][4], m_field[i][5], m_field[i][6], m_field[i][7]
+    };
+
+    CxxUtils::vec<double, 4> gPhiM = field1 * gphi;
+    CxxUtils::vec<double, 4> fPhiM = field2 * fphi;
+
+    CxxUtils::vec<double, 4> interp = (gPhiM + fPhiM) * rInterCoeff;
+
+    // Since a * (b+c) != (a*b + a*c)
+    // we try for now to keep binary compatibility,
+    // retain order of operations .
+    Bzrphi[i] =
+      scale * ((interp[0] + interp[1]) * gz + (interp[2] + interp[3]) * fz);
   }
+
   // convert (Bz,Br,Bphi) to (Bx,By,Bz)
   double invr;
   double c;
@@ -61,24 +97,25 @@ BFieldCache::getB(const double* ATH_RESTRICT xyz,
     const double sr = m_scale * m_invr;
     const double sphi = m_scale * m_invphi;
 
-    std::array<double, 3> dBdz;
-    std::array<double, 3> dBdr;
-    std::array<double, 3> dBdphi;
+    std::array<double, 3> dBdz{};
+    std::array<double, 3> dBdr{};
+    std::array<double, 3> dBdphi{};
 
     for (int j = 0; j < 3; ++j) { // Bz, Br, Bphi components
       const double* field = m_field[j];
       dBdz[j] =
         sz *
-        (gr * (gphi * (field[4] - field[0]) + fphi * (field[5] - field[1])) +
-         fr * (gphi * (field[6] - field[2]) + fphi * (field[7] - field[3])));
+        (gr * (gphi * (field[2] - field[0]) + fphi * (field[6] - field[4])) +
+         fr * (gphi * (field[3] - field[1]) + fphi * (field[7] - field[5])));
       dBdr[j] =
         sr *
-        (gz * (gphi * (field[2] - field[0]) + fphi * (field[3] - field[1])) +
-         fz * (gphi * (field[6] - field[4]) + fphi * (field[7] - field[5])));
+        (gz * (gphi * (field[1] - field[0]) + fphi * (field[5] - field[4])) +
+         fz * (gphi * (field[3] - field[2]) + fphi * (field[7] - field[6])));
       dBdphi[j] =
-        sphi * (gz * (gr * (field[1] - field[0]) + fr * (field[3] - field[2])) +
-                fz * (gr * (field[5] - field[4]) + fr * (field[7] - field[6])));
+        sphi * (gz * (gr * (field[4] - field[0]) + fr * (field[5] - field[1])) +
+                fz * (gr * (field[6] - field[2]) + fr * (field[7] - field[3])));
     }
+
     // convert to cartesian coordinates
     const double cc = c * c;
     const double cs = c * s;
@@ -104,4 +141,3 @@ BFieldCache::getB(const double* ATH_RESTRICT xyz,
   }
 }
 
-
diff --git a/MagneticField/MagFieldElements/test/BFieldExample_test.cxx b/MagneticField/MagFieldElements/test/BFieldExample_test.cxx
index 7eec7f7db067dca7266bbee14585ff2ea545ad6a..484d5747e4900693c465d5c2a4b5a4a4ebed364b 100644
--- a/MagneticField/MagFieldElements/test/BFieldExample_test.cxx
+++ b/MagneticField/MagFieldElements/test/BFieldExample_test.cxx
@@ -191,7 +191,7 @@ public:
 
       // do interpolation (cache3d has correct scale factor)
       zone.getCache(z, r, phi, cache3d, 1);
-      cache3d.getB(xyz, r1, phi, bxyz, 0);
+      cache3d.getB(xyz, r1, phi, bxyz, nullptr);
 
       std::cout << "get field std: i, bxyz " << i << " " << bxyz[0] << ", "
                 << bxyz[1] << ", " << bxyz[2] << " fractional diff gt 10^-5: "