From 927e1785b77a51d418474499573a60aa68bb3e7f Mon Sep 17 00:00:00 2001 From: christos <christos@cern.ch> Date: Wed, 2 Sep 2020 16:26:55 +0100 Subject: [PATCH 1/2] BFieldCache::getB , continue on vectorize the Brzphi calculation --- .../MagFieldElements/src/BFieldCache.cxx | 88 ++++++++++++++----- 1 file changed, 64 insertions(+), 24 deletions(-) diff --git a/MagneticField/MagFieldElements/src/BFieldCache.cxx b/MagneticField/MagFieldElements/src/BFieldCache.cxx index 332783e8c45d..b689852cf8fd 100644 --- a/MagneticField/MagFieldElements/src/BFieldCache.cxx +++ b/MagneticField/MagFieldElements/src/BFieldCache.cxx @@ -29,7 +29,6 @@ BFieldCache::getB(const double* ATH_RESTRICT xyz, const double gr = 1.0 - fr; const double fphi = (phi - m_phimin) * m_invphi; const double gphi = 1.0 - fphi; - const double scale = m_scale; /* The following implement this calculation @@ -46,33 +45,74 @@ BFieldCache::getB(const double* ATH_RESTRICT xyz, Then we work our way out. Following the formula in a "vertical" manner. + + The following code keeps the same order as operation as the + above formula. As we want identical results. */ - // interpolate field values in z, r, phi - std::array<double, 3> Bzrphi{}; CxxUtils::vec<double, 4> rInterCoeff = { gr, fr, gr, fr }; - for (int i = 0; i < 3; ++i) { // z, r, phi components - - CxxUtils::vec<double, 4> field1 = { - m_field[i][0], m_field[i][1], m_field[i][2], m_field[i][3] - }; - - CxxUtils::vec<double, 4> field2 = { - m_field[i][4], m_field[i][5], m_field[i][6], m_field[i][7] - }; - - CxxUtils::vec<double, 4> gPhiM = field1 * gphi; - CxxUtils::vec<double, 4> fPhiM = field2 * fphi; - - CxxUtils::vec<double, 4> interp = (gPhiM + fPhiM) * rInterCoeff; - - // Since a * (b+c) != (a*b + a*c) - // we try for now to keep binary compatibility, - // retain order of operations . - Bzrphi[i] = - scale * ((interp[0] + interp[1]) * gz + (interp[2] + interp[3]) * fz); - } + // Load Bz at 8 corners of the bin + CxxUtils::vec<double, 4> field1_z = { + m_field[0][0], m_field[0][1], m_field[0][2], m_field[0][3] + }; + CxxUtils::vec<double, 4> field2_z = { + m_field[0][4], m_field[0][5], m_field[0][6], m_field[0][7] + }; + // Load Br at 8 corners of the bin + CxxUtils::vec<double, 4> field1_r = { + m_field[1][0], m_field[1][1], m_field[1][2], m_field[1][3] + }; + CxxUtils::vec<double, 4> field2_r = { + m_field[1][4], m_field[1][5], m_field[1][6], m_field[1][7] + }; + // Load Bphi at 8 corners of the bin + CxxUtils::vec<double, 4> field1_phi = { + m_field[2][0], m_field[2][1], m_field[2][2], m_field[2][3] + }; + CxxUtils::vec<double, 4> field2_phi = { + m_field[2][4], m_field[2][5], m_field[2][6], m_field[2][7] + }; + + CxxUtils::vec<double, 4> gPhiM_z = field1_z * gphi; + CxxUtils::vec<double, 4> fPhiM_z = field2_z * fphi; + CxxUtils::vec<double, 4> interp_z = (gPhiM_z + fPhiM_z) * rInterCoeff; + + CxxUtils::vec<double, 4> gPhiM_r = field1_r * gphi; + CxxUtils::vec<double, 4> fPhiM_r = field2_r * fphi; + CxxUtils::vec<double, 4> interp_r = (gPhiM_r + fPhiM_r) * rInterCoeff; + + CxxUtils::vec<double, 4> gPhiM_phi = field1_phi * gphi; + CxxUtils::vec<double, 4> fPhiM_phi = field2_phi * fphi; + CxxUtils::vec<double, 4> interp_phi = (gPhiM_phi + fPhiM_phi) * rInterCoeff; + + // We end up with + // 3 (z,r,phi) size 4 SIMD vectors : + // The entries of each of the 3 SIMD vectors are : + // 0 : gr * (gphi * field[0] + fphi * field[4]) , + // 1 : fr * (gphi * field[1] + fphi * field[5]) , + // 2 : gr * (gphi * field[2] + fphi * field[6]) , + // 3 : fr * (gphi * field[3] + fphi * field[7]) , + + // Switch to 4 vector of 3 entries (z,r,phi) + CxxUtils::vec<double, 4> Bzrphivec0 = { + interp_z[0], interp_r[0], interp_phi[0], 0 + }; + CxxUtils::vec<double, 4> Bzrphivec1 = { + interp_z[1], interp_r[1], interp_phi[1], 0 + }; + CxxUtils::vec<double, 4> Bzrphivec2 = { + interp_z[2], interp_r[2], interp_phi[2], 0 + }; + CxxUtils::vec<double, 4> Bzrphivec3 = { + interp_z[3], interp_r[3], interp_phi[3], 0 + }; + + // Do the final interpolation ster for all 3 Bz,Br,Bphi + CxxUtils::vec<double, 4> Bzrphi1 = (Bzrphivec0 + Bzrphivec1) * gz; + CxxUtils::vec<double, 4> Bzrphi2 = (Bzrphivec2 + Bzrphivec3) * fz; + // now create the final (r,z,phi) values + CxxUtils::vec<double, 4> Bzrphi = (Bzrphi1 + Bzrphi2) * m_scale; // convert (Bz,Br,Bphi) to (Bx,By,Bz) double invr; -- GitLab From 4bc91cbbb35ea097e4f7bd7c94ae56766f731c73 Mon Sep 17 00:00:00 2001 From: christos <christos@cern.ch> Date: Wed, 2 Sep 2020 16:30:09 +0100 Subject: [PATCH 2/2] BFieldCache::getB , continue on vectorize the Brzphi calculation --- MagneticField/MagFieldElements/src/BFieldCache.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MagneticField/MagFieldElements/src/BFieldCache.cxx b/MagneticField/MagFieldElements/src/BFieldCache.cxx index b689852cf8fd..337335f251f8 100644 --- a/MagneticField/MagFieldElements/src/BFieldCache.cxx +++ b/MagneticField/MagFieldElements/src/BFieldCache.cxx @@ -108,10 +108,10 @@ BFieldCache::getB(const double* ATH_RESTRICT xyz, interp_z[3], interp_r[3], interp_phi[3], 0 }; - // Do the final interpolation ster for all 3 Bz,Br,Bphi + // Do the final step for all 3 Bz,Br,Bphi at once CxxUtils::vec<double, 4> Bzrphi1 = (Bzrphivec0 + Bzrphivec1) * gz; CxxUtils::vec<double, 4> Bzrphi2 = (Bzrphivec2 + Bzrphivec3) * fz; - // now create the final (r,z,phi) values + // now create the final (r,z,phi) values in one pass CxxUtils::vec<double, 4> Bzrphi = (Bzrphi1 + Bzrphi2) * m_scale; // convert (Bz,Br,Bphi) to (Bx,By,Bz) -- GitLab