diff --git a/Tr/TrackFitEvent/Event/FitNode.h b/Tr/TrackFitEvent/Event/FitNode.h
index b5ba89f04cda6e04e103f4eada1f67f7abbd4971..9d3d95511976140cf8988b61a1fad32739a1625f 100755
--- a/Tr/TrackFitEvent/Event/FitNode.h
+++ b/Tr/TrackFitEvent/Event/FitNode.h
@@ -340,9 +340,13 @@ namespace LHCb
         s << " measurement of type " << node.measurement().type() 
           << " with LHCbID " << node.measurement().lhcbID().channelID();
       }
-      s << " at z " << node.z()
-        << " with chi2 " << node.chi2() << "\n"
-        << " transport matrix ";
+      s << " at z " << node.z();
+      if (node.hasMeasurement()) {
+        s << " with chi2 " << node.chi2() << "\n";
+      } else {
+        s << " with chi2 -\n";
+      }
+      s << " transport matrix ";
       node.print(s, node.transportMatrix());
       s << "\n inverse transport matrix ";
       node.print(s, node.invertTransportMatrix());
@@ -353,7 +357,11 @@ namespace LHCb
       s << " refVector ";
       node.print(s, node.refVector().parameters());
       s << " refResidual " << node.refResidual()
-        << " errMeasure " << node.errMeasure();
+        << " errMeasure " << node.errMeasure() << "\n"
+        << " noiseMatrix ";
+      node.print(s, node.noiseMatrix());
+      s << " transportVector ";
+      node.print(s, node.transportVector());
 
       return s;
     }
diff --git a/Tr/TrackFitter/src/TrackMasterFitter.cpp b/Tr/TrackFitter/src/TrackMasterFitter.cpp
index 14c9c87e99679ea1cafe3e1557d6f2b317f0a044..772a853233f3656051e421f2df159be9da7d21cc 100644
--- a/Tr/TrackFitter/src/TrackMasterFitter.cpp
+++ b/Tr/TrackFitter/src/TrackMasterFitter.cpp
@@ -523,6 +523,11 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const
   // one.
   const double totalchi2  = std::max(nodes.front()->totalChi2(LHCb::FitNode::Backward).chi2(),
                                       nodes.back()->totalChi2(LHCb::FitNode::Forward).chi2());
+
+  if (m_debugLevel){
+    debug() << "outlierRemoved: total chi2 " << totalchi2 << endmsg;
+  }
+
   using NodeWithChi2 = std::pair<const FitNode*, double>;
   std::vector< NodeWithChi2 > nodesWithChi2UL;
   nodesWithChi2UL.reserve(nodes.size());
@@ -538,6 +543,17 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const
           const auto* tmpnode =  node->prevNode(dir);
           if (tmpnode) chi2MatchAndHit -= tmpnode->totalChi2(dir).chi2();
         }
+
+        if (m_debugLevel){
+          const auto* prevnode = node->prevNode(LHCb::FitNode::Forward);
+          const auto* nextnode = node->prevNode(LHCb::FitNode::Backward);
+
+          debug() << "node LHCbID " << node->measurement().lhcbID().channelID()
+            << " chi2Contribution " << chi2MatchAndHit << " (" << totalchi2 << " - "
+            << (prevnode ? prevnode->totalChi2(LHCb::FitNode::Forward).chi2() : 0) << " - "
+            << (nextnode ? nextnode->totalChi2(LHCb::FitNode::Backward).chi2() : 0) << ")" << endmsg;
+        }
+
         if (chi2MatchAndHit > m_chi2Outliers) {
           nodesWithChi2UL.emplace_back( node, chi2MatchAndHit);
         }
@@ -549,9 +565,23 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const
   auto worstChi2 = m_chi2Outliers;
   std::sort(nodesWithChi2UL.begin(), nodesWithChi2UL.end(), DecreasingChi2);
 
+  if (m_debugLevel) {
+    debug() << "All bad measurements, ordered: " << endmsg;
+    for (const auto& nodeUL : nodesWithChi2UL ) {
+      auto& node = nodeUL.first;
+      debug() << "Measurement of type " << node->measurement().type()
+              << " LHCbID " << node->measurement().lhcbID().channelID()
+              << " at z " << node->z()
+              << " with chi2Contribution " << nodeUL.second
+              << " and chi2 " << node->chi2()
+              << endmsg;
+    }
+  }
+
   // -- if the first is smaller than worstChi2, and it is sorted in decreasing order
   // -- the 'if' will never be true and we can return here (M. De Cian)
   if (!nodesWithChi2UL.empty() && nodesWithChi2UL.front().second < worstChi2) return outlier;
+
   for (const auto& node : nodesWithChi2UL) {
     if (node.second > worstChi2) {
       const auto chi2 = node.first->chi2();
@@ -562,14 +592,18 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const
       }
     }
   }
-  if (outlier && m_debugLevel) {
-    debug() << "Measurement of type="
-            << outlier -> measurement().type()
-            << " LHCbID="
-            << outlier -> measurement().lhcbID().channelID()
-            << " at z=" << outlier->z()
-            << " with chi2=" << outlier->chi2()
-            << " removed." << endmsg;
+  if (m_debugLevel) {
+    if (outlier) {
+      debug() << "Measurement of type "
+              << outlier -> measurement().type()
+              << " LHCbID "
+              << outlier -> measurement().lhcbID().channelID()
+              << " at z " << outlier->z()
+              << " with chi2 " << outlier->chi2()
+              << " removed." << endmsg;
+    } else {
+      debug() << "Outlier not found" << endmsg;
+    }
   }
 
   return outlier;
@@ -849,6 +883,10 @@ StatusCode TrackMasterFitter::updateTransport(LHCb::Track& track) const
     auto inode = nodes.begin();
     const LHCb::StateVector* refvector = &((*inode)->refVector());
     TrackMatrix F =  ROOT::Math::SMatrixIdentity();
+
+    // Bugfix: the loop below starts at the second node, so the first node's filter status must be reset explicitly here
+    (*inode)->resetFilterStatus();
+
     for (++inode; inode!=nodes.end() && sc.isSuccess(); ++inode) {
 
       FitNode* node = *inode;
diff --git a/Tr/TrackFitter/src/TrackVectorFitter.cpp b/Tr/TrackFitter/src/TrackVectorFitter.cpp
index 5fbc381ed8fdc78ef23d698115574537752d10f2..e9684648c3409bac4f868f2d36fe6696fdda40f7 100644
--- a/Tr/TrackFitter/src/TrackVectorFitter.cpp
+++ b/Tr/TrackFitter/src/TrackVectorFitter.cpp
@@ -7,7 +7,7 @@ DECLARE_TOOL_FACTORY(TrackVectorFitter)
 namespace Tr {
 namespace TrackVectorFitter {
 
-thread_local Tr::TrackVectorFit::TrackVectorFit fit;
+thread_local Tr::TrackVectorFit::TrackVectorFit fitter;
 
 }
 }
@@ -41,7 +41,7 @@ TrackVectorFitter::TrackVectorFitter (
 /**
  * @brief Initializes all tools and debug level.
  */
-StatusCode TrackVectorFitter::initialize () {
+StatusCode TrackVectorFitter::initialize () {
   // Note: Do we really need to specify the pid, or is it always 211 for the fit?
 
   // Count the number of hits of each type
@@ -138,7 +138,7 @@ StatusCode TrackVectorFitter::operator() (
   // __itt_resume();
   // }
   
-  Tr::TrackVectorFitter::fit.reset(updatedStatesStore);
+  Tr::TrackVectorFitter::fitter.reset(updatedStatesStore, msgLevel(MSG::DEBUG));
 
   // Generate the Tr::TrackVectorFit::Tracks
   std::list<Tr::TrackVectorFit::Track> tList;
@@ -177,7 +177,7 @@ StatusCode TrackVectorFitter::operator() (
   unsigned tSize = tConvergence.size();
 
   // Run the vector scheduler
-  Tr::TrackVectorFitter::fit.initializeBasePointers(tConvergence);
+  Tr::TrackVectorFitter::fitter.initializeBasePointers<false>(tConvergence);
 
   std::for_each(tConvergence.begin(), tConvergence.end(), [&] (Tr::TrackVectorFit::Track& t) {
     // Populate the reference vectors from the defined states
@@ -186,12 +186,9 @@ StatusCode TrackVectorFitter::operator() (
     // Project measurements, update residuals and materials
     // for the first time
     projectAndUpdate(t, pid);
-
-    // Populate with the seed covariance
-    generateAndSetSeedCovariance(t);
   });
 
-  Tr::TrackVectorFitter::fit(tConvergence);
+  Tr::TrackVectorFitter::fitter.smoothFit<false>(tConvergence);
   eraseFailed(tConvergence);
 
   if (msgLevel(MSG::DEBUG)) {
@@ -209,16 +206,22 @@ StatusCode TrackVectorFitter::operator() (
     const bool sameSize = (tSize == tConvergence.size());
     tSize = tConvergence.size();
 
-    if ( !sameSize ) {
+    if (!sameSize) {
       // Run the vector scheduler
-      Tr::TrackVectorFitter::fit.initializeBasePointers(tConvergence);
+      Tr::TrackVectorFitter::fitter.initializeBasePointers<true>(
+        tConvergence,
+        !(m_updateMaterial && m_applyMaterialCorrections)
+      );
+    } else {
+      // Even though we don't initialize the scheduler and pointers,
+      // we still need to copy the smoothed state into the ref vectors
+      std::for_each(tConvergence.begin(), tConvergence.end(), [&] (Tr::TrackVectorFit::Track& t) {
+        t.updateRefVectors();
+      });
     }
 
     std::for_each(tConvergence.begin(), tConvergence.end(), [&] (Tr::TrackVectorFit::Track& t) {
-      // Update reference trajectories with smoothed states
-      t.updateRefVectors();
-
-      projectAndUpdate(t, pid, m_updateMaterial, m_updateTransport && i <= m_maxUpdateTransports+1);
+      projectAndUpdate(t, pid, m_updateMaterial);
       // Note: projectAndUpdate may flaw the track (set its fit status to FitFailed).
       //       We proceed with those nevertheless, otherwise we screw up the scheduler.
       //       We will check them after the fit anyway.
@@ -227,7 +230,7 @@ StatusCode TrackVectorFitter::operator() (
       t.saveChi2();
     });
 
-    Tr::TrackVectorFitter::fit(tConvergence);
+    Tr::TrackVectorFitter::fitter.smoothFit<false>(tConvergence);
 
     if (msgLevel(MSG::DEBUG)) {
       debug() << "Convergence iteration #" << i << endmsg;
@@ -310,15 +313,16 @@ StatusCode TrackVectorFitter::operator() (
       // In the outlier removal iterations,
       // we always need to initialize the base pointers,
       // as the track size varies
-      Tr::TrackVectorFitter::fit.initializeBasePointers(tOutliers);
+      Tr::TrackVectorFitter::fitter.initializeBasePointers<true>(
+        tOutliers,
+        !(m_updateMaterial && m_applyMaterialCorrections)
+      );
 
       std::for_each(tOutliers.begin(), tOutliers.end(), [&] (Tr::TrackVectorFit::Track& t) {
-        t.updateRefVectors();
-        projectAndUpdate(t, pid, m_updateMaterial, m_updateTransport);
+        projectAndUpdate(t, pid, m_updateMaterial);
       });
 
-      Tr::TrackVectorFitter::fit(tOutliers);
-      eraseFailed(tOutliers);
+      Tr::TrackVectorFitter::fitter.smoothFit<true>(tOutliers);
 
       if (msgLevel(MSG::DEBUG)) {
         debug() << "Outlier removal iteration #" << i << endmsg;
@@ -326,6 +330,8 @@ StatusCode TrackVectorFitter::operator() (
           printNodes(t);
         });
       }
+
+      eraseFailed(tOutliers);
     }
 
     // Add leftovers
@@ -356,7 +362,7 @@ void TrackVectorFitter::populateTracks (
 ) const {
   std::for_each(tracks.begin(), tracks.end(), [&] (Tr::TrackVectorFit::Track& t) {
     // Populate nodes with states, covariances, etc.
-    Tr::TrackVectorFitter::fit.populateNodes(t);
+    Tr::TrackVectorFitter::fitter.populateNodes(t);
 
     // Determine states
     determineStates(t);
@@ -382,43 +388,6 @@ void TrackVectorFitter::generateFitResult (LHCb::Track& track) const {
   }
 }
 
-/**
- * @brief Generates the seed covariance, and populates it.
- */
-void TrackVectorFitter::generateAndSetSeedCovariance (Tr::TrackVectorFit::Track& t) const {
-  const auto& track = t.track();
-  const auto& nodes = t.nodes();
-  Gaudi::TrackSymMatrix seedCovariance;
-
-  // Set off-diagonal elements to zero
-  if (m_useSeedStateErrors) {
-    LHCb::State state0 = track.firstState();
-    auto z1 = nodes.front().node().z();
-    extrapolator(track.type())->propagate(state0, z1);
-    seedCovariance = state0.covariance();
-    if (msgLevel(MSG::DEBUG)) {
-      debug() << " state0 at z " << z1
-              << " vector " << state0.stateVector() << "\n"
-              << " covariance " << state0.covariance() << endmsg;
-    }
-  } else {
-    seedCovariance(0,0) = m_errorX * m_errorX;
-    seedCovariance(1,1) = m_errorY * m_errorY;
-    seedCovariance(2,2) = m_errorTx * m_errorTx;
-    seedCovariance(3,3) = m_errorTy * m_errorTy;
-
-    const auto temp = m_errorQoP[0] * track.firstState().qOverP();
-    seedCovariance(4,4) = temp * temp + m_errorQoP[1] * m_errorQoP[1];
-  }
-
-  if (msgLevel(MSG::DEBUG)) {
-    debug() << "SeedState: z = " << nodes.front().node().z()
-            << " covariance  = " << seedCovariance << endmsg;
-  }
-
-  t.setSeedCovariance(std::move(seedCovariance));
-}
-
 /**
  * @brief Determines the z position of the closest approach
  *        to the beam line by linear extrapolation.
@@ -488,28 +457,6 @@ StatusCode TrackVectorFitter::makeNodes (LHCb::Track& track, const LHCb::Tr::PID
     }
   );
 
-  // Create reference nodes depending on track type
-  if (m_addDefaultRefNodes) {
-    if (track.hasVelo() && !track.checkFlag(LHCb::Track::Flags::Backward)) {
-      nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZEndVelo, LHCb::State::EndVelo));
-    }
-    if (track.hasTT()) {
-      nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZBegRich1, LHCb::State::BegRich1));
-      nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZEndRich1, LHCb::State::EndRich1));
-    }
-    if (track.hasT()) {
-      nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZBegT, LHCb::State::AtT));
-      nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZBegRich2, LHCb::State::BegRich2));
-      nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZEndRich2, LHCb::State::EndRich2));
-    }
-  }
-
-  // At a node for the position at the beamline
-  if (m_stateAtBeamLine && (track.hasTT() || track.hasVelo())) {
-    const LHCb::State& refstate = *(track.checkFlag(LHCb::Track::Flags::Backward) ? track.states().back() : track.states().front());
-    nodes.push_back(new Tr::TrackVectorFit::FitNode(closestToBeamLine(refstate), LHCb::State::ClosestToBeam));
-  }
-
   // Sort the nodes in z
   const bool backward = track.checkFlag(LHCb::Track::Flags::Backward);
   const bool upstream = (m_upstream && !backward) || (!m_upstream && backward);
@@ -543,8 +490,7 @@ void TrackVectorFitter::populateRefVectors (Tr::TrackVectorFit::Track& t) const
 void TrackVectorFitter::projectAndUpdate (
   Tr::TrackVectorFit::Track& t,
   const LHCb::Tr::PID& pid,
-  const bool& doUpdateMaterialCorrections,
-  const bool& doUpdateTransport
+  const bool& doUpdateMaterialCorrections
 ) const {
   // update the projections. need to be done every time ref is updated
   projectReference(t);
@@ -556,10 +502,7 @@ void TrackVectorFitter::projectAndUpdate (
   }
 
   // create the transport of the reference (after the noise, because it uses energy loss)
-  if (doUpdateTransport) {
-    updateTransport(t);
-    // return failureInfo("Problem updating transport");
-  }
+  updateTransport(t);
 }
 
 /**
@@ -689,15 +632,6 @@ void TrackVectorFitter::projectReference (Tr::TrackVectorFit::Track& t) const
 {
   for (auto& n : t.nodes()) {
     auto& node = n.node();
-
-    // node must have a measurement associated,
-    // otherwise one doesn't project
-    if (!node.hasMeasurement()) {
-      // Reset projection matrix, residual and errMeasure to 0
-      n.resetProjection();
-      continue;
-    }
-
     // get the projector object, for the kind of measurement we have
     // ITrackProjectorSelector m_projectorSelector
     ITrackProjector* projector = m_projectorSelector->projector(node.measurement());
@@ -882,16 +816,8 @@ void TrackVectorFitter::updateTransport (Tr::TrackVectorFit::Track& t) const
       // calculate the 'transport vector' (need to replace that)
       const Gaudi::TrackVector tv = stateVector.parameters() - tm * refVector;
       n.setTransportVector(tv);
-
-      if (i > t.m_forwardUpstream) {
-        n.setTransportMatrix(tm);
-      }
-
-      if (i < (nodes.size() - t.m_backwardUpstream)) {
-        // Note: Store the inverse transport matrix of this node
-        //       on the datatype of the node that will require it
-        nodes[i-1].calculateAndSetInverseTransportMatrix(tm);
-      }
+      n.setTransportMatrix(tm);
+      nodes[i-1].calculateAndSetInverseTransportMatrix(tm);
 
       // test dtx/dqop to see if the momentum affects this track.
       if (std::abs(tm(2,4)) != 0) {
@@ -917,7 +843,7 @@ void TrackVectorFitter::updateTransport (Tr::TrackVectorFit::Track& t) const
  */
 bool TrackVectorFitter::removeWorstOutlier (Tr::TrackVectorFit::Track& t) const {
   // return false if outlier chi2 cut < 0
-  if (m_chi2Outliers < 0.0) {
+  if (m_chi2Outliers.value() < 0.0) {
     return false;
   }
 
@@ -929,7 +855,9 @@ bool TrackVectorFitter::removeWorstOutlier (Tr::TrackVectorFit::Track& t) const
     }
   }
 
-  std::vector<std::reference_wrapper<Tr::TrackVectorFit::Node>> badNodes;
+  using NodeWithChi2 = std::pair<std::reference_wrapper<Tr::TrackVectorFit::Node>, double>;
+  std::vector<NodeWithChi2> nodesWithChi2UL;
+  nodesWithChi2UL.reserve(t.nodes().size());
 
   const TRACKVECTORFIT_PRECISION trackChi2 = t.chi2();
 
@@ -949,59 +877,74 @@ bool TrackVectorFitter::removeWorstOutlier (Tr::TrackVectorFit::Track& t) const
         if ((it+1) != t.nodes().end()) { chi2Contribution -= (it+1)->get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>(); }
 
         if (msgLevel(MSG::DEBUG)){
-          debug() << chi2Contribution << " (" << trackChi2 << " - "
+          debug() << "node LHCbID " << n.node().measurement().lhcbID().channelID()
+            << " chi2Contribution " << chi2Contribution << " (" << trackChi2 << " - "
             << ((it != t.nodes().begin()) ? (it-1)->get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Chi2>() : 0) << " - "
             << (((it+1) != t.nodes().end()) ? (it+1)->get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>() : 0) << ")" << endmsg;
         }
 
-        if (chi2Contribution > m_chi2Outliers) {
-          badNodes.push_back(n);
+        if (chi2Contribution > m_chi2Outliers.value()) {
+          nodesWithChi2UL.emplace_back(n, chi2Contribution);
         }
       }
     }
   }
 
-  // If we didn't find any bad nodes, return false
-  if (badNodes.empty()) {
+  // Sort by decreasing chi2
+  std::sort(nodesWithChi2UL.begin(), nodesWithChi2UL.end(),
+    [] (const NodeWithChi2& n0, const NodeWithChi2& n1) {
+    return n0.second > n1.second;
+  });
+
+  if (msgLevel(MSG::DEBUG)) {
+    debug() << "All bad measurements, ordered: " << endmsg;
+    for (const NodeWithChi2& node : nodesWithChi2UL) {
+      Tr::TrackVectorFit::Node& n = node.first;
+      debug() << "Measurement of type " << n.node().measurement().type()
+              << " LHCbID " << n.node().measurement().lhcbID().channelID()
+              << " at z " << n.node().z()
+              << " with chi2Contribution " << node.second
+              << " and chi2 " << n.smoothChi2()
+              << endmsg;
+    }
+  }
+
+  if (!nodesWithChi2UL.empty() && nodesWithChi2UL.front().second < m_chi2Outliers.value()) {
     return false;
   }
 
-  // Sort by decreasing chi2
-  std::sort(badNodes.begin(), badNodes.end(), [] (const Tr::TrackVectorFit::Node& n0, const Tr::TrackVectorFit::Node& n1) {
-    return n0.smoothChi2() > n1.smoothChi2();
-  });
+  auto worstChi2 = m_chi2Outliers.value();
+  auto worstNodeIt = nodesWithChi2UL.end();
+  for (auto it=nodesWithChi2UL.begin(); it!=nodesWithChi2UL.end(); ++it) {
+    if (it->second > worstChi2) {
+      Tr::TrackVectorFit::Node& n = it->first;
+      const auto chi2 = n.smoothChi2();
+      if (chi2 > worstChi2) {
+        worstChi2 = chi2;
+        worstNodeIt = it;
+      }
+    }
+  }
 
-  Tr::TrackVectorFit::Node& badNode = badNodes.front();
+  if (worstNodeIt == nodesWithChi2UL.end()) {
+    if (msgLevel(MSG::DEBUG)) {
+      debug() << "Outlier not found" << endmsg;
+    }
 
-  if (badNode.smoothChi2() <= m_chi2Outliers) {
     return false;
   }
 
   if (msgLevel(MSG::DEBUG)) {
-    debug() << "Measurement of type " << badNode.node().measurement().type()
-      << " LHCbID " << badNode.node().measurement().lhcbID().channelID()
-      << " at z " << badNode.node().z()
-      << " node chi2 " << badNode.smoothChi2()
-      << " residual " << badNode.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Residual>()
-      << " errResidual " << badNode.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::ErrResidual>()
+    Tr::TrackVectorFit::Node& n = worstNodeIt->first;
+    debug() << "Measurement of type " << n.node().measurement().type()
+      << " LHCbID " << n.node().measurement().lhcbID().channelID()
+      << " at z " << n.node().z()
+      << " with chi2 " << worstChi2
       << " removed." << endmsg;
-
-    debug() << "All bad measurements, ordered: " << endmsg;
-    for (const Tr::TrackVectorFit::Node& node : badNodes ) {
-      debug() << "Measurement of type " << node.node().measurement().type()
-              << " LHCbID " << node.node().measurement().lhcbID().channelID()
-              << " at z " << node.node().z()
-              << " with chi2 " << node.smoothChi2()
-              << " residual " << node.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Residual>()
-              << " errResidual " << node.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::ErrResidual>() << endmsg;
-    }
   }
 
   // Mark the node as an outlier
-  badNode.node().setType(LHCb::Node::Outlier);
-
-  // Update the track's upstreams, if need be
-  t.updateUpstream();
+  worstNodeIt->first.get().setOutlier();
 
   return true;
 }
@@ -1044,7 +987,7 @@ bool TrackVectorFitter::removeWorstOutlierSimplified (Tr::TrackVectorFit::Track&
     return n0.smoothChi2() > n1.smoothChi2();
   });
 
-  ((Tr::TrackVectorFit::Node) badNodes[0]).node().setType(LHCb::Node::Outlier);
+  badNodes[0].get().setOutlier();
 
   return true;
 }
diff --git a/Tr/TrackFitter/src/TrackVectorFitter.h b/Tr/TrackFitter/src/TrackVectorFitter.h
index acd828f305b12d8d9fad71996f9c03150d6c1a25..43500da36f2930f1362a59cc85c01f13a695aaeb 100644
--- a/Tr/TrackFitter/src/TrackVectorFitter.h
+++ b/Tr/TrackFitter/src/TrackVectorFitter.h
@@ -51,17 +51,11 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> {
   Gaudi::Property<bool> m_applyEnergyLossCorrections {this, "ApplyEnergyLossCorr", true};
   Gaudi::Property<bool> m_applyMaterialCorrections {this, "ApplyMaterialCorrections", true};
   Gaudi::Property<double> m_chi2Outliers {this, "Chi2Outliers", 9.0};
-  Gaudi::Property<double> m_errorX {this, "ErrorX", 20.0 * Gaudi::Units::mm};
-  Gaudi::Property<double> m_errorY {this, "ErrorY", 20.0 * Gaudi::Units::mm};
-  Gaudi::Property<double> m_errorTx {this, "ErrorTx", 0.1};
-  Gaudi::Property<double> m_errorTy {this, "ErrorTy", 0.1};
-  Gaudi::Property<std::vector<double>> m_errorQoP {this, "ErrorQoP", {0.0, 0.01}};
   Gaudi::Property<bool> m_fillExtraInfo {this, "FillExtraInfo", true};
   Gaudi::Property<bool> m_makeMeasurements {this, "MakeMeasurements", false};
   Gaudi::Property<bool> m_makeNodes {this, "MakeNodes", false};
   Gaudi::Property<double> m_maxDeltaChi2Converged {this, "MaxDeltaChiSqConverged", 0.01};
   Gaudi::Property<double> m_maxMomentumForScattering {this, "MaxMomentumForScattering", 500. * Gaudi::Units::GeV};
-  Gaudi::Property<unsigned> m_maxUpdateTransports {this, "MaxUpdateTransports", 10};
   Gaudi::Property<unsigned> m_memManagerStorageIncrements {this, "MemManagerStorageIncrements", 4096};
   Gaudi::Property<double> m_minMomentumForELossCorr {this, "MinMomentumELossCorr", 10. * Gaudi::Units::MeV};
   Gaudi::Property<double> m_minMomentumForScattering {this, "MinMomentumForScattering", 100. * Gaudi::Units::MeV};
@@ -76,7 +70,6 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> {
   Gaudi::Property<double> m_scatteringPt {this, "TransverseMomentumForScattering", 400. * Gaudi::Units::MeV};
   Gaudi::Property<bool> m_stateAtBeamLine {this, "StateAtBeamLine", true};
   Gaudi::Property<bool> m_updateMaterial {this, "UpdateMaterial", false};
-  Gaudi::Property<bool> m_updateTransport {this, "UpdateTransport", true};
   Gaudi::Property<bool> m_upstream {this, "FitUpstream", true};
   Gaudi::Property<bool> m_useSeedStateErrors {this, "UseSeedStateErrors", false};
   // Tool Handles
@@ -93,16 +86,11 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> {
     debug() << "m_applyEnergyLossCorrections " << m_applyEnergyLossCorrections << endmsg;
     debug() << "m_applyMaterialCorrections " << m_applyMaterialCorrections << endmsg;
     debug() << "m_chi2Outliers " << m_chi2Outliers << endmsg;
-    debug() << "m_errorTx " << m_errorTx << endmsg;
-    debug() << "m_errorTy " << m_errorTy << endmsg;
-    debug() << "m_errorX " << m_errorX << endmsg;
-    debug() << "m_errorY " << m_errorY << endmsg;
     debug() << "m_fillExtraInfo " << m_fillExtraInfo << endmsg;
     debug() << "m_makeMeasurements " << m_makeMeasurements << endmsg;
     debug() << "m_makeNodes " << m_makeNodes << endmsg;
     debug() << "m_maxDeltaChi2Converged " << m_maxDeltaChi2Converged << endmsg;
     debug() << "m_maxMomentumForScattering " << m_maxMomentumForScattering << endmsg;
-    debug() << "m_maxUpdateTransports " << m_maxUpdateTransports << endmsg;
     debug() << "m_minMomentumForELossCorr " << m_minMomentumForELossCorr << endmsg;
     debug() << "m_minMomentumForScattering " << m_minMomentumForScattering << endmsg;
     debug() << "m_minNumMuonHits " << m_minNumMuonHits << endmsg;
@@ -116,7 +104,6 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> {
     debug() << "m_scatteringPt " << m_scatteringPt << endmsg;
     debug() << "m_stateAtBeamLine " << m_stateAtBeamLine << endmsg;
     debug() << "m_updateMaterial " << m_updateMaterial << endmsg;
-    debug() << "m_updateTransport " << m_updateTransport << endmsg;
     debug() << "m_upstream " << m_upstream << endmsg;
     debug() << "m_useSeedStateErrors " << m_useSeedStateErrors << endmsg;
   }
@@ -171,8 +158,6 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> {
 
   void generateFitResult (LHCb::Track& track) const;
 
-  void generateAndSetSeedCovariance (Tr::TrackVectorFit::Track& t) const;
-
   double closestToBeamLine (const LHCb::State& state) const;
 
   StatusCode makeNodes (LHCb::Track& track, const LHCb::Tr::PID& pid) const;
@@ -182,8 +167,7 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> {
   void projectAndUpdate (
     Tr::TrackVectorFit::Track& t,
     const LHCb::Tr::PID& pid,
-    const bool& doUpdateMaterialCorrections = true,
-    const bool& doUpdateTransport = true
+    const bool& doUpdateMaterialCorrections = true
   ) const;
 
   StatusCode initializeRefStates (LHCb::Track& track, const LHCb::Tr::PID& pid) const;
diff --git a/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h b/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h
index 0f2e621d533f9dcb706cdce2805f04b416426b12..f8fcc6631fde497cc48e40a49e20e9b7f7673e88 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h
@@ -4,7 +4,6 @@
 #include "Types.h"
 #include "Scheduler.h"
 #include "VectorConfiguration.h"
-#include "SimdTransposeHelper.h"
 
 // ------------------
 // Array constructors
@@ -19,33 +18,70 @@ struct ArrayGen {
   static inline constexpr uint16_t mask () {
     return (W==16 ? 0xFFFF : (W==8 ? 0xFF : (W==4 ? 0x0F : 0x03)));
   }
-  
-  template<size_t W>
-  static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 5*W> getPreviousTransportVector (
-    const std::array<Sch::Item, W>& nodes
+
+  template<size_t W, std::size_t... Is>
+  static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 5*W> InitialState_helper (
+    const std::array<Sch::Item, W>& nodes,
+    std::index_sequence<Is...>
   ) {
-    return transpose_helper<5>::get<W>([&nodes](int j){ return (nodes[j].node+1)->transportVector().Array(); });
+    return {
+      nodes[Is].node->m_nodeParameters.m_referenceVector[0]...,
+      nodes[Is].node->m_nodeParameters.m_referenceVector[1]...,
+      nodes[Is].node->m_nodeParameters.m_referenceVector[2]...,
+      nodes[Is].node->m_nodeParameters.m_referenceVector[3]...,
+      nodes[Is].node->m_nodeParameters.m_referenceVector[4]...
+    };
   }
 
   template<size_t W>
-  static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 5*W> getCurrentTransportVector (
+  static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 5*W> InitialState (
     const std::array<Sch::Item, W>& nodes
   ) {
-    return transpose_helper<5>::get<W>([&nodes](int j){ return nodes[j].node->transportVector().Array(); });
+    return InitialState_helper<W>(nodes, std::make_index_sequence<W>{});
+  }
+  
+  static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15> InitialCovariance_values () {
+    return {
+      400,
+      0, 400,
+      0, 0, 0.01,
+      0, 0, 0, 0.01,
+      0, 0, 0, 0, 0.0001
+    };
   }
 
-  template<size_t W>
-  static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 15*W> getPreviousNoiseMatrix (
-    const std::array<Sch::Item, W>& nodes
+  static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15> InitialCovariance_values (
+    std::size_t
   ) {
-    return transpose_helper<15>::get<W>([&nodes](int j){ return (nodes[j].node+1)->noiseMatrix().Array(); });
+    return InitialCovariance_values();
   }
 
-  template<size_t W>
-  static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 15*W> getCurrentNoiseMatrix (
-    const std::array<Sch::Item, W>& nodes
+  template<size_t W, std::size_t... Is>
+  static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15*W> InitialCovariance_helper (
+    std::index_sequence<Is...>
   ) {
-    return transpose_helper<15>::get<W>([&nodes](int j){ return nodes[j].node->noiseMatrix().Array(); });
+    return {
+      InitialCovariance_values(Is)[0]...,
+      InitialCovariance_values(Is)[1]...,
+      InitialCovariance_values(Is)[2]...,
+      InitialCovariance_values(Is)[3]...,
+      InitialCovariance_values(Is)[4]...,
+      InitialCovariance_values(Is)[5]...,
+      InitialCovariance_values(Is)[6]...,
+      InitialCovariance_values(Is)[7]...,
+      InitialCovariance_values(Is)[8]...,
+      InitialCovariance_values(Is)[9]...,
+      InitialCovariance_values(Is)[10]...,
+      InitialCovariance_values(Is)[11]...,
+      InitialCovariance_values(Is)[12]...,
+      InitialCovariance_values(Is)[13]...,
+      InitialCovariance_values(Is)[14]...
+    };
+  }
+
+  template<size_t W>
+  static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15*W> InitialCovariance () {
+    return InitialCovariance_helper<W>(std::make_index_sequence<W>{});
   }
 };
 
diff --git a/Tr/TrackVectorFit/TrackVectorFit/MemView.h b/Tr/TrackVectorFit/TrackVectorFit/MemView.h
index 57973e3120eb205c62f398c55dceff6c23502dab..f2261074f69e83acc0047ea02d4e41bad8c8919a 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/MemView.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/MemView.h
@@ -22,16 +22,18 @@ namespace Op {
   class ProjectionMatrix;
   class ReferenceResidual;
   class ErrMeasure;
+  class TransportVector;
+  class NoiseMatrix;
 
   // Forward, Backward, Smooth
   class StateVector;
   class Covariance;
 
   // Forward, Backward
+  class TransportMatrix;
   class Chi2;
 
   // Smooth
-  class TransportMatrix;
   class Residual;
   class ErrResidual;
 }
@@ -50,10 +52,12 @@ using TrackVector = LHCb::Vector::Mem::View::TrackVector<TRACKVECTORFIT_PRECISIO
 using TrackSymMatrix = LHCb::Vector::Mem::View::TrackSymMatrix<TRACKVECTORFIT_PRECISION>;
 
 struct NodeParameters {
-  constexpr static size_t size () { return 12; }
+  constexpr static size_t size () { return 32; }
   TRACKVECTORFIT_PRECISION* m_basePointer = nullptr;
   TrackVector m_referenceVector;
   TrackVector m_projectionMatrix;
+  TrackVector m_transportVector;
+  TrackSymMatrix m_noiseMatrix;
   TRACKVECTORFIT_PRECISION* m_referenceResidual;
   TRACKVECTORFIT_PRECISION* m_errorMeasure;
 
@@ -65,9 +69,11 @@ struct NodeParameters {
 
   void setBasePointer (TRACKVECTORFIT_PRECISION* p) {
     m_basePointer = p;
-    m_referenceVector = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
-    m_projectionMatrix = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
-    m_referenceResidual = p; p += VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
+    m_referenceVector = TrackVector(p); p += 5 * vector_width();
+    m_projectionMatrix = TrackVector(p); p += 5 * vector_width();
+    m_transportVector = TrackVector(p); p += 5 * vector_width();
+    m_noiseMatrix = TrackSymMatrix(p); p += 15 * vector_width();
+    m_referenceResidual = p; p += vector_width();
     m_errorMeasure = p;
   }
 
@@ -78,6 +84,8 @@ struct NodeParameters {
   void copy (const NodeParameters& s) {
     m_referenceVector.copy(s.m_referenceVector);
     m_projectionMatrix.copy(s.m_projectionMatrix);
+    m_transportVector.copy(s.m_transportVector);
+    m_noiseMatrix.copy(s.m_noiseMatrix);
     *m_referenceResidual = *s.m_referenceResidual;
     *m_errorMeasure = *s.m_errorMeasure;
   }
@@ -99,9 +107,9 @@ struct SmoothState {
 
   void setBasePointer (TRACKVECTORFIT_PRECISION* p) {
     m_basePointer = p;
-    m_state = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
-    m_covariance = TrackSymMatrix(p); p += 15 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
-    m_residual = p; p += VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
+    m_state = TrackVector(p); p += 5 * vector_width();
+    m_covariance = TrackSymMatrix(p); p += 15 * vector_width();
+    m_residual = p; p += vector_width();
     m_errResidual = p;
   }
 
@@ -132,8 +140,8 @@ struct State {
 
   void setBasePointer (TRACKVECTORFIT_PRECISION* p) {
     m_basePointer = p;
-    m_updatedState = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
-    m_updatedCovariance = TrackSymMatrix(p); p += 15 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
+    m_updatedState = TrackVector(p); p += 5 * vector_width();
+    m_updatedCovariance = TrackSymMatrix(p); p += 15 * vector_width();
     m_chi2 = p;
   }
 
diff --git a/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h b/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h
index b0a026129ef56c8183a9f18c382461e69103d4e9..d90d3ac6dfcdc31ac98c331e95ab9ed10ea1944b 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h
@@ -41,6 +41,16 @@ inline const TrackSymMatrix& Node::get<Op::Forward, Op::Covariance> () const {
   return m_node->forwardCovariance();
 }
 
+template<>
+inline const TrackSymMatrix& Node::get<Op::NodeParameters, Op::NoiseMatrix> () const {
+  return m_nodeParameters.m_noiseMatrix;
+}
+
+template<>
+inline const TrackVector& Node::get<Op::NodeParameters, Op::TransportVector> () const {
+  return m_nodeParameters.m_transportVector;
+}
+
 template<>
 inline const TrackMatrix<25>& Node::get<Op::Forward, Op::TransportMatrix> () const {
   return m_forwardTransportMatrix;
@@ -125,6 +135,16 @@ inline TrackSymMatrix& Node::get<Op::Forward, Op::Covariance> () {
   return m_node->forwardCovariance();
 }
 
+template<>
+inline TrackSymMatrix& Node::get<Op::NodeParameters, Op::NoiseMatrix> () {
+  return m_nodeParameters.m_noiseMatrix;
+}
+
+template<>
+inline TrackVector& Node::get<Op::NodeParameters, Op::TransportVector> () {
+  return m_nodeParameters.m_transportVector;
+}
+
 template<>
 inline TrackMatrix<25>& Node::get<Op::Forward, Op::TransportMatrix> () {
   return m_forwardTransportMatrix;
diff --git a/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h b/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h
index 4c5375d1460923679358f1f5e5d66c4c7e0ad25b..0542c63b6d0c492efe788df7b631b523a785f986 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h
@@ -12,20 +12,34 @@ namespace Sch {
 
 struct Item {
   Track* track;
-  unsigned trackIndex;
-  std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, ALIGNMENT>>::iterator node;
-  std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, ALIGNMENT>>::iterator lastnode;
+  std::vector<Node>::iterator node;
+  unsigned trackIndex = 0;
+  unsigned nodeIndex = 0;
 
   Item () = default;
   Item (const Item& copy) = default;
-  Item (
-    Track* track,
-    const unsigned& nodeIndex,
-    const unsigned& lastnodeIndex
-  ) : track(track) {
+  Item (Track* track)
+    : track(track) {
     trackIndex = track->m_index;
-    node     = track->m_nodes.begin() + nodeIndex;
-    lastnode = track->m_nodes.begin() + lastnodeIndex;
+    node       = track->m_nodes.begin();
+  }
+
+  void incrementNodeIndex () {
+    ++node;
+    ++nodeIndex;
+  }
+
+  template<size_t N>
+  friend inline std::ostream& operator<< (
+    std::ostream& cout,
+    const std::array<Item, N>& pool
+  ) {
+    cout << "{ ";
+    for (unsigned j=0; j<pool.size(); ++j) {
+      cout << pool[j].trackIndex << "-" << pool[j].nodeIndex << " ";
+    }
+    cout << "}";
+    return cout;
   }
 };
 
@@ -60,14 +74,8 @@ struct Blueprint {
   ) {
     cout << reverse<N>(blueprint.in) << " "
       << reverse<N>(blueprint.out) << " "
-      << reverse<N>(blueprint.action) << " ";
-
-    auto& pool = blueprint.pool;
-    cout << "{ ";
-    for (unsigned j=0; j<pool.size(); ++j) {
-      cout << pool[j].trackIndex << "-" << pool[j].node->m_index << " ";
-    }
-    cout << "}";
+      << reverse<N>(blueprint.action) << " "
+      << blueprint.pool;
 
     return cout;
   }
@@ -84,27 +92,40 @@ struct Blueprint {
   }
 };
 
+
 template<size_t N>
-struct StaticSchedulerCommon {
-  // Generic generate method
-  template<class F0, class F1, class F2>
+struct StaticScheduler {
+  StaticScheduler () = default;
+  StaticScheduler (const StaticScheduler& copy) = default;
+
   static std::list<Blueprint<N>>
   generate (
-    std::list<std::reference_wrapper<Track>>& tracks,
-    const F0& getSize,
-    const F1& getCurrentElement,
-    const F2& getLastElement
+    std::list<std::reference_wrapper<Track>>& tracks
   ) {
-    if (N > tracks.size()) {
-      std::cout << "Scheduler: N <= tracks.size(), things probably won't work" << std::endl;
-    }
+    auto getSize = [] (const Track& t) { return t.m_nodes.size(); };
 
     // Order tracks
     std::vector<std::reference_wrapper<Track>> ordered_tracks (tracks.begin(), tracks.end());
     std::sort(ordered_tracks.begin(), ordered_tracks.end(), [&getSize] (const Track& t0, const Track& t1) {
-      const size_t size0 = getSize(t0);
-      const size_t size1 = getSize(t1);
-      return size0 > size1;
+      const auto& type0 = t0.track().type();
+      const auto& type1 = t1.track().type();
+
+      // Always prefer tracks that are not Velo or VeloR
+      if (!(type0 == LHCb::Track::Velo || type0 == LHCb::Track::VeloR)
+        && (type1 == LHCb::Track::Velo || type1 == LHCb::Track::VeloR)) {
+        return true;
+      }
+      else if ((type0 == LHCb::Track::Velo || type0 == LHCb::Track::VeloR)
+        && !(type1 == LHCb::Track::Velo || type1 == LHCb::Track::VeloR)) {
+        return false;
+      }
+      else {
+        // Otherwise, compare sizes
+        const size_t size0 = getSize(t0);
+        const size_t size1 = getSize(t1);
+
+        return size0 > size1;
+      }
     });
 
     // Initialise
@@ -126,11 +147,7 @@ struct StaticSchedulerCommon {
         const unsigned slot = slots.back();
         slots.pop_back();
         in[slot] = true;
-        pool[slot] = Item (
-          &track,
-          getCurrentElement(track),
-          getLastElement(track)
-        );
+        pool[slot] = Item(&track);
       }
 
       while (slots.empty()) {
@@ -139,7 +156,7 @@ struct StaticSchedulerCommon {
         out = false;
         for (size_t i=0; i<N; ++i) {
           Item& s = pool[i];
-          if (s.node + 1 == s.lastnode) {
+          if (s.node + 1 == s.track->m_nodes.end()) {
             out[i] = 1;
             slots.push_back(i);
           }
@@ -151,8 +168,8 @@ struct StaticSchedulerCommon {
         // Initialise in, and prepare nodes for next iteration
         in = false;
         std::for_each (pool.begin(), pool.end(), [] (Item& s) {
-          if (s.node + 1 != s.lastnode) {
-            ++s.node;
+          if (s.node + 1 != s.track->m_nodes.end()) {
+            s.incrementNodeIndex();
           }
         });
       }
@@ -177,7 +194,7 @@ struct StaticSchedulerCommon {
         std::for_each (processingIndices.begin(), processingIndicesEnd, [&] (const unsigned& i) {
           Item& s = pool[i];
           action[i] = 1;
-          if (s.node + 1 == s.lastnode) {
+          if (s.node + 1 == s.track->m_nodes.end()) {
             out[i] = true;
           }
         });
@@ -189,8 +206,8 @@ struct StaticSchedulerCommon {
         in = false;
         processingIndicesEnd = std::remove_if(processingIndices.begin(), processingIndicesEnd, [&] (const unsigned& i) {
           Item& s = pool[i];
-          if (s.node + 1 == s.lastnode) return true;
-          ++s.node;
+          if (s.node + 1 == s.track->m_nodes.end()) return true;
+          s.incrementNodeIndex();
           return false;
         });
       }
@@ -200,78 +217,8 @@ struct StaticSchedulerCommon {
   }
 };
 
-// Specializable generate method
-class Pre;
-class Main;
-class Post;
-
-template<class T, size_t N>
-struct StaticScheduler {
-  StaticScheduler () = default;
-  StaticScheduler (const StaticScheduler& copy) = default;
-
-  static std::list<Blueprint<N>>
-  generate (
-    std::list<std::reference_wrapper<Track>>& tracks
-  );
-};
-
-template<size_t N>
-struct StaticScheduler<Pre, N> {
-  static std::list<Blueprint<N>>
-  generate (
-    std::list<std::reference_wrapper<Track>>& tracks
-  ) {
-    return StaticSchedulerCommon<N>::generate (
-      tracks,
-      [] (const Track& t) { return (t.m_forwardUpstream + 1); },
-      [] (const Track&) { return 0; },
-      [] (const Track& t) { return t.m_forwardUpstream + 1; }
-    );
-  }
-};
-
-template<size_t N>
-struct StaticScheduler<Main, N> {
-  static std::list<Blueprint<N>>
-  generate (
-    std::list<std::reference_wrapper<Track>>& tracks
-  ) {
-    return StaticSchedulerCommon<N>::generate (
-      tracks,
-      [] (const Track& t) { return t.m_nodes.size() - t.m_forwardUpstream - t.m_backwardUpstream; },
-      [] (const Track& t) { return t.m_forwardUpstream + 1; },
-      [] (const Track& t) { return t.m_nodes.size() - t.m_backwardUpstream - 1; }
-    );
-  }
-};
-
-template<size_t N>
-struct StaticScheduler<Post, N> {
-  static std::list<Blueprint<N>>
-  generate (
-    std::list<std::reference_wrapper<Track>>& tracks
-  ) {
-    return StaticSchedulerCommon<N>::generate (
-      tracks,
-      [] (const Track& t) { return (t.m_backwardUpstream + 1); },
-      [] (const Track& t) { return t.m_nodes.size() - t.m_backwardUpstream - 1; },
-      [] (const Track& t) { return t.m_nodes.size(); }
-    );
-  }
-};
-
 }
 
-struct SwapStore {
-  Mem::View::State store;
-  TrackVector& state;
-  TrackSymMatrix& covariance;
-
-  SwapStore (const Mem::View::State& store, TrackVector& state, TrackSymMatrix& covariance) :
-    store(store), state(state), covariance(covariance) {}
-};
-
 }
 
 }
diff --git a/Tr/TrackVectorFit/TrackVectorFit/SimdTranspose.h b/Tr/TrackVectorFit/TrackVectorFit/SimdTranspose.h
deleted file mode 100644
index 1bb91518af33c67d7fee99ca527954ed312663d2..0000000000000000000000000000000000000000
--- a/Tr/TrackVectorFit/TrackVectorFit/SimdTranspose.h
+++ /dev/null
@@ -1,239 +0,0 @@
-#pragma once
-
-#include "VectorConfiguration.h"
-
-#ifdef __SSE__
-/**
- * Transposition in SSE of 4 PRECISION vectors
- */
-//#define _MM_TRANSPOSE4_PS(r0, r1, r2, r3) ...
-
-/**
- * Transposition in SSE of 2 PRECISION vectors
- */
-#define ___MM_TRANSPOSE2_PD(in0, in1, out0, out1, __in0, __in1, __out0, __out1) \
-  do { \
-    __m128d __in0 = (in0), __in1 = (in1); \
-    __m128d __out0, __out1; \
-    __out0 = _mm_shuffle_pd(__in0, __in1, 0x0); \
-    __out1 = _mm_shuffle_pd(__in0, __in1, 0x3); \
-    (out0) = __out0, (out1) = __out1; \
-  } while (0)
-
-#define _MM_TRANSPOSE2_PD(in0, in1) \
-      ___MM_TRANSPOSE2_PD(in0, in1, in0, in1, __in0##__LINE__, __in1##__LINE__, __out0##__LINE__, __out1##__LINE__)
-
-#endif //__SSE__
-
-
-#ifdef __AVX__
-/**
- * Transposition in AVX of 8 PRECISION vectors
- */
-#define ___MM256_TRANSPOSE8_PS(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5, out6, out7, __in0, __in1, __in2, __in3, __in4, __in5, __in6, __in7, __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7) \
-  do { \
-    __m256 __in0 = (in0), __in1 = (in1), __in2 = (in2), __in3 = (in3), __in4 = (in4), __in5 = (in5), __in6 = (in6), __in7 = (in7); \
-    __m256 __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7; \
-    __m256 __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7; \
-    __m256 __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7; \
-    __tmp0  = _mm256_unpacklo_ps(__in0, __in1); \
-    __tmp1  = _mm256_unpackhi_ps(__in0, __in1); \
-    __tmp2  = _mm256_unpacklo_ps(__in2, __in3); \
-    __tmp3  = _mm256_unpackhi_ps(__in2, __in3); \
-    __tmp4  = _mm256_unpacklo_ps(__in4, __in5); \
-    __tmp5  = _mm256_unpackhi_ps(__in4, __in5); \
-    __tmp6  = _mm256_unpacklo_ps(__in6, __in7); \
-    __tmp7  = _mm256_unpackhi_ps(__in6, __in7); \
-    __tmpp0 = _mm256_shuffle_ps(__tmp0, __tmp2, 0x44); \
-    __tmpp1 = _mm256_shuffle_ps(__tmp0, __tmp2, 0xEE); \
-    __tmpp2 = _mm256_shuffle_ps(__tmp1, __tmp3, 0x44); \
-    __tmpp3 = _mm256_shuffle_ps(__tmp1, __tmp3, 0xEE); \
-    __tmpp4 = _mm256_shuffle_ps(__tmp4, __tmp6, 0x44); \
-    __tmpp5 = _mm256_shuffle_ps(__tmp4, __tmp6, 0xEE); \
-    __tmpp6 = _mm256_shuffle_ps(__tmp5, __tmp7, 0x44); \
-    __tmpp7 = _mm256_shuffle_ps(__tmp5, __tmp7, 0xEE); \
-    __out0  = _mm256_permute2f128_ps(__tmpp0, __tmpp4, 0x20); \
-    __out1  = _mm256_permute2f128_ps(__tmpp1, __tmpp5, 0x20); \
-    __out2  = _mm256_permute2f128_ps(__tmpp2, __tmpp6, 0x20); \
-    __out3  = _mm256_permute2f128_ps(__tmpp3, __tmpp7, 0x20); \
-    __out4  = _mm256_permute2f128_ps(__tmpp0, __tmpp4, 0x31); \
-    __out5  = _mm256_permute2f128_ps(__tmpp1, __tmpp5, 0x31); \
-    __out6  = _mm256_permute2f128_ps(__tmpp2, __tmpp6, 0x31); \
-    __out7  = _mm256_permute2f128_ps(__tmpp3, __tmpp7, 0x31); \
-    (out0)  = __out0, (out1) = __out1, (out2) = __out2, (out3) = __out3, (out4) = __out4, (out5) = __out5, (out6) = __out6, (out7) = __out7; \
-  } while (0)
-
-#define _MM256_TRANSPOSE8_PS(in0, in1, in2, in3, in4, in5, in6, in7) \
-      ___MM256_TRANSPOSE8_PS(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7, \
-          __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, __in4##__LINE__, __in5##__LINE__, __in6##__LINE__, __in7##__LINE__, \
-          __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, __out4##__LINE__, __out5##__LINE__, __out6##__LINE__, __out7##__LINE__, \
-          __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__, __tmp4##__LINE__, __tmp5##__LINE__, __tmp6##__LINE__, __tmp7##__LINE__, \
-          __tmpp0##__LINE__, __tmpp1##__LINE__, __tmpp2##__LINE__, __tmpp3##__LINE__, __tmpp4##__LINE__, __tmpp5##__LINE__, __tmpp6##__LINE__, __tmpp7##__LINE__)
-
-
-/**
- * Transposition in AVX of 4 PRECISION vectors
- */
-#define ___MM256_TRANSPOSE4_PD(in0, in1, in2, in3, out0, out1, out2, out3, __in0, __in1, __in2, __in3, __out0, __out1, __out2, __out3, __tmp0, __tmp1, __tmp2, __tmp3) \
-  do { \
-    __m256d __in0 = (in0), __in1 = (in1), __in2 = (in2), __in3 = (in3); \
-    __m256d __tmp0, __tmp1, __tmp2, __tmp3; \
-    __m256d __out0, __out1, __out2, __out3; \
-    __tmp0 = _mm256_shuffle_pd(__in0, __in1, 0x0); \
-    __tmp1 = _mm256_shuffle_pd(__in0, __in1, 0xf); \
-    __tmp2 = _mm256_shuffle_pd(__in2, __in3, 0x0); \
-    __tmp3 = _mm256_shuffle_pd(__in2, __in3, 0xf); \
-    __out0 = _mm256_permute2f128_pd(__tmp0, __tmp2, 0x20); \
-    __out1 = _mm256_permute2f128_pd(__tmp1, __tmp3, 0x20); \
-    __out2 = _mm256_permute2f128_pd(__tmp0, __tmp2, 0x31); \
-    __out3 = _mm256_permute2f128_pd(__tmp1, __tmp3, 0x31); \
-    (out0) = __out0, (out1) = __out1, (out2) = __out2, (out3) = __out3; \
-  } while (0)
-
-#define _MM256_TRANSPOSE4_PD(in0, in1, in2, in3) \
-      ___MM256_TRANSPOSE4_PD(in0, in1, in2, in3, in0, in1, in2, in3, \
-          __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, \
-          __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, \
-          __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__)
-
-#endif //__AVX__
-
-#ifdef __AVX512F__
-/**
- * Transposition in AVX512 of 16 PRECISION vectors
- */
-#define ___MM512_TRANSPOSE16_PS(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF, out0, out1, out2, out3, out4, out5, out6, out7, out8, out9, outA, outB, outC, outD, outE, outF, __in0, __in1, __in2, __in3, __in4, __in5, __in6, __in7, __in8, __in9, __inA, __inB, __inC, __inD, __inE, __inF, __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __out8, __out9, __outA, __outB, __outC, __outD, __outE, __outF, __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmp8, __tmp9, __tmpA, __tmpB, __tmpC, __tmpD, __tmpE, __tmpF, __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7, __tmpp8, __tmpp9, __tmppA, __tmppB, __tmppC, __tmppD, __tmpppE, __tmppF) \
-  do { \
-    __m512i __in0 = _mm512_castps_si512(in0), __in1 = _mm512_castps_si512(in1), __in2 = _mm512_castps_si512(in2), __in3 = _mm512_castps_si512(in3), __in4 = _mm512_castps_si512(in4), __in5 = _mm512_castps_si512(in5), __in6 = _mm512_castps_si512(in6), __in7 = _mm512_castps_si512(in7), __in8 = _mm512_castps_si512(in8), __in9 = _mm512_castps_si512(in9), __inA = _mm512_castps_si512(inA), __inB = _mm512_castps_si512(inB), __inC = _mm512_castps_si512(inC), __inD = _mm512_castps_si512(inD), __inE = _mm512_castps_si512(inE), __inF = _mm512_castps_si512(inF); \
-    __m512i __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmp8, __tmp9, __tmpA, __tmpB, __tmpC, __tmpD, __tmpE, __tmpF; \
-    __m512i __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7, __tmpp8, __tmpp9, __tmppA, __tmppB, __tmppC, __tmppD, __tmppE, __tmppF; \
-    __m512i __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __out8, __out9, __outA, __outB, __outC, __outD, __outE, __outF; \
-     \
-    __tmp0 = _mm512_unpacklo_epi32(__in0,__in1); \
-    __tmp1 = _mm512_unpackhi_epi32(__in0,__in1); \
-    __tmp2 = _mm512_unpacklo_epi32(__in2,__in3); \
-    __tmp3 = _mm512_unpackhi_epi32(__in2,__in3); \
-    __tmp4 = _mm512_unpacklo_epi32(__in4,__in5); \
-    __tmp5 = _mm512_unpackhi_epi32(__in4,__in5); \
-    __tmp6 = _mm512_unpacklo_epi32(__in6,__in7); \
-    __tmp7 = _mm512_unpackhi_epi32(__in6,__in7); \
-    __tmp8 = _mm512_unpacklo_epi32(__in8,__in9); \
-    __tmp9 = _mm512_unpackhi_epi32(__in8,__in9); \
-    __tmpA = _mm512_unpacklo_epi32(__inA,__inB); \
-    __tmpB = _mm512_unpackhi_epi32(__inA,__inB); \
-    __tmpC = _mm512_unpacklo_epi32(__inC,__inD); \
-    __tmpD = _mm512_unpackhi_epi32(__inC,__inD); \
-    __tmpE = _mm512_unpacklo_epi32(__inE,__inF); \
-    __tmpF = _mm512_unpackhi_epi32(__inE,__inF); \
-     \
-    __tmpp0 = _mm512_unpacklo_epi64(__tmp0,__tmp2); \
-    __tmpp1 = _mm512_unpackhi_epi64(__tmp0,__tmp2); \
-    __tmpp2 = _mm512_unpacklo_epi64(__tmp1,__tmp3); \
-    __tmpp3 = _mm512_unpackhi_epi64(__tmp1,__tmp3); \
-    __tmpp4 = _mm512_unpacklo_epi64(__tmp4,__tmp6); \
-    __tmpp5 = _mm512_unpackhi_epi64(__tmp4,__tmp6); \
-    __tmpp6 = _mm512_unpacklo_epi64(__tmp5,__tmp7); \
-    __tmpp7 = _mm512_unpackhi_epi64(__tmp5,__tmp7); \
-    __tmpp8 = _mm512_unpacklo_epi64(__tmp8,__tmpA); \
-    __tmpp9 = _mm512_unpackhi_epi64(__tmp8,__tmpA); \
-    __tmppA = _mm512_unpacklo_epi64(__tmp9,__tmpB); \
-    __tmppB = _mm512_unpackhi_epi64(__tmp9,__tmpB); \
-    __tmppC = _mm512_unpacklo_epi64(__tmpC,__tmpE); \
-    __tmppD = _mm512_unpackhi_epi64(__tmpC,__tmpE); \
-    __tmppE = _mm512_unpacklo_epi64(__tmpD,__tmpF); \
-    __tmppF = _mm512_unpackhi_epi64(__tmpD,__tmpF); \
-     \
-    __tmp0 = _mm512_shuffle_i32x4(__tmpp0, __tmpp4, 0x88); \
-    __tmp1 = _mm512_shuffle_i32x4(__tmpp1, __tmpp5, 0x88); \
-    __tmp2 = _mm512_shuffle_i32x4(__tmpp2, __tmpp6, 0x88); \
-    __tmp3 = _mm512_shuffle_i32x4(__tmpp3, __tmpp7, 0x88); \
-    __tmp4 = _mm512_shuffle_i32x4(__tmpp0, __tmpp4, 0xdd); \
-    __tmp5 = _mm512_shuffle_i32x4(__tmpp1, __tmpp5, 0xdd); \
-    __tmp6 = _mm512_shuffle_i32x4(__tmpp2, __tmpp6, 0xdd); \
-    __tmp7 = _mm512_shuffle_i32x4(__tmpp3, __tmpp7, 0xdd); \
-    __tmp8 = _mm512_shuffle_i32x4(__tmpp8, __tmppC, 0x88); \
-    __tmp9 = _mm512_shuffle_i32x4(__tmpp9, __tmppD, 0x88); \
-    __tmpA = _mm512_shuffle_i32x4(__tmppA, __tmppE, 0x88); \
-    __tmpB = _mm512_shuffle_i32x4(__tmppB, __tmppF, 0x88); \
-    __tmpC = _mm512_shuffle_i32x4(__tmpp8, __tmppC, 0xdd); \
-    __tmpD = _mm512_shuffle_i32x4(__tmpp9, __tmppD, 0xdd); \
-    __tmpE = _mm512_shuffle_i32x4(__tmppA, __tmppE, 0xdd); \
-    __tmpF = _mm512_shuffle_i32x4(__tmppB, __tmppF, 0xdd); \
-     \
-    __out0 = _mm512_shuffle_i32x4(__tmp0, __tmp8, 0x88); \
-    __out1 = _mm512_shuffle_i32x4(__tmp1, __tmp9, 0x88); \
-    __out2 = _mm512_shuffle_i32x4(__tmp2, __tmpA, 0x88); \
-    __out3 = _mm512_shuffle_i32x4(__tmp3, __tmpB, 0x88); \
-    __out4 = _mm512_shuffle_i32x4(__tmp4, __tmpC, 0x88); \
-    __out5 = _mm512_shuffle_i32x4(__tmp5, __tmpD, 0x88); \
-    __out6 = _mm512_shuffle_i32x4(__tmp6, __tmpE, 0x88); \
-    __out7 = _mm512_shuffle_i32x4(__tmp7, __tmpF, 0x88); \
-    __out8 = _mm512_shuffle_i32x4(__tmp0, __tmp8, 0xdd); \
-    __out9 = _mm512_shuffle_i32x4(__tmp1, __tmp9, 0xdd); \
-    __outA = _mm512_shuffle_i32x4(__tmp2, __tmpA, 0xdd); \
-    __outB = _mm512_shuffle_i32x4(__tmp3, __tmpB, 0xdd); \
-    __outC = _mm512_shuffle_i32x4(__tmp4, __tmpC, 0xdd); \
-    __outD = _mm512_shuffle_i32x4(__tmp5, __tmpD, 0xdd); \
-    __outE = _mm512_shuffle_i32x4(__tmp6, __tmpE, 0xdd); \
-    __outF = _mm512_shuffle_i32x4(__tmp7, __tmpF, 0xdd); \
-     \
-    (out0)  = _mm512_castsi512_ps(__out0), (out1) = _mm512_castsi512_ps(__out1), (out2) = _mm512_castsi512_ps(__out2), (out3) = _mm512_castsi512_ps(__out3), (out4) = _mm512_castsi512_ps(__out4), (out5) = _mm512_castsi512_ps(__out5), (out6) = _mm512_castsi512_ps(__out6), (out7) = _mm512_castsi512_ps(__out7), (out8) = _mm512_castsi512_ps(__out8), (out9) = _mm512_castsi512_ps(__out9), (outA) = _mm512_castsi512_ps(__outA), (outB) = _mm512_castsi512_ps(__outB), (outC) = _mm512_castsi512_ps(__outC), (outD) = _mm512_castsi512_ps(__outD), (outE) = _mm512_castsi512_ps(__outE), (outF) = _mm512_castsi512_ps(__outF); \
-  } while (0)
-
-#define _MM512_TRANSPOSE16_PS(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF) \
-      ___MM512_TRANSPOSE16_PS(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF, \
-          __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, __in4##__LINE__, __in5##__LINE__, __in6##__LINE__, __in7##__LINE__, __in8##__LINE__, __in9##__LINE__, __inA##__LINE__, __inB##__LINE__, __inC##__LINE__, __inD##__LINE__, __inE##__LINE__, __inF##__LINE__, \
-          __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, __out4##__LINE__, __out5##__LINE__, __out6##__LINE__, __out7##__LINE__, __out8##__LINE__, __out9##__LINE__, __outA##__LINE__, __outB##__LINE__, __outC##__LINE__, __outD##__LINE__, __outE##__LINE__, __outF##__LINE__, \
-          __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__, __tmp4##__LINE__, __tmp5##__LINE__, __tmp6##__LINE__, __tmp7##__LINE__, __tmp8##__LINE__, __tmp9##__LINE__, __tmpA##__LINE__, __tmpB##__LINE__, __tmpC##__LINE__, __tmpD##__LINE__, __tmpE##__LINE__, __tmpF##__LINE__, \
-          __tmpp0##__LINE__, __tmpp1##__LINE__, __tmpp2##__LINE__, __tmpp3##__LINE__, __tmpp4##__LINE__, __tmpp5##__LINE__, __tmpp6##__LINE__, __tmpp7##__LINE__, __tmpp8##__LINE__, __tmpp9##__LINE__, __tmppA##__LINE__, __tmppB##__LINE__, __tmppC##__LINE__, __tmppD##__LINE__, __tmppE##__LINE__, __tmppF##__LINE__)
-
-
-
-/**
- * Transposition in AVX512 of 8 PRECISION vectors
- */
-#define ___MM512_TRANSPOSE8_PD(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5, out6, out7, __in0, __in1, __in2, __in3, __in4, __in5, __in6, __in7, __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7) \
-  do { \
-    __m512i __in0 = _mm512_castpd_si512(in0), __in1 = _mm512_castpd_si512(in1), __in2 = _mm512_castpd_si512(in2), __in3 = _mm512_castpd_si512(in3), __in4 = _mm512_castpd_si512(in4), __in5 = _mm512_castpd_si512(in5), __in6 = _mm512_castpd_si512(in6), __in7 = _mm512_castpd_si512(in7); \
-    __m512i __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7; \
-    __m512i __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7; \
-    __m512i __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7; \
-     \
-    __tmp0  = _mm512_unpacklo_epi64(__in0,__in1); \
-    __tmp1  = _mm512_unpackhi_epi64(__in0,__in1); \
-    __tmp2  = _mm512_unpacklo_epi64(__in2,__in3); \
-    __tmp3  = _mm512_unpackhi_epi64(__in2,__in3); \
-    __tmp4  = _mm512_unpacklo_epi64(__in4,__in5); \
-    __tmp5  = _mm512_unpackhi_epi64(__in4,__in5); \
-    __tmp6  = _mm512_unpacklo_epi64(__in6,__in7); \
-    __tmp7  = _mm512_unpackhi_epi64(__in6,__in7); \
-     \
-    __tmpp0 = _mm512_shuffle_i64x2(__tmp0, __tmp2, 0x88); \
-    __tmpp1 = _mm512_shuffle_i64x2(__tmp1, __tmp3, 0x88); \
-    __tmpp2 = _mm512_shuffle_i64x2(__tmp0, __tmp2, 0xdd); \
-    __tmpp3 = _mm512_shuffle_i64x2(__tmp1, __tmp3, 0xdd); \
-    __tmpp4 = _mm512_shuffle_i64x2(__tmp4, __tmp6, 0x88); \
-    __tmpp5 = _mm512_shuffle_i64x2(__tmp5, __tmp7, 0x88); \
-    __tmpp6 = _mm512_shuffle_i64x2(__tmp4, __tmp6, 0xdd); \
-    __tmpp7 = _mm512_shuffle_i64x2(__tmp5, __tmp7, 0xdd); \
-     \
-    __out0  = _mm512_shuffle_i64x2(__tmpp0, __tmpp4, 0x88); \
-    __out1  = _mm512_shuffle_i64x2(__tmpp1, __tmpp5, 0x88); \
-    __out2  = _mm512_shuffle_i64x2(__tmpp2, __tmpp6, 0x88); \
-    __out3  = _mm512_shuffle_i64x2(__tmpp3, __tmpp7, 0x88); \
-    __out4  = _mm512_shuffle_i64x2(__tmpp0, __tmpp4, 0xdd); \
-    __out5  = _mm512_shuffle_i64x2(__tmpp1, __tmpp5, 0xdd); \
-    __out6  = _mm512_shuffle_i64x2(__tmpp2, __tmpp6, 0xdd); \
-    __out7  = _mm512_shuffle_i64x2(__tmpp3, __tmpp7, 0xdd); \
-     \
-    (out0)  = _mm512_castsi512_pd(__out0), (out1) = _mm512_castsi512_pd(__out1), (out2) = _mm512_castsi512_pd(__out2), (out3) = _mm512_castsi512_pd(__out3), (out4) = _mm512_castsi512_pd(__out4), (out5) = _mm512_castsi512_pd(__out5), (out6) = _mm512_castsi512_pd(__out6), (out7) = _mm512_castsi512_pd(__out7); \
-  } while (0)
-
-#define _MM512_TRANSPOSE8_PD(in0, in1, in2, in3, in4, in5, in6, in7) \
-      ___MM512_TRANSPOSE8_PD(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7, \
-          __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, __in4##__LINE__, __in5##__LINE__, __in6##__LINE__, __in7##__LINE__, \
-          __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, __out4##__LINE__, __out5##__LINE__, __out6##__LINE__, __out7##__LINE__, \
-          __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__, __tmp4##__LINE__, __tmp5##__LINE__, __tmp6##__LINE__, __tmp7##__LINE__, \
-          __tmpp0##__LINE__, __tmpp1##__LINE__, __tmpp2##__LINE__, __tmpp3##__LINE__, __tmpp4##__LINE__, __tmpp5##__LINE__, __tmpp6##__LINE__, __tmpp7##__LINE__)
-
-#endif //__AVX512F__
diff --git a/Tr/TrackVectorFit/TrackVectorFit/SimdTransposeHelper.h b/Tr/TrackVectorFit/TrackVectorFit/SimdTransposeHelper.h
deleted file mode 100644
index 6758aafe4df06a17e8c78a54d4821c22b1070f86..0000000000000000000000000000000000000000
--- a/Tr/TrackVectorFit/TrackVectorFit/SimdTransposeHelper.h
+++ /dev/null
@@ -1,665 +0,0 @@
-#pragma once
-
-#ifdef __SSE__
-#include "immintrin.h"
-#endif
-#include "VectorConfiguration.h"
-#include "SimdTranspose.h"
-
-namespace Tr {
-
-namespace TrackVectorFit {
-
-namespace {
-  template <class T>
-  static constexpr T round_down(T n, T align) {
-    return n & -align;
-  }
-  template <class T>
-  static constexpr T round_up(T n, T align) {
-    return (n + align-1) & -align;
-  }
-  template <std::size_t N, class ISA, class T>
-  struct transpose_helper_soaize {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      for (int i = 0; i < N; i++) {
-        for (int j = 0; j < W; j++) {
-          out_rows(i)[j] = in_rows(j)[i];
-        }
-      }
-    }
-  };
-  template <std::size_t N, class ISA, class T>
-  struct transpose_helper_get {
-    template <class F, std::size_t... Is>
-    static std::array<T, N*sizeof...(Is)> get(
-      F getter,
-      std::index_sequence<Is...>
-    ) {
-      constexpr std::size_t W = sizeof...(Is);
-      std::array<T, N*W> soa;
-      transpose_helper_soaize<N, ISA, T>::template soaize<W>(
-              getter,
-              [&soa](int i){ return &soa[i*W]; }
-      );
-      return soa;
-    }
-  };
-
-  template <std::size_t N, class ISA = ISAs::CURRENT, class T = TRACKVECTORFIT_PRECISION>
-  struct transpose_helper {
-    template <std::size_t W, class F>
-    static std::array<T, N*W> get(
-      F getter
-    ) {
-      return transpose_helper_get<N, ISA, T>::get(getter, std::make_index_sequence<W>{});
-    }
-  };
-
-  template <class T>
-  struct transpose_helper_get<5, ISAs::SCALAR, T> {
-    template <class F, std::size_t... Is>
-    static std::array<T, 5*sizeof...(Is)> get(
-      F getter,
-      std::index_sequence<Is...>
-    ) {
-      return std::array<TRACKVECTORFIT_PRECISION, 5*sizeof...(Is)> {
-        getter(Is)[0]... ,
-        getter(Is)[1]... ,
-        getter(Is)[2]... ,
-        getter(Is)[3]... ,
-        getter(Is)[4]...
-      };
-    }
-  };
-
-  template <class T>
-  struct transpose_helper_get<15, ISAs::SCALAR, T> {
-    template <class F, std::size_t... Is>
-    static std::array<T, 15*sizeof...(Is)> get(
-      F getter,
-      std::index_sequence<Is...>
-    ) {
-      return std::array<TRACKVECTORFIT_PRECISION, 15*sizeof...(Is)> {
-        getter(Is)[0]... ,
-        getter(Is)[1]... ,
-        getter(Is)[2]... ,
-        getter(Is)[3]... ,
-        getter(Is)[4]... ,
-        getter(Is)[5]... ,
-        getter(Is)[6]... ,
-        getter(Is)[7]... ,
-        getter(Is)[8]... ,
-        getter(Is)[9]... ,
-        getter(Is)[10]... ,
-        getter(Is)[11]... ,
-        getter(Is)[12]... ,
-        getter(Is)[13]... ,
-        getter(Is)[14]...
-      };
-    }
-  };
-
-  template <class T>
-  struct transpose_helper_get<25, ISAs::SCALAR, T> {
-    template <class F, std::size_t... Is>
-    static std::array<T, 25*sizeof...(Is)> get(
-      F getter,
-      std::index_sequence<Is...>
-    ) {
-      return std::array<TRACKVECTORFIT_PRECISION, 25*sizeof...(Is)> {
-        getter(Is)[0]... ,
-        getter(Is)[1]... ,
-        getter(Is)[2]... ,
-        getter(Is)[3]... ,
-        getter(Is)[4]... ,
-        getter(Is)[5]... ,
-        getter(Is)[6]... ,
-        getter(Is)[7]... ,
-        getter(Is)[8]... ,
-        getter(Is)[9]... ,
-        getter(Is)[10]... ,
-        getter(Is)[11]... ,
-        getter(Is)[12]... ,
-        getter(Is)[13]... ,
-        getter(Is)[14]... ,
-        getter(Is)[15]... ,
-        getter(Is)[16]... ,
-        getter(Is)[17]... ,
-        getter(Is)[18]... ,
-        getter(Is)[19]... ,
-        getter(Is)[20]... ,
-        getter(Is)[21]... ,
-        getter(Is)[22]... ,
-        getter(Is)[23]... ,
-        getter(Is)[24]...
-      };
-    }
-  };
-
-#ifdef TRACKVECTORFIT_SINGLE_PRECISION
-
-#ifdef __SSE__
-  template <std::size_t N>
-  struct transpose_helper_soaize<N, ISAs::SSE, TRACKVECTORFIT_PRECISION> {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      constexpr std::size_t sse_card = ISAs::SSE::card<TRACKVECTORFIT_PRECISION>();
-      unsigned i, j;
-      static_assert(W % sse_card == 0, "Width is not a multiple of SIMD card (4)");
-      for (i = 0; i < round_down(N, sse_card); i += sse_card) {
-        for (j = 0; j < W; j += sse_card) {
-          __m128 r0, r1, r2, r3;
-          r0 = _mm_loadu_ps(&in_rows(j+0)[i]);
-          r1 = _mm_loadu_ps(&in_rows(j+1)[i]);
-          r2 = _mm_loadu_ps(&in_rows(j+1)[i]);
-          r3 = _mm_loadu_ps(&in_rows(j+1)[i]);
-
-          _MM_TRANSPOSE4_PS(r0, r1, r2, r3);
-
-          _mm_store_ps(&out_rows(i+0)[j], r0);
-          _mm_store_ps(&out_rows(i+1)[j], r1);
-          _mm_store_ps(&out_rows(i+1)[j], r2);
-          _mm_store_ps(&out_rows(i+1)[j], r3);
-        }
-      } /* remainder */ {
-        i = round_down(N, sse_card);
-        for (j = 0; j < W; j += sse_card) {
-          __m128 r0, r1, r2, r3;
-          if (N - i <= 4) {
-            r0 = _mm_loadu_ps(&in_rows(j+0)[i]);
-            r1 = _mm_loadu_ps(&in_rows(j+1)[i]);
-            r2 = _mm_loadu_ps(&in_rows(j+2)[i]);
-            r3 = _mm_loadu_ps(&in_rows(j+3)[i]);
-          } else {
-            r0 = _mm_loadu_ps(&in_rows(j+0)[i]);
-            r1 = _mm_loadu_ps(&in_rows(j+1)[i]);
-            r2 = _mm_loadu_ps(&in_rows(j+2)[i]);
-            r3 = _mm_loadu_ps(&in_rows(j+3)[i]);
-          }
-
-          _MM_TRANSPOSE4_PS(r0, r1, r2, r3);
-
-          switch (N - i) {
-          case 4:
-            _mm_store_ps(&out_rows(i+3)[j], r3);
-          case 3:
-            _mm_store_ps(&out_rows(i+2)[j], r2);
-          case 2:
-            _mm_store_ps(&out_rows(i+1)[j], r1);
-          case 1:
-            _mm_store_ps(&out_rows(i+0)[j], r0);
-          }
-        }
-      }
-    }
-  };
-#endif //__SSE__
-
-#ifdef __AVX__
-  template <std::size_t N>
-  struct transpose_helper_soaize<N, ISAs::AVX, TRACKVECTORFIT_PRECISION> {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      constexpr std::size_t avx_card = ISAs::AVX::card<TRACKVECTORFIT_PRECISION>();
-      unsigned i, j;
-      static_assert(W % avx_card == 0, "Width is not a multiple of SIMD card (8)");
-      for (i = 0; i < round_down(N, avx_card); i += avx_card) {
-        for (j = 0; j < W; j += avx_card) {
-          __m256 r0, r1, r2, r3, r4, r5, r6, r7;
-          r0 = _mm256_loadu_ps(&in_rows(j+0)[i]);
-          r1 = _mm256_loadu_ps(&in_rows(j+1)[i]);
-          r2 = _mm256_loadu_ps(&in_rows(j+2)[i]);
-          r3 = _mm256_loadu_ps(&in_rows(j+3)[i]);
-          r4 = _mm256_loadu_ps(&in_rows(j+4)[i]);
-          r5 = _mm256_loadu_ps(&in_rows(j+5)[i]);
-          r6 = _mm256_loadu_ps(&in_rows(j+6)[i]);
-          r7 = _mm256_loadu_ps(&in_rows(j+7)[i]);
-
-          _MM256_TRANSPOSE8_PS(r0, r1, r2, r3, r4, r5, r6, r7);
-
-          _mm256_store_ps(&out_rows(i+0)[j], r0);
-          _mm256_store_ps(&out_rows(i+1)[j], r1);
-          _mm256_store_ps(&out_rows(i+2)[j], r2);
-          _mm256_store_ps(&out_rows(i+3)[j], r3);
-          _mm256_store_ps(&out_rows(i+4)[j], r4);
-          _mm256_store_ps(&out_rows(i+5)[j], r5);
-          _mm256_store_ps(&out_rows(i+6)[j], r6);
-          _mm256_store_ps(&out_rows(i+7)[j], r7);
-        }
-      } /* remainder */ {
-        i = round_down(N, avx_card);
-        for (j = 0; j < W; j += avx_card) {
-          __m256 r0, r1, r2, r3, r4, r5, r6, r7;
-          if (N - i == 1) {
-            r0 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+0)[i]));
-            r1 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+1)[i]));
-            r2 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+2)[i]));
-            r3 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+3)[i]));
-            r4 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+4)[i]));
-            r5 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+5)[i]));
-            r6 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+6)[i]));
-            r7 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+7)[i]));
-          } else if (N - i <= 4) {
-            r0 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+0)[i]));
-            r1 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+1)[i]));
-            r2 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+2)[i]));
-            r3 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+3)[i]));
-            r4 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+4)[i]));
-            r5 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+5)[i]));
-            r6 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+6)[i]));
-            r7 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+7)[i]));
-          } else {
-            r0 = _mm256_loadu_ps(&in_rows(j+0)[i]);
-            r1 = _mm256_loadu_ps(&in_rows(j+1)[i]);
-            r2 = _mm256_loadu_ps(&in_rows(j+2)[i]);
-            r3 = _mm256_loadu_ps(&in_rows(j+3)[i]);
-            r4 = _mm256_loadu_ps(&in_rows(j+4)[i]);
-            r5 = _mm256_loadu_ps(&in_rows(j+5)[i]);
-            r6 = _mm256_loadu_ps(&in_rows(j+6)[i]);
-            r7 = _mm256_loadu_ps(&in_rows(j+7)[i]);
-          }
-
-          _MM256_TRANSPOSE8_PS(r0, r1, r2, r3, r4, r5, r6, r7);
-
-          switch (N - i) {
-          case 8:
-            _mm256_store_ps(&out_rows(i+7)[j], r7);
-          case 7:
-            _mm256_store_ps(&out_rows(i+6)[j], r6);
-          case 6:
-            _mm256_store_ps(&out_rows(i+5)[j], r5);
-          case 5:
-            _mm256_store_ps(&out_rows(i+4)[j], r4);
-          case 4:
-            _mm256_store_ps(&out_rows(i+3)[j], r3);
-          case 3:
-            _mm256_store_ps(&out_rows(i+2)[j], r2);
-          case 2:
-            _mm256_store_ps(&out_rows(i+1)[j], r1);
-          case 1:
-            _mm256_store_ps(&out_rows(i+0)[j], r0);
-          }
-        }
-      }
-    }
-  };
-#endif //__AVX__
-
-#ifdef __AVX512F__
-  template <std::size_t N>
-  struct transpose_helper_soaize<N, ISAs::AVX512, TRACKVECTORFIT_PRECISION> {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      constexpr std::size_t avx512_card = ISAs::AVX512::card<TRACKVECTORFIT_PRECISION>();
-      unsigned i, j;
-      static_assert(W % avx512_card == 0, "Width is not a multiple of SIMD card (16)");
-      for (i = 0; i < round_down(N, avx512_card); i += avx512_card) {
-        for (j = 0; j < W; j += avx512_card) {
-          __m512 r0, r1, r2, r3, r4, r5, r6, r7,
-            r8, r9, r10, r11, r12, r13, r14, r15;
-          r0 = _mm512_loadu_ps(&in_rows(j+0)[i]);
-          r1 = _mm512_loadu_ps(&in_rows(j+1)[i]);
-          r2 = _mm512_loadu_ps(&in_rows(j+2)[i]);
-          r3 = _mm512_loadu_ps(&in_rows(j+3)[i]);
-          r4 = _mm512_loadu_ps(&in_rows(j+4)[i]);
-          r5 = _mm512_loadu_ps(&in_rows(j+5)[i]);
-          r6 = _mm512_loadu_ps(&in_rows(j+6)[i]);
-          r7 = _mm512_loadu_ps(&in_rows(j+7)[i]);
-          r8 = _mm512_loadu_ps(&in_rows(j+8)[i]);
-          r9 = _mm512_loadu_ps(&in_rows(j+9)[i]);
-          r10 = _mm512_loadu_ps(&in_rows(j+10)[i]);
-          r11 = _mm512_loadu_ps(&in_rows(j+11)[i]);
-          r12 = _mm512_loadu_ps(&in_rows(j+12)[i]);
-          r13 = _mm512_loadu_ps(&in_rows(j+13)[i]);
-          r14 = _mm512_loadu_ps(&in_rows(j+14)[i]);
-          r15 = _mm512_loadu_ps(&in_rows(j+15)[i]);
-
-          _MM512_TRANSPOSE16_PS(r0, r1, r2, r3, r4, r5, r6, r7,
-            r8, r9, r10, r11, r12, r13, r14, r15);
-
-          _mm512_store_ps(&out_rows(i+0)[j], r0);
-          _mm512_store_ps(&out_rows(i+1)[j], r1);
-          _mm512_store_ps(&out_rows(i+2)[j], r2);
-          _mm512_store_ps(&out_rows(i+3)[j], r3);
-          _mm512_store_ps(&out_rows(i+4)[j], r4);
-          _mm512_store_ps(&out_rows(i+5)[j], r5);
-          _mm512_store_ps(&out_rows(i+6)[j], r6);
-          _mm512_store_ps(&out_rows(i+7)[j], r7);
-          _mm512_store_ps(&out_rows(i+8)[j], r8);
-          _mm512_store_ps(&out_rows(i+9)[j], r9);
-          _mm512_store_ps(&out_rows(i+10)[j], r10);
-          _mm512_store_ps(&out_rows(i+11)[j], r11);
-          _mm512_store_ps(&out_rows(i+12)[j], r12);
-          _mm512_store_ps(&out_rows(i+13)[j], r13);
-          _mm512_store_ps(&out_rows(i+14)[j], r14);
-          _mm512_store_ps(&out_rows(i+15)[j], r15);
-        }
-      } /* remainder */ {
-        i = round_down(N, avx512_card);
-
-        for (j = 0; j < W; j += avx512_card) {
-          __m512 r0, r1, r2, r3, r4, r5, r6, r7,
-            r8, r9, r10, r11, r12, r13, r14, r15;
-          if (N - i == 1) {
-            r0 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+0)[i]));
-            r1 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+1)[i]));
-            r2 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+2)[i]));
-            r3 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+3)[i]));
-            r4 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+4)[i]));
-            r5 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+5)[i]));
-            r6 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+6)[i]));
-            r7 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+7)[i]));
-            r8 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+8)[i]));
-            r9 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+9)[i]));
-            r10 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+10)[i]));
-            r11 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+11)[i]));
-            r12 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+12)[i]));
-            r13 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+13)[i]));
-            r14 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+14)[i]));
-            r15 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+15)[i]));
-          } else if (N - i <= 4) {
-            r0 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+0)[i]));
-            r1 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+1)[i]));
-            r2 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+2)[i]));
-            r3 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+3)[i]));
-            r4 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+4)[i]));
-            r5 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+5)[i]));
-            r6 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+6)[i]));
-            r7 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+7)[i]));
-            r8 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+8)[i]));
-            r9 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+9)[i]));
-            r10 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+10)[i]));
-            r11 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+11)[i]));
-            r12 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+12)[i]));
-            r13 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+13)[i]));
-            r14 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+14)[i]));
-            r15 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+15)[i]));
-          } else if (N - i <= 8) {
-            r0 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+0)[i]));
-            r1 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+1)[i]));
-            r2 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+2)[i]));
-            r3 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+3)[i]));
-            r4 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+4)[i]));
-            r5 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+5)[i]));
-            r6 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+6)[i]));
-            r7 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+7)[i]));
-            r8 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+8)[i]));
-            r9 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+9)[i]));
-            r10 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+10)[i]));
-            r11 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+11)[i]));
-            r12 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+12)[i]));
-            r13 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+13)[i]));
-            r14 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+14)[i]));
-            r15 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+15)[i]));
-          } else {
-            r0 = _mm512_loadu_ps(&in_rows(j+0)[i]);
-            r1 = _mm512_loadu_ps(&in_rows(j+1)[i]);
-            r2 = _mm512_loadu_ps(&in_rows(j+2)[i]);
-            r3 = _mm512_loadu_ps(&in_rows(j+3)[i]);
-            r4 = _mm512_loadu_ps(&in_rows(j+4)[i]);
-            r5 = _mm512_loadu_ps(&in_rows(j+5)[i]);
-            r6 = _mm512_loadu_ps(&in_rows(j+6)[i]);
-            r7 = _mm512_loadu_ps(&in_rows(j+7)[i]);
-            r8 = _mm512_loadu_ps(&in_rows(j+8)[i]);
-            r9 = _mm512_loadu_ps(&in_rows(j+9)[i]);
-            r10 = _mm512_loadu_ps(&in_rows(j+10)[i]);
-            r11 = _mm512_loadu_ps(&in_rows(j+11)[i]);
-            r12 = _mm512_loadu_ps(&in_rows(j+12)[i]);
-            r13 = _mm512_loadu_ps(&in_rows(j+13)[i]);
-            r14 = _mm512_loadu_ps(&in_rows(j+14)[i]);
-            r15 = _mm512_loadu_ps(&in_rows(j+15)[i]);
-          }
-
-          _MM512_TRANSPOSE16_PS(r0, r1, r2, r3, r4, r5, r6, r7,
-            r8, r9, r10, r11, r12, r13, r14, r15);
-
-          switch (N - i) {
-          case 16:
-            _mm512_store_ps(&out_rows(i+15)[j], r15);
-          case 15:
-            _mm512_store_ps(&out_rows(i+14)[j], r14);
-          case 14:
-            _mm512_store_ps(&out_rows(i+13)[j], r13);
-          case 13:
-            _mm512_store_ps(&out_rows(i+12)[j], r12);
-          case 12:
-            _mm512_store_ps(&out_rows(i+11)[j], r11);
-          case 11:
-            _mm512_store_ps(&out_rows(i+10)[j], r10);
-          case 10:
-            _mm512_store_ps(&out_rows(i+9)[j], r9);
-          case 9:
-            _mm512_store_ps(&out_rows(i+8)[j], r8);
-          case 8:
-            _mm512_store_ps(&out_rows(i+7)[j], r7);
-          case 7:
-            _mm512_store_ps(&out_rows(i+6)[j], r6);
-          case 6:
-            _mm512_store_ps(&out_rows(i+5)[j], r5);
-          case 5:
-            _mm512_store_ps(&out_rows(i+4)[j], r4);
-          case 4:
-            _mm512_store_ps(&out_rows(i+3)[j], r3);
-          case 3:
-            _mm512_store_ps(&out_rows(i+2)[j], r2);
-          case 2:
-            _mm512_store_ps(&out_rows(i+1)[j], r1);
-          case 1:
-            _mm512_store_ps(&out_rows(i+0)[j], r0);
-          }
-        }
-      }
-    }
-  };
-#endif //__AVX512F__
-
-#else
-
-#ifdef __SSE__
-  template <std::size_t N>
-  struct transpose_helper_soaize<N, ISAs::SSE, double> {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      constexpr std::size_t sse_card = ISAs::SSE::card<double>();
-      unsigned i, j;
-      static_assert(W % sse_card == 0, "Width is not a multiple of SIMD card (2)");
-      for (i = 0; i < round_down(N, sse_card); i += sse_card) {
-        for (j = 0; j < W; j += sse_card) {
-          __m128d r0, r1;
-          r0 = _mm_loadu_pd(&in_rows(j+0)[i]);
-          r1 = _mm_loadu_pd(&in_rows(j+1)[i]);
-
-          _MM_TRANSPOSE2_PD(r0, r1);
-
-          _mm_store_pd(&out_rows(i+0)[j], r0);
-          _mm_store_pd(&out_rows(i+1)[j], r1);
-        }
-      } /* remainder */ {
-        i = round_down(N, sse_card);
-        if (i == N-1) {
-          for (j = 0; j < W; j += sse_card) {
-            __m128d r0, r1;
-            r0 = _mm_loadu_pd(&in_rows(j+0)[i]);
-            r1 = _mm_loadu_pd(&in_rows(j+1)[i]);
-
-            _MM_TRANSPOSE2_PD(r0, r1);
-
-            _mm_store_pd(&out_rows(i+0)[j], r0);
-          }
-        }
-      }
-    }
-  };
-#endif //__SSE__
-
-#ifdef __AVX__
-  template <std::size_t N>
-  struct transpose_helper_soaize<N, ISAs::AVX, double> {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      constexpr std::size_t avx_card = ISAs::AVX::card<double>();
-      unsigned i, j;
-      static_assert(W % avx_card == 0, "Width is not a multiple of SIMD card (4)");
-      for (i = 0; i < round_down(N, avx_card); i += avx_card) {
-        for (j = 0; j < W; j += avx_card) {
-          __m256d r0, r1, r2, r3;
-          r0 = _mm256_loadu_pd(&in_rows(j+0)[i]);
-          r1 = _mm256_loadu_pd(&in_rows(j+1)[i]);
-          r2 = _mm256_loadu_pd(&in_rows(j+2)[i]);
-          r3 = _mm256_loadu_pd(&in_rows(j+3)[i]);
-
-          _MM256_TRANSPOSE4_PD(r0, r1, r2, r3);
-
-          _mm256_store_pd(&out_rows(i+0)[j], r0);
-          _mm256_store_pd(&out_rows(i+1)[j], r1);
-          _mm256_store_pd(&out_rows(i+2)[j], r2);
-          _mm256_store_pd(&out_rows(i+3)[j], r3);
-        }
-      } /* remainder */ {
-        i = round_down(N, avx_card);
-        for (j = 0; j < W; j += avx_card) {
-          __m256d r0, r1, r2, r3;
-          /*if (N - i == 1) {
-            r0 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+0)[i]));
-            r1 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+1)[i]));
-            r2 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+2)[i]));
-            r3 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+3)[i]));
-          } else*/ if (N - i <= 2) {
-            r0 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+0)[i]));
-            r1 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+1)[i]));
-            r2 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+2)[i]));
-            r3 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+3)[i]));
-          } else {
-            r0 = _mm256_loadu_pd(&in_rows(j+0)[i]);
-            r1 = _mm256_loadu_pd(&in_rows(j+1)[i]);
-            r2 = _mm256_loadu_pd(&in_rows(j+2)[i]);
-            r3 = _mm256_loadu_pd(&in_rows(j+3)[i]);
-          }
-
-          _MM256_TRANSPOSE4_PD(r0, r1, r2, r3);
-
-          switch (N - i) {
-          case 4:
-            _mm256_store_pd(&out_rows(i+3)[j], r3);
-          case 3:
-            _mm256_store_pd(&out_rows(i+2)[j], r2);
-          case 2:
-            _mm256_store_pd(&out_rows(i+1)[j], r1);
-          case 1:
-            _mm256_store_pd(&out_rows(i+0)[j], r0);
-          }
-        }
-      }
-    }
-  };
-#endif //__AVX__
-
-#ifdef __AVX512F__
-  template <std::size_t N>
-  struct transpose_helper_soaize<N, ISAs::AVX512, double> {
-    template <std::size_t W, class F, class G>
-    static void soaize(F in_rows, G out_rows) {
-      //constexpr std::size_t N = 15;
-      constexpr std::size_t avx512_card = ISAs::AVX512::card<double>();
-      unsigned i, j;
-      static_assert(W % avx512_card == 0, "Width is not a multiple of SIMD card (8)");
-      for (i = 0; i < round_down(N, avx512_card); i += avx512_card) {
-        for (j = 0; j < W; j += avx512_card) {
-          __m512d r0, r1, r2, r3, r4, r5, r6, r7;
-          r0 = _mm512_loadu_pd(&in_rows(j+0)[i]);
-          r1 = _mm512_loadu_pd(&in_rows(j+1)[i]);
-          r2 = _mm512_loadu_pd(&in_rows(j+2)[i]);
-          r3 = _mm512_loadu_pd(&in_rows(j+3)[i]);
-          r4 = _mm512_loadu_pd(&in_rows(j+4)[i]);
-          r5 = _mm512_loadu_pd(&in_rows(j+5)[i]);
-          r6 = _mm512_loadu_pd(&in_rows(j+6)[i]);
-          r7 = _mm512_loadu_pd(&in_rows(j+7)[i]);
-
-          _MM512_TRANSPOSE8_PD(r0, r1, r2, r3, r4, r5, r6, r7);
-
-          _mm512_store_pd(&out_rows(i+0)[j], r0);
-          _mm512_store_pd(&out_rows(i+1)[j], r1);
-          _mm512_store_pd(&out_rows(i+2)[j], r2);
-          _mm512_store_pd(&out_rows(i+3)[j], r3);
-          _mm512_store_pd(&out_rows(i+4)[j], r4);
-          _mm512_store_pd(&out_rows(i+5)[j], r5);
-          _mm512_store_pd(&out_rows(i+6)[j], r6);
-          _mm512_store_pd(&out_rows(i+7)[j], r7);
-        }
-      } /* remainder */ {
-        i = round_down(N, avx512_card);
-
-        for (j = 0; j < W; j += avx512_card) {
-          __m512d r0, r1, r2, r3, r4, r5, r6, r7;
-          if (N - i == 1) {
-            r0 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+0)[i]));
-            r1 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+1)[i]));
-            r2 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+2)[i]));
-            r3 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+3)[i]));
-            r4 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+4)[i]));
-            r5 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+5)[i]));
-            r6 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+6)[i]));
-            r7 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+7)[i]));
-          } else if (N - i <= 2) {
-            r0 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+0)[i]));
-            r1 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+1)[i]));
-            r2 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+2)[i]));
-            r3 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+3)[i]));
-            r4 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+4)[i]));
-            r5 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+5)[i]));
-            r6 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+6)[i]));
-            r7 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+7)[i]));
-          } else if (N - i <= 4) {
-            r0 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+0)[i]));
-            r1 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+1)[i]));
-            r2 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+2)[i]));
-            r3 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+3)[i]));
-            r4 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+4)[i]));
-            r5 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+5)[i]));
-            r6 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+6)[i]));
-            r7 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+7)[i]));
-          } else {
-            r0 = _mm512_loadu_pd(&in_rows(j+0)[i]);
-            r1 = _mm512_loadu_pd(&in_rows(j+1)[i]);
-            r2 = _mm512_loadu_pd(&in_rows(j+2)[i]);
-            r3 = _mm512_loadu_pd(&in_rows(j+3)[i]);
-            r4 = _mm512_loadu_pd(&in_rows(j+4)[i]);
-            r5 = _mm512_loadu_pd(&in_rows(j+5)[i]);
-            r6 = _mm512_loadu_pd(&in_rows(j+6)[i]);
-            r7 = _mm512_loadu_pd(&in_rows(j+7)[i]);
-          }
-
-          _MM512_TRANSPOSE8_PD(r0, r1, r2, r3, r4, r5, r6, r7);
-
-          switch (N - i) {
-          case 8:
-            _mm512_store_pd(&out_rows(i+7)[j], r7);
-          case 7:
-            _mm512_store_pd(&out_rows(i+6)[j], r6);
-          case 6:
-            _mm512_store_pd(&out_rows(i+5)[j], r5);
-          case 5:
-            _mm512_store_pd(&out_rows(i+4)[j], r4);
-          case 4:
-            _mm512_store_pd(&out_rows(i+3)[j], r3);
-          case 3:
-            _mm512_store_pd(&out_rows(i+2)[j], r2);
-          case 2:
-            _mm512_store_pd(&out_rows(i+1)[j], r1);
-          case 1:
-            _mm512_store_pd(&out_rows(i+0)[j], r0);
-          }
-        }
-      }
-    }
-  };
-#endif //__AVX512F__
-
-#endif
-}
-
-}
-
-}
diff --git a/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h
index ae55e65a6583db2e5b3903bea9a55a6960ad8f7c..546b66c268ce1d5eed6df01701333cb3ef9ed2d8 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h
@@ -26,30 +26,67 @@ namespace TrackVectorFit {
 struct TrackVectorFit {
   unsigned m_memManagerStorageIncrements;
   bool m_debug;
-  // The three schedulers
-  std::list<Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>> m_scheduler_pre;
-  std::list<Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>> m_scheduler_main;
-  std::list<Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>> m_scheduler_post;
+  bool m_verbose;
+  // The scheduler
+  std::list<Sch::Blueprint<vector_width()>> m_scheduler;
+
   // VectorFit MemManagers local to this TrackVectorFit
   Mem::Store m_smoothStore;
   Mem::Store m_nodeParametersStore;
   Mem::Store m_transportForwardStore;
   Mem::Store m_transportBackwardStore;
-  Mem::Store m_auxStore;
   Mem::Iterator m_forwardNodeIterator;
   Mem::Iterator m_forwardStoreIterator;
-  Mem::Iterator m_smoothStoreIterator;
+  Mem::ReverseIterator m_smoothStoreIterator;
   Mem::ReverseIterator m_forwardMainStoreIterator;
+  
   // Stores with TES ownership
   std::vector<Mem::Store>::iterator m_forwardStore;
   std::vector<Mem::Store>::iterator m_backwardStore;
 
+  /**
+   * @brief Constructor
+   * 
+   * @param memManagerStorageIncrements Storage increment for each new chunk of data
+   * @param debug Debug flag
+   */
   TrackVectorFit (
     const unsigned& memManagerStorageIncrements = 4096,
-    const bool& debug = false
-  );
+    const bool& debug = false,
+    const bool& verbose = false
+  ) : m_memManagerStorageIncrements(memManagerStorageIncrements),
+    m_debug(debug), m_verbose(verbose) {
+    m_smoothStore = Mem::Store(Mem::View::SmoothState::size(), memManagerStorageIncrements);
+    m_nodeParametersStore = Mem::Store(Mem::View::NodeParameters::size(), memManagerStorageIncrements);
+    m_transportForwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements);
+    m_transportBackwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements);
+  }
+
+  /**
+   * @brief      Reference getter for m_scheduler
+   *
+   * @return     The scheduler
+   */
+  std::list<Sch::Blueprint<vector_width()>>& scheduler () {
+    return m_scheduler;
+  }
+
+  /**
+   * @brief Resets the stores, add new stores to updatedStatesStore
+   *        that will contain the forward and backward stores.
+   * 
+   * @param updatedStatesStore Stores for keeping the updated states,
+   *                           both forward and backward.
+   * @param debug Sets the debug flag across the fitter
+   */
+  void reset (
+    std::vector<Mem::Store>& updatedStatesStore,
+    const bool& debug=false,
+    const bool& verbose=false
+  ) {
+    m_debug = debug;
+    m_verbose = verbose;
 
-  void reset (std::vector<Mem::Store>& updatedStatesStore) {
     // Reserve updated states stores
     updatedStatesStore.emplace_back(Mem::View::State::size(), m_memManagerStorageIncrements);
     updatedStatesStore.emplace_back(Mem::View::State::size(), m_memManagerStorageIncrements);
@@ -59,117 +96,136 @@ struct TrackVectorFit {
     // Only reset stores not resetted in initializeBasePointers
     m_smoothStore.reset();
     m_nodeParametersStore.reset();
-    m_auxStore.reset();
   }
 
-  void operator() (
-    Track& track
-  );
-
-  void operator() (
-    std::list<std::reference_wrapper<Track>>& tracks
-  );
-
+  /**
+   * @brief Initializes the scheduler and base pointers
+   *        for all tracks
+   *        
+   * @details A static scheduler schedules the execution of all nodes in the tracks
+   *          for a machine configured with vector_width() SIMD width. The scheduler
+   *          follows a DTA (Decreasing Time Algorithm) in order to minimize the
+   *          number of iterations.
+   *          
+   *          Additionally, all base pointers are initialized for all nodes.
+   *          Consequent accesses to any data member in the Nodes is possible.
+   *          Iterators are also initialized in preparation of the upcoming fit.
+   * 
+   * @param tracks Tracks to be scheduled and initialized.
+   * @tparam updateRefVectors Boolean to indicate whether to update reference vectors
+   *                          prior to populating the smooth pointer.
+   */
+  template<bool updateRefVectors>
   void initializeBasePointers (
-    std::list<std::reference_wrapper<Track>>& tracks
+    std::list<std::reference_wrapper<Track>>& tracks,
+    bool copyNoiseMatrix=false
   ) {
     m_transportForwardStore.reset();
     m_transportBackwardStore.reset();
 
-    if (tracks.size() >= VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()) {
-      // Schedule what we are going to do
-      m_scheduler_pre  = Sch::StaticScheduler<Sch::Pre, VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>::generate(tracks);
-      m_scheduler_main = Sch::StaticScheduler<Sch::Main, VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>::generate(tracks);
-      m_scheduler_post = Sch::StaticScheduler<Sch::Post, VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>::generate(tracks);
-
-      // Populates the base pointers with the scheduler provided
-      auto populateBasePointers =
-      [&] (
-        decltype(m_scheduler_pre)& sch,
-        const bool& populateForward,
-        const bool& populateBackward
-      ) {
-        std::for_each(sch.begin(), sch.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-          auto& pool = s.pool;
-          const auto& action = s.action;
-
-          m_nodeParametersStore.getNewVector();
-          m_forwardStore->getNewVector();
-          m_backwardStore->getNewVector();
-          m_smoothStore.getNewVector();
-          if (populateForward)  { m_transportForwardStore.getNewVector(); }
-          if (populateBackward) { m_transportBackwardStore.getNewVector(); }
-
-          for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-            if (action[i]) {
-              pool[i].node->m_nodeParameters.setBasePointer(m_nodeParametersStore.getLastVector() + i);
-              pool[i].node->node().m_forwardState.setBasePointer(m_forwardStore->getLastVector() + i);
-              pool[i].node->node().m_backwardState.setBasePointer(m_backwardStore->getLastVector() + i);
-              // Note: Do NOT update smooth pointer here (we still need that)
-
-              if (populateForward) {
-                pool[i].node->get<Op::Forward, Op::TransportMatrix>().setBasePointer(m_transportForwardStore.getLastVector() + i);
-              }
-
-              if (populateBackward) {
-                pool[i].node->get<Op::Backward, Op::TransportMatrix>().setBasePointer(m_transportBackwardStore.getLastVector() + i);
-              }
+    if (m_debug) {
+      std::cout << "Initializing base pointers of " << tracks.size() << " tracks" << std::endl;
+    }
+
+    // Schedule what we are going to do
+    m_scheduler = Sch::StaticScheduler<vector_width()>::generate(tracks);
+
+    // Populates the base pointers with the scheduler provided
+    auto populateBasePointers =
+    [&] (
+      std::list<Sch::Blueprint<vector_width()>>& sch
+    ) {
+      std::for_each(sch.begin(), sch.end(), [&] (Sch::Blueprint<vector_width()>& s) {
+        auto& pool = s.pool;
+        const auto& action = s.action;
+
+        m_nodeParametersStore.getNewVector();
+        m_forwardStore->getNewVector();
+        m_backwardStore->getNewVector();
+        m_smoothStore.getNewVector();
+        m_transportForwardStore.getNewVector();
+        m_transportBackwardStore.getNewVector();
+
+        for (unsigned i=0; i<vector_width(); ++i) {
+          if (action[i]) {
+            Node& n = *(pool[i].node);
+
+            // Note: We should evaluate the impact of this
+            // It is probably worth copying, as now we iterate forward and
+            // backward through the set
+            if (copyNoiseMatrix) {
+              auto noiseMatrix = n.get<Op::NodeParameters, Op::NoiseMatrix>();
+              n.m_nodeParameters.setBasePointer(m_nodeParametersStore.getLastVector() + i);
+              n.get<Op::NodeParameters, Op::NoiseMatrix>().copy(noiseMatrix);
+            } else {
+              n.m_nodeParameters.setBasePointer(m_nodeParametersStore.getLastVector() + i);
             }
+            
+            // Note: In c++17 we can even do
+            //       if constexpr (updateRefVectors) { ... }
+            if (updateRefVectors) {
+              n.get<Op::NodeParameters, Op::ReferenceVector>().copy(n.get<Op::Smooth, Op::StateVector>());
+            }
+
+            n.node().m_forwardState.setBasePointer(m_forwardStore->getLastVector() + i);
+            n.node().m_backwardState.setBasePointer(m_backwardStore->getLastVector() + i);
+            n.m_smoothState.setBasePointer(m_smoothStore.getLastVector() + i);
+            n.get<Op::Forward, Op::TransportMatrix>().setBasePointer(m_transportForwardStore.getLastVector() + i);
+            n.get<Op::Backward, Op::TransportMatrix>().setBasePointer(m_transportBackwardStore.getLastVector() + i);
           }
-        });
-      };
-
-      // Align the stores to point to new vectors, and initialize iterator attributes
-      m_forwardStore->align();
-      m_backwardStore->align();
-      m_smoothStore.align();
-      m_nodeParametersStore.align();
-      m_forwardNodeIterator = Mem::Iterator(m_nodeParametersStore, true);
-      m_forwardStoreIterator = Mem::Iterator(*m_forwardStore, true);
-      m_smoothStoreIterator = Mem::Iterator(m_smoothStore, true);
-
-      // Populate all pointers
-      populateBasePointers(m_scheduler_pre, false, true);
-      // Add one extra vector to avoid swapping out elements
-      m_forwardStore->getNewVector();
-      m_backwardStore->getNewVector();
-      populateBasePointers(m_scheduler_main, true, true);
-      // Initialize the reverse mainstore iterators here
-      m_forwardMainStoreIterator = Mem::ReverseIterator(*m_forwardStore);
-      // Add one extra vector to avoid swapping out elements
-      m_forwardStore->getNewVector();
-      m_backwardStore->getNewVector();
-      populateBasePointers(m_scheduler_post, true, false);
-    } else {
-      // Align the stores to point to new vectors, and initialize iterator attributes
-      m_forwardStore->align();
-      m_backwardStore->align();
-      m_smoothStore.align();
-      m_nodeParametersStore.align();
-      m_forwardNodeIterator = Mem::Iterator(m_nodeParametersStore, true);
-      m_forwardStoreIterator = Mem::Iterator(*m_forwardStore, true);
-      m_smoothStoreIterator = Mem::Iterator(m_smoothStore, true);
-
-      // Just populate the base pointers of all nodes
-      std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
-        std::for_each(track.m_nodes.begin(), track.m_nodes.end(), [&] (Node& node) {
-          node.m_nodeParameters.setBasePointer(m_nodeParametersStore.getNextElement());
-          node.get<Op::Forward, Op::TransportMatrix>().setBasePointer(m_transportForwardStore.getNextElement());
-          node.get<Op::Backward, Op::TransportMatrix>().setBasePointer(m_transportBackwardStore.getNextElement());
-          node.node().m_forwardState.setBasePointer(m_forwardStore->getNextElement());
-          node.node().m_backwardState.setBasePointer(m_backwardStore->getNextElement());
-          m_smoothStore.getNextElement();
-          // Note: Do NOT update the smooth pointer here (we still need that)
-        });
+        }
       });
-    }
+    };
+
+    // Align the stores to point to new vectors, and initialize iterator attributes
+    m_forwardStore->align();
+    m_backwardStore->align();
+    m_smoothStore.align();
+    m_nodeParametersStore.align();
+    m_forwardNodeIterator = Mem::Iterator(m_nodeParametersStore, true);
+    m_forwardStoreIterator = Mem::Iterator(*m_forwardStore, true);
+
+    populateBasePointers(m_scheduler);
+    
+    // Initialize the reverse mainstore iterators here
+    m_forwardMainStoreIterator = Mem::ReverseIterator(*m_forwardStore);
+    m_smoothStoreIterator = Mem::ReverseIterator(m_smoothStore);
   }
+
+  template<bool checkNodeType>
+  void smoothFit (
+    std::list<std::reference_wrapper<Track>>& tracks
+  );
   
+  /**
+   * @brief Populate the nodes with the calculated fit information.
+   *        This performs the AOSOA -> AOS conversion.
+   */
   void populateNodes (
     Track& track
-  );
+  ) {
+    std::for_each(track.nodes().begin(), track.nodes().end(), [] (Node& n) {
+      auto& node = n.node();
+
+      if (node.type() == LHCb::Node::HitOnTrack) {
+        node.setErrMeasure(n.get<Op::NodeParameters, Op::ErrMeasure>());
+        node.setProjectionMatrix((Gaudi::TrackProjectionMatrix) n.get<Op::NodeParameters, Op::ProjectionMatrix>());
+        node.setResidual(n.get<Op::Smooth, Op::Residual>());
+        node.setErrResidual(n.get<Op::Smooth, Op::ErrResidual>());
+        
+        const auto smoothStateVector = (Gaudi::TrackVector) n.get<Op::Smooth, Op::StateVector>();
+        node.setRefVector(smoothStateVector);
+        node.setState(
+          smoothStateVector,
+          (Gaudi::TrackSymMatrix) n.get<Op::Smooth, Op::Covariance>(),
+          node.z());
+      }
+    });
+  }
 };
 
 }
 
 }
+
+#include "TrackVectorFit.h_impl"
diff --git a/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h_impl b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h_impl
new file mode 100644
index 0000000000000000000000000000000000000000..fb6dbe423fcab9de25f41cb0cadd3e2ad8dc5265
--- /dev/null
+++ b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h_impl
@@ -0,0 +1,403 @@
+namespace Tr {
+
+namespace TrackVectorFit {
+
+/**
+ * @brief Performs the forward fit, backward fit and smoother
+ *        over all designated tracks.
+ *        
+ * @details The vectorized fit is activated if invoked with at least
+ *          vector_width() tracks. A fallback sequential fit is invoked
+ *          otherwise. In either case, the method relies on the structures generated by
+ *          initializeBasePointers to perform the fit.
+ *          
+ *          The vectorized fit strictly follows the scheduler to
+ *          determine what is computed at each iteration. In particular,
+ *          tracks are processed in the following order:
+ *          
+ *          * Forward fit: All tracks are fitted in the forward direction.
+ *            Only updated states are kept.
+ * 
+ *          * Backward fit and smoother: All tracks are fitted in the backward
+ *            direction. The smoothed state is a result of averaging the
+ *            forward updated state and the backward predicted state.
+ *            After its execution, the backward updated state and the smoothed
+ *            state are kept.
+ *           
+ *          No conversion from / to AOSOA is performed in the fit. For that,
+ *          check populateNodes.
+ * 
+ * @param tracks Tracks to be fitted
+ */
+template<bool checkNodeType>
+void TrackVectorFit::smoothFit (
+  std::list<std::reference_wrapper<Track>>& tracks
+) {
+  if (m_debug) {
+    std::cout << "Fitting " << tracks.size() << " tracks" << std::endl;
+  }
+
+  // Iterators for all stores
+  Mem::Iterator forwardTransportIterator (m_transportForwardStore);
+  Mem::ReverseIterator backwardTransportIterator (m_transportBackwardStore);
+  Mem::ReverseIterator backwardNodeIterator (m_nodeParametersStore);
+  Mem::ReverseIterator backwardStoreIterator (*m_backwardStore);
+
+  // Iterators initialized from the object iterators
+  Mem::Iterator forwardNodeIterator (m_forwardNodeIterator);
+  Mem::Iterator forwardStoreIterator (m_forwardStoreIterator);
+  Mem::ReverseIterator forwardMainStoreIterator (m_forwardMainStoreIterator);
+  Mem::ReverseIterator smoothMainStoreIterator (m_smoothStoreIterator);
+
+  if (m_debug) {
+    std::cout << "Scheduler iterations:" << std::endl
+      << m_scheduler << std::endl << std::endl
+      << "Iterators:" << std::endl
+      << " forwardTransportIterator: " << forwardTransportIterator
+      << " backwardTransportIterator: " << backwardTransportIterator
+      << " backwardNodeIterator: " << backwardNodeIterator
+      << " backwardStoreIterator: " << backwardStoreIterator
+      << " forwardNodeIterator: " << forwardNodeIterator
+      << " forwardStoreIterator: " << forwardStoreIterator
+      << " forwardMainStoreIterator: " << forwardMainStoreIterator
+      << " smoothMainStoreIterator: " << smoothMainStoreIterator
+      << std::endl
+      << "Calculating forward fit iterations" << std::endl;
+  }
+  
+  // Pointers to keep track of the last state and node parameters pointers
+  TRACKVECTORFIT_PRECISION* lastStatePointer = nullptr;
+  TRACKVECTORFIT_PRECISION* lastNodeParametersPointer = nullptr;
+
+  std::for_each (m_scheduler.begin(), m_scheduler.end(), [&] (Sch::Blueprint<vector_width()>& s) {
+    const auto& in = s.in;
+    const auto& action = s.action;
+    auto& pool = s.pool;
+
+    // Increment iterators, prepare views
+    const auto& currentStatePointer = forwardStoreIterator.nextVector();
+    const auto& transportMatrixPointer = forwardTransportIterator.nextVector();
+    const auto& currentParametersPointer = forwardNodeIterator.nextVector();
+    Mem::View::NodeParameters currentParameters (currentParametersPointer);
+    Mem::View::State lastState (lastStatePointer);
+    Mem::View::State currentState (currentStatePointer);
+
+    if (m_verbose) {
+      std::cout << s << std::endl
+        << " pool[0] np " << (*(pool[0].node)).m_nodeParameters.m_basePointer
+        << " currentNP " << currentParametersPointer << std::endl
+        << " pool[0] fwd state " << (*(pool[0].node)).node().m_forwardState.m_basePointer
+        << " currentFwdState " << currentStatePointer << std::endl
+        << " pool[0] tm " << (*(pool[0].node)).m_forwardTransportMatrix.m_basePointer
+        << " current tm " << transportMatrixPointer << std::endl;
+    }
+    
+    if (action.all()) {
+      if (in.all()) {
+        // All of the nodes are starting now
+        // Just prepare data and do "update"
+
+        // Fetch the reference vectors for the state
+        std::copy_n(ArrayGen::InitialState<vector_width()>(pool).begin(),
+          5*vector_width(),
+          currentState.m_updatedState.m_basePointer);
+
+        // Fetch the initial covariance matrix for the cov matrix
+        std::copy_n(ArrayGen::InitialCovariance<vector_width()>().begin(),
+          15*vector_width(),
+          currentState.m_updatedCovariance.m_basePointer);
+
+        // Update
+        Vector::update<vector_width(), checkNodeType> (
+          currentParameters.m_referenceVector.m_basePointer,
+          currentParameters.m_projectionMatrix.m_basePointer,
+          currentParameters.m_referenceResidual,
+          currentParameters.m_errorMeasure,
+          currentState.m_updatedState.m_basePointer,
+          currentState.m_updatedCovariance.m_basePointer,
+          currentState.m_chi2,
+          pool
+        );
+      } else if (in.any()) {
+        // Some states are starting now, but others continue their execution
+        // Do a vectorized fit, and follow up with
+        // an initial sequential fit for the starting nodes
+
+        // Fit
+        Vector::fit<Op::Forward>::op<vector_width(), checkNodeType> (
+          currentParameters.m_referenceVector.m_basePointer,
+          currentParameters.m_projectionMatrix.m_basePointer,
+          currentParameters.m_referenceResidual,
+          currentParameters.m_errorMeasure,
+          currentParameters.m_transportVector.m_basePointer,
+          currentParameters.m_noiseMatrix.m_basePointer,
+          transportMatrixPointer,
+          lastState.m_updatedState.m_basePointer,
+          lastState.m_updatedCovariance.m_basePointer,
+          currentState.m_updatedState.m_basePointer,
+          currentState.m_updatedCovariance.m_basePointer,
+          currentState.m_chi2,
+          pool
+        );
+
+        // Replace data with those from starting nodes
+        for (unsigned i=0; i<vector_width(); ++i) {
+          if (in[i]) {
+            Scalar::fit<Op::Forward>(*(pool[i].node), ArrayGen::InitialCovariance_values());
+          }
+        }
+      }
+      else {
+        // Fit
+        Vector::fit<Op::Forward>::op<vector_width(), checkNodeType> (
+          currentParameters.m_referenceVector.m_basePointer,
+          currentParameters.m_projectionMatrix.m_basePointer,
+          currentParameters.m_referenceResidual,
+          currentParameters.m_errorMeasure,
+          currentParameters.m_transportVector.m_basePointer,
+          currentParameters.m_noiseMatrix.m_basePointer,
+          transportMatrixPointer,
+          lastState.m_updatedState.m_basePointer,
+          lastState.m_updatedCovariance.m_basePointer,
+          currentState.m_updatedState.m_basePointer,
+          currentState.m_updatedCovariance.m_basePointer,
+          currentState.m_chi2,
+          pool
+        );
+      }
+    } else {
+      // Since !action.all(), we couldn't allocate enough
+      // nodes to process in parallel - so let's do them
+      // individually
+      for (unsigned i=0; i<vector_width(); ++i) {
+        if (action[i]) {
+          Node& node = *(pool[i].node);
+          if (in[i]) {
+            Scalar::fit<Op::Forward>(node, ArrayGen::InitialCovariance_values());
+          } else {
+            Node& prevnode = *(pool[i].node-1);
+            Scalar::fit<Op::Forward, true>(node, prevnode);
+          }
+        }
+      }
+    }
+
+    lastStatePointer = currentStatePointer;
+  });
+
+  // Backward fit and smoother iterations
+  if (m_debug) {
+    std::cout << "Calculating backward fit and smoother iterations" << std::endl;
+  }
+  std::for_each (m_scheduler.rbegin(), m_scheduler.rend(), [&] (Sch::Blueprint<vector_width()>& s) {
+    const auto& in  = s.out;
+    const auto& out = s.in;
+    const auto& action = s.action;
+    auto& pool = s.pool;
+
+    const auto& currentStatePointer = backwardStoreIterator.previousVector();
+    const auto& currentNodeParametersPointer = backwardNodeIterator.previousVector();
+    const auto& transportMatrixPointer = backwardTransportIterator.previousVector();
+    const auto& smootherStatePointer = smoothMainStoreIterator.previousVector();
+    Mem::View::State lastState (lastStatePointer);
+    Mem::View::State currentState (currentStatePointer);
+    Mem::View::NodeParameters lastParameters (lastNodeParametersPointer);
+    Mem::View::NodeParameters currentParameters (currentNodeParametersPointer);
+    Mem::View::State forwardState (forwardMainStoreIterator.previousVector());
+
+    if (m_verbose) {
+      std::cout << s << std::endl
+        << " pool[0] np " << (*(pool[0].node)).m_nodeParameters.m_basePointer
+        << " currentNP " << currentNodeParametersPointer << std::endl
+        << " pool[0] bwd state " << (*(pool[0].node)).node().m_backwardState.m_basePointer
+        << " currentBwdState " << currentStatePointer << std::endl
+        << " pool[0] tm " << (*(pool[0].node)).m_backwardTransportMatrix.m_basePointer
+        << " current tm " << transportMatrixPointer << std::endl
+        << " pool[0] smooth state " << (*(pool[0].node)).m_smoothState.m_basePointer
+        << " currentSmoothState " << smootherStatePointer << std::endl;
+    }
+
+    if (action.all()) {
+      if (in.all()) {
+        // All of the nodes are starting now
+        // Just prepare data and do "update"
+
+        // Fetch the reference vectors for the state
+        std::copy_n(ArrayGen::InitialState<vector_width()>(pool).begin(),
+          5*vector_width(),
+          currentState.m_updatedState.m_basePointer);
+
+        // Fetch the initial covariance matrix for the cov matrix
+        std::copy_n(ArrayGen::InitialCovariance<vector_width()>().begin(),
+          15*vector_width(),
+          currentState.m_updatedCovariance.m_basePointer);
+      } else {
+        if (in.any() || out.any()) {
+          // Some states are starting or finishing now, but others continue their execution
+          // Treat all nodes individually, and then smooth and update them together
+          // Note: The smoothed state for the first and last items will be redone afterwards
+          for (unsigned i=0; i<vector_width(); ++i) {
+            Node& node = *(pool[i].node);
+
+            if (in[i]) {
+              Scalar::initialize<Op::Backward>(node, ArrayGen::InitialCovariance_values());
+            } else {
+              Node& prevnode = *(pool[i].node+1);
+              Scalar::predict<Op::Backward, true>(node, prevnode);
+            }
+          }
+        } else {
+          // Predict
+          Vector::predict<Op::Backward>::op<vector_width()> (
+            lastParameters.m_transportVector.m_basePointer,
+            lastParameters.m_noiseMatrix.m_basePointer,
+            transportMatrixPointer,
+            lastState.m_updatedState.m_basePointer,
+            lastState.m_updatedCovariance.m_basePointer,
+            currentState.m_updatedState.m_basePointer,
+            currentState.m_updatedCovariance.m_basePointer
+          );
+        }
+
+        // Do the smoother *here*
+        // This benefits from cache locality,
+        // and removes the need of doing swapping to restore
+        // the updated states
+        
+        // Fetch the corresponding forward and smooth state, and update it
+        Mem::View::SmoothState smoothState (smootherStatePointer);
+
+        // Smoother
+        const uint16_t smoother_result = Vector::smoother<vector_width()> (
+          forwardState.m_updatedState.m_basePointer,
+          forwardState.m_updatedCovariance.m_basePointer,
+          currentState.m_updatedState.m_basePointer,      // These are still the predicted state
+          currentState.m_updatedCovariance.m_basePointer, // and covariance
+          smoothState.m_state.m_basePointer,
+          smoothState.m_covariance.m_basePointer
+        );
+
+        // Update residuals
+        Vector::updateResiduals<vector_width(), checkNodeType> (
+          currentParameters.m_referenceVector.m_basePointer,
+          currentParameters.m_projectionMatrix.m_basePointer,
+          currentParameters.m_referenceResidual,
+          currentParameters.m_errorMeasure,
+          smoothState.m_state.m_basePointer,
+          smoothState.m_covariance.m_basePointer,
+          smoothState.m_residual,
+          smoothState.m_errResidual,
+          pool
+        );
+
+        // Set errors
+        if (smoother_result != ArrayGen::mask<vector_width()>()) {
+          for (unsigned i=0; i<vector_width(); ++i) {
+            if (action[i] && ~(smoother_result >> i) & 0x01) {
+              pool[i].track->track().setFitStatus(LHCb::Track::FitFailed);
+            }
+          }
+        }
+
+        if (m_debug) {
+          if (smoother_result != ArrayGen::mask<vector_width()>()) {
+            std::cout << "Smoother errors: " << smoother_result << std::endl;
+          }
+        }
+      }
+
+      // Update
+      Vector::update<vector_width(), checkNodeType> (
+        currentParameters.m_referenceVector.m_basePointer,
+        currentParameters.m_projectionMatrix.m_basePointer,
+        currentParameters.m_referenceResidual,
+        currentParameters.m_errorMeasure,
+        currentState.m_updatedState.m_basePointer,
+        currentState.m_updatedCovariance.m_basePointer,
+        currentState.m_chi2,
+        pool
+      );
+    } else {
+      // Since !action.all(), we couldn't allocate enough
+      // nodes to process in parallel - so let's do them
+      // individually
+      for (unsigned i=0; i<vector_width(); ++i) {
+        if (action[i]) {
+          Node& node = *(pool[i].node);
+
+          if (in[i]) {
+            Scalar::fit<Op::Backward>(node, ArrayGen::InitialCovariance_values());
+          } else {
+            Node& prevnode = *(pool[i].node+1);
+
+            if (out[i]) {
+              Scalar::fit<Op::Backward, true>(node, prevnode);
+            } else {
+              Scalar::predict<Op::Backward, true>(node, prevnode);
+              const bool smoothSuccess = Scalar::smoother<true, true>(node);
+              Scalar::updateResiduals(node);
+              if (!smoothSuccess) {
+                pool[i].track->track().setFitStatus(LHCb::Track::FitFailed);
+              }
+              Scalar::update<Op::Backward>(node);
+            }
+          }
+        }
+      }
+    }
+
+    lastStatePointer = currentStatePointer;
+    lastNodeParametersPointer = currentNodeParametersPointer;
+  });
+
+  // Calculate the chi2 a posteriori
+  std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
+    auto& nodes = track.m_nodes;
+    
+    // Reset the values
+    track.m_ndof = -track.m_nTrackParameters;
+    auto ndofBackward = -track.m_nTrackParameters;
+    track.m_forwardFitChi2  = ((TRACKVECTORFIT_PRECISION) 0.0);
+    track.m_backwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0);
+
+    for (unsigned k=0; k<nodes.size(); ++k) {
+      Node& node = nodes[k];
+      if (node.node().type() == LHCb::Node::HitOnTrack) {
+        track.m_forwardFitChi2 += node.get<Op::Forward, Op::Chi2>();
+        ++track.m_ndof;
+      }
+      node.get<Op::Forward, Op::Chi2>() = track.m_forwardFitChi2;
+      node.m_ndof = track.m_ndof;
+
+      // Backwards chi2
+      Node& backwardNode = nodes[nodes.size() - 1 - k];
+      if (backwardNode.node().type() == LHCb::Node::HitOnTrack) {
+        track.m_backwardFitChi2 += backwardNode.get<Op::Backward, Op::Chi2>();
+        ++ndofBackward;
+      }
+      backwardNode.get<Op::Backward, Op::Chi2>() = track.m_backwardFitChi2;
+      backwardNode.m_ndofBackward = ndofBackward;
+    }
+  });
+
+  std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
+    // Copy first element
+    Scalar::smoother<false, true>(track.m_nodes[0]);
+    Scalar::updateResiduals(track.m_nodes[0]);
+
+    // and the last node
+    const unsigned element = track.m_nodes.size() - 1;
+    Scalar::smoother<true, false>(track.m_nodes[element]);
+    Scalar::updateResiduals(track.m_nodes[element]);
+  });
+
+  // Set the LHCb::Node information accordingly
+  std::for_each(tracks.begin(), tracks.end(), [] (Track& track) {
+    // TODO - Either the min or max chi2 could be picked up here.
+    track.track().setChi2AndDoF(track.minChi2(), track.ndof());
+  });
+}
+
+}
+
+}
diff --git a/Tr/TrackVectorFit/TrackVectorFit/Types.h b/Tr/TrackVectorFit/TrackVectorFit/Types.h
index 030f6763bbdc4ad8625c5eb427382e002c2f19bd..5edac3e7df04a30464a1bd2bcfc22bcfa52eed55 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/Types.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/Types.h
@@ -117,8 +117,6 @@ struct FitNode : public LHCb::Node {
 };
 
 struct Node {
-  _aligned Gaudi::TrackVector m_transportVector; // Transport vector for propagation from previous node to this one
-  _aligned Gaudi::TrackSymMatrix m_noiseMatrix; // Noise in propagation from previous node to this one
   Mem::View::TrackMatrix<25> m_forwardTransportMatrix;
   Mem::View::TrackMatrix<25> m_backwardTransportMatrix;
   Mem::View::SmoothState m_smoothState;
@@ -135,8 +133,6 @@ struct Node {
   Node (const Node& copy) = default;
 
   // Const getters
-  inline const Gaudi::TrackVector& transportVector () const { return m_transportVector; }
-  inline const Gaudi::TrackSymMatrix& noiseMatrix () const { return m_noiseMatrix; }
   inline const TRACKVECTORFIT_PRECISION& refResidual () const {
     assert(m_nodeParameters.m_basePointer != nullptr);
     return *m_nodeParameters.m_referenceResidual;
@@ -158,8 +154,14 @@ struct Node {
   inline FitNode& node () { return *m_node; }
 
   // Setters
-  inline void setTransportVector (const Gaudi::TrackVector& transportVector) { m_transportVector = transportVector; }
-  inline void setNoiseMatrix (const Gaudi::TrackSymMatrix& noiseMatrix) { m_noiseMatrix = noiseMatrix; }
+  inline void setTransportVector (const Gaudi::TrackVector& transportVector) {
+    assert(m_nodeParameters.m_basePointer != nullptr);
+    m_nodeParameters.m_transportVector.copy(transportVector);
+  }
+  inline void setNoiseMatrix (const Gaudi::TrackSymMatrix& noiseMatrix) {
+    assert(m_nodeParameters.m_basePointer != nullptr);
+    m_nodeParameters.m_noiseMatrix.copy(noiseMatrix);
+  }
   inline void setRefResidual (const TRACKVECTORFIT_PRECISION& res) {
     assert(m_nodeParameters.m_basePointer != nullptr);
     (*m_nodeParameters.m_referenceResidual) = res;
@@ -200,32 +202,52 @@ struct Node {
   template<class R, class S,
     class U =
       std::conditional_t<std::is_same<S, Op::Covariance>::value,       Mem::View::TrackSymMatrix,
+      std::conditional_t<std::is_same<S, Op::NoiseMatrix>::value,      Mem::View::TrackSymMatrix,
       std::conditional_t<std::is_same<S, Op::StateVector>::value,      Mem::View::TrackVector,
       std::conditional_t<std::is_same<S, Op::ReferenceVector>::value,  Mem::View::TrackVector,
       std::conditional_t<std::is_same<S, Op::ProjectionMatrix>::value, Mem::View::TrackVector,
-      std::conditional_t<std::is_same<S, Op::TransportMatrix>::value,  Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>>
+      std::conditional_t<std::is_same<S, Op::TransportVector>::value,  Mem::View::TrackVector,
+      std::conditional_t<std::is_same<S, Op::TransportMatrix>::value,  Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>>>>
   >
   inline const U& get () const;
 
   template<class R, class S,
     class U =
       std::conditional_t<std::is_same<S, Op::Covariance>::value,       Mem::View::TrackSymMatrix,
+      std::conditional_t<std::is_same<S, Op::NoiseMatrix>::value,      Mem::View::TrackSymMatrix,
       std::conditional_t<std::is_same<S, Op::StateVector>::value,      Mem::View::TrackVector,
       std::conditional_t<std::is_same<S, Op::ReferenceVector>::value,  Mem::View::TrackVector,
       std::conditional_t<std::is_same<S, Op::ProjectionMatrix>::value, Mem::View::TrackVector,
-      std::conditional_t<std::is_same<S, Op::TransportMatrix>::value,  Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>>
+      std::conditional_t<std::is_same<S, Op::TransportVector>::value,  Mem::View::TrackVector,
+      std::conditional_t<std::is_same<S, Op::TransportMatrix>::value,  Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>>>>
   >
   inline U& get ();
+
+  inline void setOutlier ();
 };
 
 #include "NodeGetters.h"
 
-struct Track {
-  std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, VectorConfiguration::bytewidth()>> m_nodes;
+void Node::setOutlier () {
+  // Set this node as outlier
+  // Store its state before it disappears
+  auto& node = this->node();
+  node.setErrMeasure(this->get<Op::NodeParameters, Op::ErrMeasure>());
+  node.setProjectionMatrix((Gaudi::TrackProjectionMatrix) this->get<Op::NodeParameters, Op::ProjectionMatrix>());
+  node.setResidual(this->get<Op::Smooth, Op::Residual>());
+  node.setErrResidual(this->get<Op::Smooth, Op::ErrResidual>());
+  const auto smoothStateVector = (Gaudi::TrackVector) this->get<Op::Smooth, Op::StateVector>();
+  node.setRefVector(smoothStateVector);
+  node.setState(
+    smoothStateVector,
+    (Gaudi::TrackSymMatrix) this->get<Op::Smooth, Op::Covariance>(),
+    node.z());
+
+  node.setType(LHCb::Node::Outlier);
+}
 
-  // Active measurements from this node
-  unsigned m_forwardUpstream;
-  unsigned m_backwardUpstream;
+struct Track {
+  std::vector<Node> m_nodes;
 
   // Chi square of track
   TRACKVECTORFIT_PRECISION m_forwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0);
@@ -240,41 +262,16 @@ struct Track {
   unsigned m_iterationsToConverge;
   unsigned m_numberOfOutlierIterations;
 
-  // Initial covariances
-  Gaudi::TrackSymMatrix m_seedCovariance;
-
   LHCb::Track* m_track;
   unsigned m_index;
 
-  // TODO Move this to the fit level: One can just issue one std::copy_n, and just copy a big chunk of memory
-  //      from a container of just smoothed states to a container of just refvectors
+  // Note: This will be invoked only if the size of the scheduler is the same
   inline void updateRefVectors () {
     std::for_each(m_nodes.begin(), m_nodes.end(), [] (Node& n) {
       n.get<Op::NodeParameters, Op::ReferenceVector>().copy(n.get<Op::Smooth, Op::StateVector>());
     });
   }
 
-  inline void initializeForwardUpstream () {
-    m_forwardUpstream = 0;
-    for (unsigned i=0; i<m_nodes.size(); ++i) {
-      if (m_nodes[i].node().type() == LHCb::Node::HitOnTrack) {
-        m_forwardUpstream = i;
-        return;
-      }
-    }
-  }
-
-  inline void initializeBackwardUpstream () {
-    m_backwardUpstream = m_nodes.size() - 1;
-    for (unsigned i=0; i<m_nodes.size(); ++i) {
-      const unsigned reverse_i = m_nodes.size() - 1 - i;
-      if (m_nodes[reverse_i].node().type() == LHCb::Node::HitOnTrack) {
-        m_backwardUpstream = i;
-        return;
-      }
-    }
-  }
-
   Track (LHCb::Track& track, const unsigned& trackIndex)
   : m_track(&track), m_index(trackIndex) {
     // Generate VectorFit nodes
@@ -283,28 +280,12 @@ struct Track {
     for (auto*& node : m_track->fitResult()->nodes()) {
       m_nodes.emplace_back(static_cast<FitNode*>(node), nodeIndex++);
     }
-
-    // Find the first HitOnTrack node for forward and backward
-    initializeForwardUpstream();
-    initializeBackwardUpstream();
   }
 
   Track (LHCb::Track& track) : Track(track, 0) {}
 
   Track (const Track& track) = default;
 
-  inline void updateUpstream () {
-    // A node became an outlier. Find out if this
-    // has any impact on the upstream defined nodes.
-    if (m_nodes[m_forwardUpstream].node().type() == LHCb::Node::Outlier) {
-      initializeForwardUpstream();
-    }
-
-    if (m_nodes[m_nodes.size() - 1 - m_backwardUpstream].node().type() == LHCb::Node::Outlier) {
-      initializeBackwardUpstream();
-    }
-  }
-
   inline const LHCb::Track& track () const {
     return *m_track;
   }
@@ -313,20 +294,16 @@ struct Track {
     return *m_track;
   }
 
-  inline std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, VectorConfiguration::bytewidth()>>&
+  inline std::vector<Node>&
   nodes () {
     return m_nodes;
   }
 
-  inline const std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, VectorConfiguration::bytewidth()>>&
+  inline const std::vector<Node>&
   nodes () const {
     return m_nodes;
   }
 
-  inline void setSeedCovariance (const Gaudi::TrackSymMatrix& seedCovariance) {
-    m_seedCovariance = seedCovariance;
-  }
-
   inline void setNTrackParameters (const int& nTrackParameters) {
     m_nTrackParameters = nTrackParameters;
   }
@@ -503,20 +480,20 @@ inline std::ostream& operator<< (std::ostream& s, const Node& n) {
 
   s << "Node " << "#" << n.m_index << "\n";
   if (n.node().m_forwardState.m_basePointer != nullptr) {
-  s << " Forward state: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::StateVector>()
-    << ", covariance: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Covariance>()
-    << ", chi2: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Chi2>() << "\n";
+  s << " Forward state: " << n.get<Op::Forward, Op::StateVector>()
+    << ", covariance: " << n.get<Op::Forward, Op::Covariance>()
+    << ", chi2: " << n.get<Op::Forward, Op::Chi2>() << "\n";
   }
   if (n.node().m_backwardState.m_basePointer != nullptr) {
-  s << " Backward state: " << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::StateVector>()
-    << ", covariance: " << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Covariance>()
-    << ", chi2: " << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>() << "\n";
+  s << " Backward state: " << n.get<Op::Backward, Op::StateVector>()
+    << ", covariance: " << n.get<Op::Backward, Op::Covariance>()
+    << ", chi2: " << n.get<Op::Backward, Op::Chi2>() << "\n";
   }
   if (n.m_smoothState.m_basePointer != nullptr) {
-  s << " Smoothed state: " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::StateVector>()
-    << ", covariance: " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Covariance>() << "\n"
-    << " Total chi2: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Chi2>() << ", "
-      << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>() << "\n";
+  s << " Smoothed state: " << n.get<Op::Smooth, Op::StateVector>()
+    << ", covariance: " << n.get<Op::Smooth, Op::Covariance>() << "\n"
+    << " Total chi2: " << n.get<Op::Forward, Op::Chi2>() << ", "
+      << n.get<Op::Backward, Op::Chi2>() << "\n";
   }
   s << " hasInfoUpstream: -, -\n";
 
@@ -528,23 +505,25 @@ inline std::ostream& operator<< (std::ostream& s, const Node& n) {
   }
   s << " at z " << node.z() << "\n"
     << " transport matrix ";
-  if (n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::TransportMatrix>().m_basePointer != nullptr) {
-    s << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::TransportMatrix>();
+  if (n.get<Op::Forward, Op::TransportMatrix>().m_basePointer != nullptr) {
+    s << n.get<Op::Forward, Op::TransportMatrix>();
   }
   s << "\n inverse transport matrix ";
-  if (n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::TransportMatrix>().m_basePointer != nullptr) {
-    s << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::TransportMatrix>();
+  if (n.get<Op::Backward, Op::TransportMatrix>().m_basePointer != nullptr) {
+    s << n.get<Op::Backward, Op::TransportMatrix>();
   }
   if (n.m_smoothState.m_basePointer != nullptr) {
-    s << "\n residual " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Residual>()
-      << " errResidual " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::ErrResidual>();
+    s << "\n residual " << n.get<Op::Smooth, Op::Residual>()
+      << " errResidual " << n.get<Op::Smooth, Op::ErrResidual>();
   }
   if (n.m_nodeParameters.m_basePointer != nullptr) {
-    s << " projectionMatrix " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ProjectionMatrix>()
-      << " refVector " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ReferenceVector>()
-      << " refResidual " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ReferenceResidual>()
-      << " errMeasure " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ErrMeasure>();
+    s << " projectionMatrix " << n.get<Op::NodeParameters, Op::ProjectionMatrix>()
+      << " refVector " << n.get<Op::NodeParameters, Op::ReferenceVector>()
+      << " refResidual " << n.get<Op::NodeParameters, Op::ReferenceResidual>()
+      << " errMeasure " << n.get<Op::NodeParameters, Op::ErrMeasure>();
   }
+  s << "\n noiseMatrix " << n.get<Op::NodeParameters, Op::NoiseMatrix>()
+    << " transportVector " << n.get<Op::NodeParameters, Op::TransportVector>();
 
   return s;
 }
diff --git a/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h b/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h
index 7b508b69dfe5ffed3b3b4784ccf086134180e99b..8751376c322eb1599578e37f257c1404e06b5777 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h
@@ -3,41 +3,18 @@
 #include "Kernel/VectorConfiguration.h"
 #include "TrackInterfaces/ITrackFitter.h"
 
-#define TO_STRING(s) WRAPPED_TO_STRING(s)
-#define WRAPPED_TO_STRING(s) #s
+namespace Tr {
+namespace TrackVectorFit {
 
-namespace ISAs {
-  struct DEFAULT {
-    template <class F>
-    static constexpr std::size_t card() { return 1; }
-  };
-  struct SCALAR {
-    template <class F>
-    static constexpr std::size_t card() { return 1; }
-  };
-  struct SSE {
-    template <class F>
-    static constexpr std::size_t card() { return 16 / sizeof(F); }
-  };
-  struct AVX {
-    template <class F>
-    static constexpr std::size_t card() { return 32 / sizeof(F); }
-  };
-  struct AVX512 {
-    template <class F>
-    static constexpr std::size_t card() { return 64 / sizeof(F); }
-  };
+constexpr std::size_t vector_width () {
+  return VectorConfiguration::width<TRACKVECTORFIT_PRECISION>();
+}
 
-#if defined(__AVX512F__)
-  using CURRENT = AVX512;
-#elif defined(__AVX__)
-  using CURRENT = AVX;
-#elif defined(__SSE__)
-  using CURRENT = SSE;
-#else
-  using CURRENT = SCALAR;
-#endif
 }
+}
+
+#define TO_STRING(s) WRAPPED_TO_STRING(s)
+#define WRAPPED_TO_STRING(s) #s
 
 // Include vectorclass
 #if defined(__AVX512F__)
diff --git a/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h b/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h
index 6678cc5be79ddf1787ad3cd50b15a3c669f48389..b9b1fd679612d4dd2edcb00cf38c85f430967144 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h
@@ -21,6 +21,17 @@ inline void fit (
   update<T>(node);
 }
 
+template<class T>
+inline void fit (
+  Node& node,
+  const std::array<TRACKVECTORFIT_PRECISION, 15>& covariance
+) {
+  node.get<T, Op::StateVector>().copy(node.get<Op::NodeParameters, Op::ReferenceVector>());
+  node.get<T, Op::Covariance>().copy(covariance);
+
+  update<T>(node);
+}
+
 template<class T, bool U>
 inline void fit (
   Node& node,
diff --git a/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h b/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h
index 27eac37fe2ca527013e54d28884d2dedefd539ad..f3ac073e9557eaefd83d476a4c6be34e6b7b8268 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h
@@ -21,13 +21,13 @@ inline void transportCovariance (
     pc.copy(uc);
 
     if (tm[2] != 0 or tm[8] != 0) {
-      pc(0,0) += 2 * uc(2,0) * tm[2] + uc(2,2) * tm[2] * tm[2];
-      pc(2,0) += uc(2,2) * tm[2];
-      pc(1,1) += 2 * uc(3,1) * tm[8] + uc(3,3) * tm[8] * tm[8];
-      pc(3,1) += uc(3,3) * tm[8];
-      pc(1,0) += uc(2,1) * tm[2] + uc(3,0) * tm[8] + uc(3,2) * tm[2] * tm[8];
-      pc(2,1) += uc(3,2) * tm[8];
-      pc(3,0) += uc(3,2) * tm[2];
+      pc[0] += 2 * uc[3] * tm[2] + uc[5] * tm[2] * tm[2];
+      pc[3] += uc[5] * tm[2];
+      pc[2] += 2 * uc[7] * tm[8] + uc[9] * tm[8] * tm[8];
+      pc[7] += uc[9] * tm[8];
+      pc[1] += uc[4] * tm[2] + uc[6] * tm[8] + uc[8] * tm[2] * tm[8];
+      pc[4] += uc[8] * tm[8];
+      pc[6] += uc[8] * tm[2];
     }
   }
 }
@@ -44,22 +44,21 @@ inline void transportCovariance (
     pc.copy(uc);
 
     if (tm[2] != 0 or tm[8] != 0) {
-      pc(0,0) += 2 * uc(2,0) * tm[2] + uc(2,2) * tm[2] * tm[2];
-      pc(2,0) += uc(2,2) * tm[2];
-      pc(1,1) += 2 * uc(3,1) * tm[8] + uc(3,3) * tm[8] * tm[8];
-      pc(3,1) += uc(3,3) * tm[8];
-      pc(1,0) += uc(2,1) * tm[2] + uc(3,0) * tm[8] + uc(3,2) * tm[2] * tm[8];
-      pc(2,1) += uc(3,2) * tm[8];
-      pc(3,0) += uc(3,2) * tm[2];
+      pc[0] += 2 * uc(2,0) * tm[2] + uc(2,2) * tm[2] * tm[2];
+      pc[3] += uc(2,2) * tm[2];
+      pc[2] += 2 * uc(3,1) * tm[8] + uc(3,3) * tm[8] * tm[8];
+      pc[7] += uc(3,3) * tm[8];
+      pc[1] += uc(2,1) * tm[2] + uc(3,0) * tm[8] + uc(3,2) * tm[2] * tm[8];
+      pc[4] += uc(3,2) * tm[8];
+      pc[6] += uc(3,2) * tm[2];
     }
   }
 }
 
-// Initialise
 template<class T>
-inline void initialise (
+inline void initialize (
   Node& node,
-  const Gaudi::TrackSymMatrix& covariance
+  const std::array<TRACKVECTORFIT_PRECISION, 15>& covariance
 ) {
   node.get<T, Op::StateVector>().copy(node.get<Op::NodeParameters, Op::ReferenceVector>());
   node.get<T, Op::Covariance>().copy(covariance);
@@ -85,7 +84,7 @@ inline void predict<Op::Forward, true> (
   Node& node,
   const Node& prevnode
 ) {
-  node.get<Op::Forward, Op::StateVector>().copy(node.transportVector());
+  node.get<Op::Forward, Op::StateVector>().copy(node.get<Op::NodeParameters, Op::TransportVector>());
   for (int i=0; i<5; ++i) {
     for (int j=0; j<5; ++j) {
       node.get<Op::Forward, Op::StateVector>()[i] += 
@@ -98,7 +97,7 @@ inline void predict<Op::Forward, true> (
     node.get<Op::Forward, Op::TransportMatrix>(),
     prevnode.get<Op::Forward, Op::Covariance>(),
     node.get<Op::Forward, Op::Covariance>());
-  node.get<Op::Forward, Op::Covariance>() += node.noiseMatrix();
+  node.get<Op::Forward, Op::Covariance>() += node.get<Op::NodeParameters, Op::NoiseMatrix>();
 }
 
 template<>
@@ -117,7 +116,7 @@ inline void predict<Op::Backward, true> (
 ) {
   Gaudi::TrackVector temp_sub;
   for (int i=0; i<5; ++i) {
-    temp_sub[i] = prevnode.get<Op::Backward, Op::StateVector>()[i] - prevnode.transportVector().Array()[i];
+    temp_sub[i] = prevnode.get<Op::Backward, Op::StateVector>()[i] - prevnode.get<Op::NodeParameters, Op::TransportVector>()[i];
   };
 
   for (int i=0; i<5; ++i) {
@@ -129,7 +128,7 @@ inline void predict<Op::Backward, true> (
 
   Gaudi::TrackSymMatrix temp_cov;
   for (int i=0; i<15; ++i) {
-    temp_cov.Array()[i] = prevnode.get<Op::Backward, Op::Covariance>()[i] + prevnode.noiseMatrix().Array()[i];
+    temp_cov.Array()[i] = prevnode.get<Op::Backward, Op::Covariance>()[i] + prevnode.get<Op::NodeParameters, Op::NoiseMatrix>()[i];
   }
 
   transportCovariance (
diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h
index fd26c67d23e49160818f1441ab87122082699247..7f7b727a526a7a46b3c252aaae4c51be2189d018 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h
@@ -12,22 +12,25 @@ namespace Vector {
 
 template<class T>
 struct fit {
-  template<unsigned W>
+  template<unsigned W, bool checkNodeType>
   static inline void op (
-    std::array<Sch::Item, W>& n,
     fp_ptr_64_const rv,
     fp_ptr_64_const pm,
     fp_ptr_64_const rr,
     fp_ptr_64_const em,
+    fp_ptr_64_const tv,
+    fp_ptr_64_const nm,
     fp_ptr_64_const tm,
     fp_ptr_64_const last_us,
     fp_ptr_64_const last_uc,
     fp_ptr_64 us,
     fp_ptr_64 uc,
-    fp_ptr_64 chi2
+    fp_ptr_64 chi2,
+    std::array<Sch::Item, W>& n
   ) {
     predict<T>::template op<W> (
-      n,
+      tv,
+      nm,
       tm,
       last_us,
       last_uc,
@@ -35,15 +38,15 @@ struct fit {
       uc
     );
 
-    update<W> (
-      n,
+    update<W, checkNodeType> (
       rv,
       pm,
       rr,
       em,
       us,
       uc,
-      chi2
+      chi2,
+      n
     );
   }
 };
diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h
index 7e081909d3cb9c7673842b255545eebd8e7b64d0..63dfad08c941815422822d0a979cd87c585baf1b 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h
@@ -12,26 +12,30 @@ namespace Vector {
 
 template<unsigned W>
 struct MathCommon {
-  static constexpr inline unsigned symid (const int row, const int col) {
-    return ((row * (row + 1)) / 2) + col;
+  /**
+   * @brief      Casts a value to TRACKVECTORFIT_PRECISION,
+   *             mostly to avoid warnings with certain libraries.
+   *
+   *             This should be resolved at compile time to the proper
+   *             type and should not produce any code.
+   */
+  template<class T>
+  static constexpr TRACKVECTORFIT_PRECISION cast (const T& value) {
+    return ((TRACKVECTORFIT_PRECISION) value);
   }
 
+  /**
+   * @brief      Creates a boolvectype populated with true if the node type
+   *             is HitOnTrack and false otherwise
+   */
   template <size_t... Is>
-  static constexpr inline typename Vectype<W>::booltype makeBoolHitOnTrack (
+  static constexpr typename Vectype<W>::booltype makeBoolHitOnTrack (
     const std::array<Sch::Item, W>& nodes,
     std::index_sequence<Is...>
   ) {
     return typename Vectype<W>::booltype(nodes[Is].node->node().type() == LHCb::Node::HitOnTrack...);
   }
 
-  template <size_t... Is>
-  static constexpr inline typename Vectype<W>::booltype makeBoolMeasurement (
-    const std::array<Sch::Item, W>& nodes,
-    std::index_sequence<Is...>
-  ) {
-    return typename Vectype<W>::booltype(nodes[Is].node->node().hasMeasurement()...);
-  }
-
   /**
    * @brief      Similarity transform, manually horizontally vectorised
    *
@@ -176,6 +180,7 @@ struct MathCommon {
    * @param[in]  errorMeas2  Measurement error squared
    * @param[in]  nodes       Nodes
    */
+  template<bool checkNodeType>
   static inline void update (
     fp_ptr_64_const rv_p,
     fp_ptr_64_const pm_p,
@@ -184,7 +189,7 @@ struct MathCommon {
     fp_ptr_64 us_p,
     fp_ptr_64 uc_p,
     fp_ptr_64 chi2_p,
-    const std::array<Sch::Item, W>& nodes_p
+    const std::array<Sch::Item, W>& n
   )  {
     using vectype = typename Vectype<W>::type;
     using boolvectype = typename Vectype<W>::booltype;
@@ -259,11 +264,16 @@ struct MathCommon {
                                 + H[2] * cht[2]
                                 + H[3] * cht[3]
                                 + H[4] * cht[4];
-    const vectype denNotZero = select(denominator != 0, denominator, ((TRACKVECTORFIT_PRECISION) 1.));
-    const boolvectype maskedVector = makeBoolHitOnTrack(nodes_p, std::make_index_sequence<W>());
-    const vectype errorResInv = select(maskedVector,
-      ((TRACKVECTORFIT_PRECISION) 1.) / denNotZero,
-      ((TRACKVECTORFIT_PRECISION) 0.));
+    // const vectype denNotZero = select(denominator != 0, denominator, 1.);
+    // const vectype errorResInv = 1. / denNotZero;
+    vectype errorResInv = cast(1.) / denominator;
+
+    // This should be optimized at compile time
+    // With c++17: if constexpr (checkNodeType) { ... }
+    if (checkNodeType) {
+      const boolvectype maskedVector = makeBoolHitOnTrack(n, std::make_index_sequence<W>());
+      errorResInv = select(maskedVector, errorResInv, cast(0.));
+    }
 
     // update the state vector and cov matrix
     const vectype w = res * errorResInv;
@@ -412,8 +422,9 @@ struct MathCommon {
 
     // Factorization
     L[0] = Csum[0];
-    success &= L[0] > 0.0;
-    L[0] = sqrt(1.0 / select(success, L[0], ((TRACKVECTORFIT_PRECISION) 1.0)));
+    success &= L[0] > cast(0.);
+    // L[0] = sqrt(cast(1.) / L[0]);
+    L[0] = sqrt(cast(1.) / select(success, L[0], cast(1.)));
   
     L[1]  = Csum[1]  * L[0];
     L[3]  = Csum[3]  * L[0];
@@ -421,29 +432,33 @@ struct MathCommon {
     L[10] = Csum[10] * L[0];
   
     L[2] = Csum[2] - L[1]*L[1];
-    success &= L[2] > 0.0;
-    L[2] = sqrt(1.0 / select(success, L[2], ((TRACKVECTORFIT_PRECISION) 1.0)));
+    success &= L[2] > cast(0.0);
+    // L[2] = sqrt(1. / L[2]);
+    L[2] = sqrt(cast(1.) / select(success, L[2], cast(1.)));
   
     L[4]  = (Csum[4]  - L[3] *L[1]) * L[2];
     L[7]  = (Csum[7]  - L[6] *L[1]) * L[2];
     L[11] = (Csum[11] - L[10]*L[1]) * L[2];
   
     L[5] = Csum[5] - L[3]*L[3] - L[4]*L[4];
-    success &= L[5] > 0.0;
-    L[5] = sqrt(1.0 / select(success, L[5], ((TRACKVECTORFIT_PRECISION) 1.0)));
+    success &= L[5] > cast(0.);
+    // L[5] = sqrt(1. / L[5]);
+    L[5] = sqrt(cast(1.) / select(success, L[5], cast(1.)));
   
     L[8]  = (Csum[8]  - L[6] *L[3] - L[7] *L[4]) * L[5];
     L[12] = (Csum[12] - L[10]*L[3] - L[11]*L[4]) * L[5];
   
     L[9] = Csum[9] - L[6]*L[6] - L[7]*L[7] - L[8]*L[8];
-    success &= L[9] > 0.0;
-    L[9] = sqrt(1.0 / select(success, L[9], ((TRACKVECTORFIT_PRECISION) 1.0)));
+    success &= L[9] > cast(0.);
+    // L[9] = sqrt(1. / L[9]);
+    L[9] = sqrt(cast(1.) / select(success, L[9], cast(1.)));
   
     L[13] = (Csum[13] - L[10]*L[6] - L[11]*L[7] - L[12]*L[8]) * L[9];
   
     L[14] = Csum[14] - L[10]*L[10] - L[11]*L[11] - L[12]*L[12] - L[13]*L[13];
-    success &= L[14] > 0.0;
-    L[14] = sqrt(1.0 / select(success, L[14], ((TRACKVECTORFIT_PRECISION) 1.0)));
+    success &= L[14] > cast(0.);
+    // L[14] = sqrt(1. / L[14]);
+    L[14] = sqrt(cast(1.) / select(success, L[14], cast(1.)));
 
     // Inversion
 
@@ -571,8 +586,8 @@ struct MathCommon {
     return to_bits(success);
   }
 
+  template<bool checkNodeType>
   static inline void updateResiduals (
-    const std::array<Sch::Item, W>& nodes_p,
     fp_ptr_64_const rv_p,
     fp_ptr_64_const pm_p,
     fp_ptr_64_const rr_p,
@@ -580,7 +595,8 @@ struct MathCommon {
     fp_ptr_64_const ss_p,
     fp_ptr_64_const sc_p,
     fp_ptr_64 res_p,
-    fp_ptr_64 errRes_p
+    fp_ptr_64 errRes_p,
+    const std::array<Sch::Item, W>& n
   ) {
     using vectype = typename Vectype<W>::type;
     using boolvectype = typename Vectype<W>::booltype;
@@ -590,7 +606,7 @@ struct MathCommon {
     std::array<vectype, 5> ss;
     std::array<vectype, 15> sc;
     std::array<vectype, 5> v;
-    vectype res, errRes, em, rr, HCH, sign, value, error;
+    vectype errRes, em, rr, error;
 
     // Load rv
     rv[0].load_a(rv_p + 0*W);
@@ -642,16 +658,11 @@ struct MathCommon {
     v[2] = sc[3] *pm[0]+sc[4] *pm[1]+sc[5] *pm[2]+sc[8] *pm[3]+sc[12]*pm[4];
     v[3] = sc[6] *pm[0]+sc[7] *pm[1]+sc[8] *pm[2]+sc[9] *pm[3]+sc[13]*pm[4];
     v[4] = sc[10]*pm[0]+sc[11]*pm[1]+sc[12]*pm[2]+sc[13]*pm[3]+sc[14]*pm[4];
-    HCH = pm[0]*v[0] + pm[1]*v[1] + pm[2]*v[2] + pm[3]*v[3] + pm[4]*v[4];
-
-    // const TRACKVECTORFIT_PRECISION V = node.m_errMeasure * node.m_errMeasure;
-    // const TRACKVECTORFIT_PRECISION sign = node.m_type == LHCb::Node::HitOnTrack ? -1 : 1;
-    const boolvectype maskedHitOnTrack = makeBoolHitOnTrack(nodes_p, std::make_index_sequence<W>());
-    sign = select(maskedHitOnTrack, -1.0, 1.0);
+    const vectype HCH = -(pm[0]*v[0] + pm[1]*v[1] + pm[2]*v[2] + pm[3]*v[3] + pm[4]*v[4]);
 
     // const TrackVectorContiguous& refX = node.m_refVector.m_parameters;
     // value = node.m_refResidual + (pm * (rv - node.get<Op::Smooth, Op::StateVector>()));
-    value = rr + (
+    const vectype value = rr + (
         pm[0] * (rv[0] - ss[0]) +
         pm[1] * (rv[1] - ss[1]) +
         pm[2] * (rv[2] - ss[2]) +
@@ -659,18 +670,26 @@ struct MathCommon {
         pm[4] * (rv[4] - ss[4])
     );
 
-    // error = V + sign * HCH;
-    error = em * em + sign * HCH;
-
     // Bring back the changes
-    const boolvectype maskedMeasurement = makeBoolMeasurement(nodes_p, std::make_index_sequence<W>());
-    res = select(maskedMeasurement, value, 0.0);
-    res.store_a(res_p);
+    value.store_a(res_p);
+
+    // This should be optimized at compile time
+    // With c++17: if constexpr (checkNodeType) { ... }
+    if (checkNodeType) {
+      // const TRACKVECTORFIT_PRECISION V = node.m_errMeasure * node.m_errMeasure;
+      // const TRACKVECTORFIT_PRECISION sign = node.m_type == LHCb::Node::HitOnTrack ? -1 : 1;
+      const boolvectype maskedHitOnTrack = makeBoolHitOnTrack(n, std::make_index_sequence<W>());
+      const vectype sign = select(maskedHitOnTrack, cast(-1.), cast(1.));
+
+      // error = V + sign * HCH;
+      error = em * em + sign * HCH;
+    } else {
+      // error = V + HCH;
+      error = em * em + HCH;
+    }
 
     errRes = sqrt(abs(error));
-    errRes = select(error >= 0, errRes, -errRes);
-
-    errRes = select(maskedMeasurement, errRes, 0.0);
+    errRes = select(error >= cast(0.), errRes, -errRes);
     errRes.store_a(errRes_p);
   }
 };
@@ -680,18 +699,18 @@ struct Math {
   template<unsigned W>
   static inline void predictState (
     fp_ptr_64_const tm,
-    const std::array<TRACKVECTORFIT_PRECISION, 5*W>& tv,
+    fp_ptr_64_const tv,
     fp_ptr_64_const us,
     fp_ptr_64 ps
   );
 
-  // template<unsigned W>
-  // static inline void predictCovariance (
-  //   fp_ptr_64_const tm,
-  //   std::array<TRACKVECTORFIT_PRECISION, 15*W>& nm,
-  //   fp_ptr_64_const uc,
-  //   fp_ptr_64 pc
-  // );
+  template<unsigned W>
+  static inline void predictCovariance (
+    fp_ptr_64_const tm,
+    fp_ptr_64_const nm,
+    fp_ptr_64_const uc,
+    fp_ptr_64 pc
+  );
 };
 
 template<>
@@ -699,7 +718,7 @@ struct Math<Op::Forward> {
   template<unsigned W>
   static inline void predictState (
     fp_ptr_64_const tm_p,
-    const std::array<TRACKVECTORFIT_PRECISION, 5*W>& tv_p,
+    fp_ptr_64_const tv_p,
     fp_ptr_64_const us_p,
     fp_ptr_64 ps_p
   ) {
@@ -738,11 +757,11 @@ struct Math<Op::Forward> {
     tm[24].load_a(tm_p + 24*W);
 
     // Load tv
-    tv[0].load_a(tv_p.data() + 0*W);
-    tv[1].load_a(tv_p.data() + 1*W);
-    tv[2].load_a(tv_p.data() + 2*W);
-    tv[3].load_a(tv_p.data() + 3*W);
-    tv[4].load_a(tv_p.data() + 4*W);
+    tv[0].load_a(tv_p + 0*W);
+    tv[1].load_a(tv_p + 1*W);
+    tv[2].load_a(tv_p + 2*W);
+    tv[3].load_a(tv_p + 3*W);
+    tv[4].load_a(tv_p + 4*W);
 
     // Load us
     us[0].load_a(us_p + 0*W);
@@ -768,7 +787,7 @@ struct Math<Op::Forward> {
   template<unsigned W>
   static inline void predictCovariance (
     fp_ptr_64_const tm,
-    const std::array<TRACKVECTORFIT_PRECISION, 15*W>& nm,
+    fp_ptr_64_const nm,
     fp_ptr_64_const uc,
     fp_ptr_64 pc
   ) {
@@ -776,21 +795,21 @@ struct Math<Op::Forward> {
     MathCommon<W>::similarity_5_5(tm, uc, pc);
 
     using vectype = typename Vectype<W>::type;
-    (vectype().load_a(pc +  0*W) + vectype().load_a(nm.data() +  0*W)).store_a(pc +  0*W);
-    (vectype().load_a(pc +  1*W) + vectype().load_a(nm.data() +  1*W)).store_a(pc +  1*W);
-    (vectype().load_a(pc +  2*W) + vectype().load_a(nm.data() +  2*W)).store_a(pc +  2*W);
-    (vectype().load_a(pc +  3*W) + vectype().load_a(nm.data() +  3*W)).store_a(pc +  3*W);
-    (vectype().load_a(pc +  4*W) + vectype().load_a(nm.data() +  4*W)).store_a(pc +  4*W);
-    (vectype().load_a(pc +  5*W) + vectype().load_a(nm.data() +  5*W)).store_a(pc +  5*W);
-    (vectype().load_a(pc +  6*W) + vectype().load_a(nm.data() +  6*W)).store_a(pc +  6*W);
-    (vectype().load_a(pc +  7*W) + vectype().load_a(nm.data() +  7*W)).store_a(pc +  7*W);
-    (vectype().load_a(pc +  8*W) + vectype().load_a(nm.data() +  8*W)).store_a(pc +  8*W);
-    (vectype().load_a(pc +  9*W) + vectype().load_a(nm.data() +  9*W)).store_a(pc +  9*W);
-    (vectype().load_a(pc + 10*W) + vectype().load_a(nm.data() + 10*W)).store_a(pc + 10*W);
-    (vectype().load_a(pc + 11*W) + vectype().load_a(nm.data() + 11*W)).store_a(pc + 11*W);
-    (vectype().load_a(pc + 12*W) + vectype().load_a(nm.data() + 12*W)).store_a(pc + 12*W);
-    (vectype().load_a(pc + 13*W) + vectype().load_a(nm.data() + 13*W)).store_a(pc + 13*W);
-    (vectype().load_a(pc + 14*W) + vectype().load_a(nm.data() + 14*W)).store_a(pc + 14*W);
+    (vectype().load_a(pc +  0*W) + vectype().load_a(nm +  0*W)).store_a(pc +  0*W);
+    (vectype().load_a(pc +  1*W) + vectype().load_a(nm +  1*W)).store_a(pc +  1*W);
+    (vectype().load_a(pc +  2*W) + vectype().load_a(nm +  2*W)).store_a(pc +  2*W);
+    (vectype().load_a(pc +  3*W) + vectype().load_a(nm +  3*W)).store_a(pc +  3*W);
+    (vectype().load_a(pc +  4*W) + vectype().load_a(nm +  4*W)).store_a(pc +  4*W);
+    (vectype().load_a(pc +  5*W) + vectype().load_a(nm +  5*W)).store_a(pc +  5*W);
+    (vectype().load_a(pc +  6*W) + vectype().load_a(nm +  6*W)).store_a(pc +  6*W);
+    (vectype().load_a(pc +  7*W) + vectype().load_a(nm +  7*W)).store_a(pc +  7*W);
+    (vectype().load_a(pc +  8*W) + vectype().load_a(nm +  8*W)).store_a(pc +  8*W);
+    (vectype().load_a(pc +  9*W) + vectype().load_a(nm +  9*W)).store_a(pc +  9*W);
+    (vectype().load_a(pc + 10*W) + vectype().load_a(nm + 10*W)).store_a(pc + 10*W);
+    (vectype().load_a(pc + 11*W) + vectype().load_a(nm + 11*W)).store_a(pc + 11*W);
+    (vectype().load_a(pc + 12*W) + vectype().load_a(nm + 12*W)).store_a(pc + 12*W);
+    (vectype().load_a(pc + 13*W) + vectype().load_a(nm + 13*W)).store_a(pc + 13*W);
+    (vectype().load_a(pc + 14*W) + vectype().load_a(nm + 14*W)).store_a(pc + 14*W);
   }
 };
 
@@ -799,7 +818,7 @@ struct Math<Op::Backward> {
   template<unsigned W>
   static inline void predictState (
     fp_ptr_64_const tm_p,
-    const std::array<TRACKVECTORFIT_PRECISION, 5*W>& tv_p,
+    fp_ptr_64_const tv_p,
     fp_ptr_64_const us_p,
     fp_ptr_64 ps_p
   ) {
@@ -839,11 +858,11 @@ struct Math<Op::Backward> {
     tm[24].load_a(tm_p + 24*W);
 
     // Load tv
-    tv[0].load_a(tv_p.data() + 0*W);
-    tv[1].load_a(tv_p.data() + 1*W);
-    tv[2].load_a(tv_p.data() + 2*W);
-    tv[3].load_a(tv_p.data() + 3*W);
-    tv[4].load_a(tv_p.data() + 4*W);
+    tv[0].load_a(tv_p + 0*W);
+    tv[1].load_a(tv_p + 1*W);
+    tv[2].load_a(tv_p + 2*W);
+    tv[3].load_a(tv_p + 3*W);
+    tv[4].load_a(tv_p + 4*W);
 
     // Load us
     us[0].load_a(us_p + 0*W);
@@ -875,29 +894,31 @@ struct Math<Op::Backward> {
   template<unsigned W>
   static inline void predictCovariance (
     fp_ptr_64_const tm,
-    std::array<TRACKVECTORFIT_PRECISION, 15*W>& nm,
+    fp_ptr_64_const nm,
     fp_ptr_64_const uc,
     fp_ptr_64 pc
   ) {
     using vectype = typename Vectype<W>::type;
-    (vectype().load_a(nm.data() +  0*W) + vectype().load_a(uc +  0*W)).store_a(nm.data() +  0*W);
-    (vectype().load_a(nm.data() +  1*W) + vectype().load_a(uc +  1*W)).store_a(nm.data() +  1*W);
-    (vectype().load_a(nm.data() +  2*W) + vectype().load_a(uc +  2*W)).store_a(nm.data() +  2*W);
-    (vectype().load_a(nm.data() +  3*W) + vectype().load_a(uc +  3*W)).store_a(nm.data() +  3*W);
-    (vectype().load_a(nm.data() +  4*W) + vectype().load_a(uc +  4*W)).store_a(nm.data() +  4*W);
-    (vectype().load_a(nm.data() +  5*W) + vectype().load_a(uc +  5*W)).store_a(nm.data() +  5*W);
-    (vectype().load_a(nm.data() +  6*W) + vectype().load_a(uc +  6*W)).store_a(nm.data() +  6*W);
-    (vectype().load_a(nm.data() +  7*W) + vectype().load_a(uc +  7*W)).store_a(nm.data() +  7*W);
-    (vectype().load_a(nm.data() +  8*W) + vectype().load_a(uc +  8*W)).store_a(nm.data() +  8*W);
-    (vectype().load_a(nm.data() +  9*W) + vectype().load_a(uc +  9*W)).store_a(nm.data() +  9*W);
-    (vectype().load_a(nm.data() + 10*W) + vectype().load_a(uc + 10*W)).store_a(nm.data() + 10*W);
-    (vectype().load_a(nm.data() + 11*W) + vectype().load_a(uc + 11*W)).store_a(nm.data() + 11*W);
-    (vectype().load_a(nm.data() + 12*W) + vectype().load_a(uc + 12*W)).store_a(nm.data() + 12*W);
-    (vectype().load_a(nm.data() + 13*W) + vectype().load_a(uc + 13*W)).store_a(nm.data() + 13*W);
-    (vectype().load_a(nm.data() + 14*W) + vectype().load_a(uc + 14*W)).store_a(nm.data() + 14*W);
+
+    _aligned std::array<TRACKVECTORFIT_PRECISION, 15*W> temp;
+    (vectype().load_a(nm +  0*W) + vectype().load_a(uc +  0*W)).store_a(temp.data() +  0*W);
+    (vectype().load_a(nm +  1*W) + vectype().load_a(uc +  1*W)).store_a(temp.data() +  1*W);
+    (vectype().load_a(nm +  2*W) + vectype().load_a(uc +  2*W)).store_a(temp.data() +  2*W);
+    (vectype().load_a(nm +  3*W) + vectype().load_a(uc +  3*W)).store_a(temp.data() +  3*W);
+    (vectype().load_a(nm +  4*W) + vectype().load_a(uc +  4*W)).store_a(temp.data() +  4*W);
+    (vectype().load_a(nm +  5*W) + vectype().load_a(uc +  5*W)).store_a(temp.data() +  5*W);
+    (vectype().load_a(nm +  6*W) + vectype().load_a(uc +  6*W)).store_a(temp.data() +  6*W);
+    (vectype().load_a(nm +  7*W) + vectype().load_a(uc +  7*W)).store_a(temp.data() +  7*W);
+    (vectype().load_a(nm +  8*W) + vectype().load_a(uc +  8*W)).store_a(temp.data() +  8*W);
+    (vectype().load_a(nm +  9*W) + vectype().load_a(uc +  9*W)).store_a(temp.data() +  9*W);
+    (vectype().load_a(nm + 10*W) + vectype().load_a(uc + 10*W)).store_a(temp.data() + 10*W);
+    (vectype().load_a(nm + 11*W) + vectype().load_a(uc + 11*W)).store_a(temp.data() + 11*W);
+    (vectype().load_a(nm + 12*W) + vectype().load_a(uc + 12*W)).store_a(temp.data() + 12*W);
+    (vectype().load_a(nm + 13*W) + vectype().load_a(uc + 13*W)).store_a(temp.data() + 13*W);
+    (vectype().load_a(nm + 14*W) + vectype().load_a(uc + 14*W)).store_a(temp.data() + 14*W);
 
     // Delegate to similarity_5_5 implementation
-    MathCommon<W>::similarity_5_5(tm, nm.data(), pc);
+    MathCommon<W>::similarity_5_5(tm, temp.data(), pc);
   }
 };
 
diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h
index 4aa3c847283791120a8629c2e1bcf30aaf06ee6e..a482d25663b037853a458bf0e9bb3f22f0928e4f 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h
@@ -14,7 +14,8 @@ template<class T>
 struct predict {
   template<unsigned W>
   static inline void op (
-    const std::array<Sch::Item, W>& n,
+    fp_ptr_64_const tv,
+    fp_ptr_64_const nm,
     fp_ptr_64_const tm,
     fp_ptr_64_const us,
     fp_ptr_64_const uc,
@@ -27,16 +28,14 @@ template<>
 struct predict<Op::Forward> {
   template<unsigned W>
   static inline void op (
-    const std::array<Sch::Item, W>& n,
+    fp_ptr_64_const tv,
+    fp_ptr_64_const nm,
     fp_ptr_64_const tm,
     fp_ptr_64_const us,
     fp_ptr_64_const uc,
     fp_ptr_64 ps,
     fp_ptr_64 pc
   ) {
-    _aligned const std::array<TRACKVECTORFIT_PRECISION, 15*W> nm = ArrayGen::getCurrentNoiseMatrix(n);
-    _aligned const std::array<TRACKVECTORFIT_PRECISION, 5*W> tv = ArrayGen::getCurrentTransportVector(n);
-
     Math<Op::Forward>::predictState<W> (
       tm,
       tv,
@@ -57,16 +56,14 @@ template<>
 struct predict<Op::Backward> {
   template<unsigned W>
   static inline void op (
-    const std::array<Sch::Item, W>& n,
+    fp_ptr_64_const tv,
+    fp_ptr_64_const nm,
     fp_ptr_64_const tm,
     fp_ptr_64_const us,
     fp_ptr_64_const uc,
     fp_ptr_64 ps,
     fp_ptr_64 pc
   ) {
-    _aligned const std::array<TRACKVECTORFIT_PRECISION, 5*W> tv = ArrayGen::getPreviousTransportVector(n);
-    _aligned std::array<TRACKVECTORFIT_PRECISION, 15*W> nm = ArrayGen::getPreviousNoiseMatrix(n);
-
     Math<Op::Backward>::predictState<W> (
       tm,
       tv,
diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h
index 47bb4e6a0049aeba772e0f35c4a709523b1ad380..7e1b9b5715641bf98e3ed84e5983d84642089830 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h
@@ -28,9 +28,8 @@ inline uint16_t smoother (
   );
 }
 
-template<long unsigned W>
+template<long unsigned W, bool checkNodeType>
 inline void updateResiduals (
-  const std::array<Sch::Item, W>& n,
   fp_ptr_64_const rv,
   fp_ptr_64_const pm,
   fp_ptr_64_const rr,
@@ -38,10 +37,10 @@ inline void updateResiduals (
   fp_ptr_64_const ss,
   fp_ptr_64_const sc,
   fp_ptr_64 res,
-  fp_ptr_64 errRes
+  fp_ptr_64 errRes,
+  const std::array<Sch::Item, W>& n
 ) {
-  MathCommon<W>::updateResiduals (
-    n,
+  MathCommon<W>::template updateResiduals<checkNodeType> (
     rv,
     pm,
     rr,
@@ -49,7 +48,8 @@ inline void updateResiduals (
     ss,
     sc,
     res,
-    errRes
+    errRes,
+    n
   );
 }
 
diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h
index d8ec226d3761b51399c4d6ba17f9cf707afc4c67..550b327e30cb1962b85b2692797a90100fa31da4 100644
--- a/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h
+++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h
@@ -9,18 +9,18 @@ namespace TrackVectorFit {
 
 namespace Vector {
 
-template<unsigned W>
+template<unsigned W, bool checkNodeType>
 inline void update (
-  const std::array<Sch::Item, W>& n,
   fp_ptr_64_const rv,
   fp_ptr_64_const pm,
   fp_ptr_64_const rr,
   fp_ptr_64_const em,
   fp_ptr_64 us,
   fp_ptr_64 uc,
-  fp_ptr_64 chi2
+  fp_ptr_64 chi2,
+  const std::array<Sch::Item, W>& n
 ) {
-  MathCommon<W>::update (
+  MathCommon<W>::template update<checkNodeType> (
     rv,
     pm,
     rr,
diff --git a/Tr/TrackVectorFit/src/TrackVectorFit.cpp b/Tr/TrackVectorFit/src/TrackVectorFit.cpp
index 2e3bbc40a151d4bbb0c912b2fca0130177e6d400..1df23278655ccc1a5dfb34964ccbd53b882a19fa 100644
--- a/Tr/TrackVectorFit/src/TrackVectorFit.cpp
+++ b/Tr/TrackVectorFit/src/TrackVectorFit.cpp
@@ -1,595 +1 @@
-#include "TrackVectorFit/TrackVectorFit.h"
-
-using namespace Tr::TrackVectorFit;
-
-TrackVectorFit::TrackVectorFit (
-  const unsigned& memManagerStorageIncrements,
-  const bool& debug
-) : m_memManagerStorageIncrements(memManagerStorageIncrements),
-  m_debug(debug) {
-  m_smoothStore = Mem::Store(Mem::View::SmoothState::size(), memManagerStorageIncrements);
-  m_nodeParametersStore = Mem::Store(Mem::View::NodeParameters::size(), memManagerStorageIncrements);
-  m_transportForwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements);
-  m_transportBackwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements);
-  m_auxStore = Mem::Store(Mem::View::TrackSymMatrix::size() + Mem::View::TrackVector::size(), memManagerStorageIncrements);
-}
-
-void TrackVectorFit::operator() (
-  std::list<std::reference_wrapper<Track>>& tracks
-) {
-  // Initialize the smooth state iterator here
-  Mem::Iterator smoothMainStoreIterator (m_smoothStoreIterator);
-
-  if (tracks.size() >= VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()) {
-    if (m_debug) {
-      std::cout << "Scheduler pre iterations:" << std::endl
-        << m_scheduler_pre << std::endl
-        << "Scheduler main iterations:" << std::endl
-        << m_scheduler_main << std::endl
-        << "Scheduler post iterations:" << std::endl
-        << m_scheduler_post << std::endl;
-    }
-
-    // Iterators for all stores
-    Mem::Iterator forwardTransportIterator (m_transportForwardStore);
-    Mem::ReverseIterator backwardTransportIterator (m_transportBackwardStore);
-    Mem::ReverseIterator backwardNodeIterator (m_nodeParametersStore);
-    Mem::ReverseIterator backwardStoreIterator (*m_backwardStore);
-
-    // Iterators initialized from the object state
-    Mem::Iterator forwardNodeIterator (m_forwardNodeIterator);
-    Mem::Iterator forwardStoreIterator (m_forwardStoreIterator);
-    Mem::ReverseIterator forwardMainStoreIterator (m_forwardMainStoreIterator);
-
-    // Forward fit
-    //  Pre iterations
-    if (m_debug) {
-      std::cout << std::endl << "Calculating forward fit pre iterations" << std::endl;
-    }
-    std::for_each (m_scheduler_pre.begin(), m_scheduler_pre.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-      const auto& out = s.out;
-      const auto& action = s.action;
-      auto& pool = s.pool;
-
-      if (m_debug) {
-        std::cout << s << std::endl;
-      }
-
-      // Update
-      Mem::View::State current (forwardStoreIterator.nextVector());
-      Mem::View::NodeParameters currentParameters (forwardNodeIterator.nextVector());
-
-      // Predict (always in a new vector, no need to swap)
-      for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-        if (action[i]) {
-          auto& node = *(pool[i].node);
-          Scalar::initialise<Op::Forward>(node, pool[i].track->m_seedCovariance);
-        }
-      }
-
-      if (out.any()) {
-        Vector::update<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-          pool,
-          currentParameters.m_referenceVector.m_basePointer,
-          currentParameters.m_projectionMatrix.m_basePointer,
-          currentParameters.m_referenceResidual,
-          currentParameters.m_errorMeasure,
-          current.m_updatedState.m_basePointer,
-          current.m_updatedCovariance.m_basePointer,
-          current.m_chi2
-        );
-      }
-    });
-
-    // Forward fit
-    //  Main iterations
-    if (m_debug) {
-      std::cout << std::endl << "Calculating forward fit main iterations" << std::endl;
-    }
-    std::list<SwapStore> swaps;
-    auto lastVector = forwardStoreIterator.nextVector();
-    std::for_each (m_scheduler_main.begin(), m_scheduler_main.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-      const auto& in  = s.in;
-      const auto& out = s.out;
-      auto& pool = s.pool;
-
-      if (m_debug) {
-        std::cout << s << std::endl;
-      }
-
-      // Feed the data we need in our vector
-      if (in.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (in[i]) {
-            Mem::View::State memslot (lastVector + i);
-            memslot.m_updatedState.copy((pool[i].node-1)->get<Op::Forward, Op::StateVector>());
-            memslot.m_updatedCovariance.copy((pool[i].node-1)->get<Op::Forward, Op::Covariance>());
-          }
-        }
-      }
-
-      // predict and update
-      const auto& currentVector = forwardStoreIterator.nextVector();
-      Mem::View::State last (lastVector);
-      Mem::View::State current (currentVector);
-      const auto& tm = forwardTransportIterator.nextVector();
-      Mem::View::NodeParameters currentParameters (forwardNodeIterator.nextVector());
-
-      Vector::fit<Op::Forward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-        pool,
-        currentParameters.m_referenceVector.m_basePointer,
-        currentParameters.m_projectionMatrix.m_basePointer,
-        currentParameters.m_referenceResidual,
-        currentParameters.m_errorMeasure,
-        tm,
-        last.m_updatedState.m_basePointer,
-        last.m_updatedCovariance.m_basePointer,
-        current.m_updatedState.m_basePointer,
-        current.m_updatedCovariance.m_basePointer,
-        current.m_chi2
-      );
-
-      // Move requested data out
-      if (out.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (out[i]) {
-            auto& node = *(pool[i].node);
-
-            Mem::View::State memslot (m_auxStore.getNextElement());
-            Mem::View::State currentslot (currentVector + i);
-            memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer);
-            memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer);
-            swaps.push_back(
-              SwapStore(
-                memslot,
-                node.get<Op::Forward, Op::StateVector>(),
-                node.get<Op::Forward, Op::Covariance>()
-              )
-            );
-          }
-        }
-      }
-
-      lastVector = currentVector;
-    });
-
-    // Restore updated state
-    for (auto& swap : swaps) {
-      swap.state.copy(swap.store.m_updatedState);
-      swap.covariance.copy(swap.store.m_updatedCovariance);
-    }
-
-    // Forward fit
-    //  Post iterations
-    if (m_debug) {
-      std::cout << std::endl << "Calculating forward fit post iterations" << std::endl;
-    }
-    lastVector = forwardStoreIterator.nextVector();
-    std::for_each (m_scheduler_post.begin(), m_scheduler_post.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-      const auto& in  = s.in;
-      const auto& out = s.out;
-      auto& pool = s.pool;
-
-      if (m_debug) {
-        std::cout << s << std::endl;
-      }
-
-      // Feed the data we need in our vector
-      if (in.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (in[i]) {
-            Mem::View::State memslot (lastVector + i);
-            memslot.m_updatedState.copy((pool[i].node-1)->get<Op::Forward, Op::StateVector>());
-            memslot.m_updatedCovariance.copy((pool[i].node-1)->get<Op::Forward, Op::Covariance>());
-          }
-        }
-      }
-
-      // predict and update
-      const auto& currentVector = forwardStoreIterator.nextVector();
-      Mem::View::State last (lastVector);
-      Mem::View::State current (currentVector);
-      const auto& tm = forwardTransportIterator.nextVector();
-      Mem::View::NodeParameters currentParameters (forwardNodeIterator.nextVector());
-
-      Vector::fit<Op::Forward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-        pool,
-        currentParameters.m_referenceVector.m_basePointer,
-        currentParameters.m_projectionMatrix.m_basePointer,
-        currentParameters.m_referenceResidual,
-        currentParameters.m_errorMeasure,
-        tm,
-        last.m_updatedState.m_basePointer,
-        last.m_updatedCovariance.m_basePointer,
-        current.m_updatedState.m_basePointer,
-        current.m_updatedCovariance.m_basePointer,
-        current.m_chi2
-      );
-
-      // Move requested data out and update data pointers
-      if (out.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (out[i]) {
-            auto& node = *(pool[i].node);
-
-            Mem::View::State memslot (m_auxStore.getNextElement());
-            Mem::View::State currentslot (currentVector + i);
-            memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer);
-            memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer);
-            node.get<Op::Forward, Op::StateVector>().setBasePointer(memslot.m_updatedState);
-            node.get<Op::Forward, Op::Covariance>().setBasePointer(memslot.m_updatedCovariance);
-          }
-        }
-      }
-
-      lastVector = currentVector;
-    });
-
-    // Backward fit and smoother
-    //  Pre iterations
-    if (m_debug) {
-      std::cout << "Calculating backward fit pre iterations" << std::endl;
-    }
-    std::for_each (m_scheduler_post.rbegin(), m_scheduler_post.rend(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-      const auto& out = s.in;
-      const auto& action = s.action;
-      auto& pool = s.pool;
-
-      if (m_debug) {
-        std::cout << s << std::endl;
-      }
-
-      // Update
-      Mem::View::State current (backwardStoreIterator.previousVector());
-      Mem::View::NodeParameters currentParameters (backwardNodeIterator.previousVector());
-
-      // Predict (always in a new vector, no need to swap)
-      for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-        if (action[i]) {
-          auto& node = *(pool[i].node);
-          Scalar::initialise<Op::Backward>(node, pool[i].track->m_seedCovariance);
-        }
-      }
-      
-      if (out.any()) {
-        Vector::update<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-          pool,
-          currentParameters.m_referenceVector.m_basePointer,
-          currentParameters.m_projectionMatrix.m_basePointer,
-          currentParameters.m_referenceResidual,
-          currentParameters.m_errorMeasure,
-          current.m_updatedState.m_basePointer,
-          current.m_updatedCovariance.m_basePointer,
-          current.m_chi2
-        );
-      }
-    });
-
-    // Backward fit and smoother
-    //  Main iterations
-    if (m_debug) {
-      std::cout << "Calculating backward fit and smoother main iterations" << std::endl;
-    }
-    lastVector = backwardStoreIterator.previousVector();
-    std::for_each (m_scheduler_main.rbegin(), m_scheduler_main.rend(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-      const auto& in  = s.out;
-      const auto& out = s.in;
-      const auto& action = s.action;
-      auto& pool = s.pool;
-
-      if (m_debug) {
-        std::cout << s << std::endl;
-      }
-
-      // Feed the data we need in our vector
-      if (in.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (in[i]) {
-            Mem::View::State memslot (lastVector + i);
-            memslot.m_updatedState.copy((pool[i].node+1)->get<Op::Backward, Op::StateVector>());
-            memslot.m_updatedCovariance.copy((pool[i].node+1)->get<Op::Backward, Op::Covariance>());
-          }
-        }
-      }
-
-      // Update pointers of smooth state
-      const auto& smootherVector = smoothMainStoreIterator.nextVector();
-      for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-        if (action[i]) {
-          pool[i].node->m_smoothState.setBasePointer(smootherVector + i);
-        }
-      }
-
-      // predict and update
-      const auto& currentVector = backwardStoreIterator.previousVector();
-      Mem::View::State last (lastVector);
-      Mem::View::State current (currentVector);
-      const auto& tm = backwardTransportIterator.previousVector();
-      Mem::View::NodeParameters currentParameters (backwardNodeIterator.previousVector());
-
-      Vector::predict<Op::Backward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-        pool,
-        tm,
-        last.m_updatedState.m_basePointer,
-        last.m_updatedCovariance.m_basePointer,
-        current.m_updatedState.m_basePointer,
-        current.m_updatedCovariance.m_basePointer
-      );
-
-      // Do the smoother *here*
-      // This benefits from cache locality,
-      // and removes the need of doing swapping to restore
-      // the updated states
-      
-      // Fetch the corresponding forward and smooth state, and update it
-      Mem::View::State forward (forwardMainStoreIterator.previousVector());
-      Mem::View::SmoothState smooth (smootherVector);
-
-      // Smoother
-      const uint16_t smoother_result = Vector::smoother<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-        forward.m_updatedState.m_basePointer,
-        forward.m_updatedCovariance.m_basePointer,
-        current.m_updatedState.m_basePointer,      // These are still the predicted state
-        current.m_updatedCovariance.m_basePointer, // and covariance
-        smooth.m_state.m_basePointer,
-        smooth.m_covariance.m_basePointer
-      );
-
-      // Update residuals
-      Vector::updateResiduals (
-        pool,
-        currentParameters.m_referenceVector.m_basePointer,
-        currentParameters.m_projectionMatrix.m_basePointer,
-        currentParameters.m_referenceResidual,
-        currentParameters.m_errorMeasure,
-        smooth.m_state.m_basePointer,
-        smooth.m_covariance.m_basePointer,
-        smooth.m_residual,
-        smooth.m_errResidual
-      );
-
-      // Update
-      Vector::update<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-        pool,
-        currentParameters.m_referenceVector.m_basePointer,
-        currentParameters.m_projectionMatrix.m_basePointer,
-        currentParameters.m_referenceResidual,
-        currentParameters.m_errorMeasure,
-        current.m_updatedState.m_basePointer,
-        current.m_updatedCovariance.m_basePointer,
-        current.m_chi2
-      );
-
-      // Set errors
-      if (smoother_result != ArrayGen::mask<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (action[i] && !(smoother_result >> i) & 0x01) {
-            pool[i].track->track().setFitStatus(LHCb::Track::FitFailed);
-          }
-        }
-      }
-
-      if (m_debug) {
-        if (smoother_result != ArrayGen::mask<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>()) {
-          std::cout << "Smoother errors: " << smoother_result << std::endl;
-        }
-      }
-
-      // Move requested data out and update data pointers
-      if (out.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (out[i]) {
-            auto& node = *(pool[i].node);
-
-            Mem::View::State memslot (m_auxStore.getNextElement());
-            Mem::View::State currentslot (currentVector + i);
-            memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer);
-            memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer);
-            node.get<Op::Backward, Op::StateVector>().setBasePointer(memslot.m_updatedState);
-            node.get<Op::Backward, Op::Covariance>().setBasePointer(memslot.m_updatedCovariance);
-          }
-        }
-      }
-
-      lastVector = currentVector;
-    });
-
-    // Backward fit
-    //  Post iterations
-    if (m_debug) {
-      std::cout << "Calculating backward fit post iterations" << std::endl;
-    }
-    lastVector = backwardStoreIterator.previousVector();
-    std::for_each (m_scheduler_pre.rbegin(), m_scheduler_pre.rend(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) {
-      const auto& in  = s.out;
-      const auto& out = s.in;
-      auto& pool = s.pool;
-
-      if (m_debug) {
-        std::cout << s << std::endl;
-      }
-
-      // Feed the data we need in our vector
-      if (in.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (in[i]) {
-            Mem::View::State memslot (lastVector + i);
-            memslot.m_updatedState.copy((pool[i].node+1)->get<Op::Backward, Op::StateVector>());
-            memslot.m_updatedCovariance.copy((pool[i].node+1)->get<Op::Backward, Op::Covariance>());
-          }
-        }
-      }
-
-      // predict and update
-      const auto& currentVector = backwardStoreIterator.previousVector();
-      Mem::View::State last (lastVector);
-      Mem::View::State current (currentVector);
-      const auto& tm = backwardTransportIterator.previousVector();
-      Mem::View::NodeParameters currentParameters (backwardNodeIterator.previousVector());
-
-      Vector::fit<Op::Backward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> (
-        pool,
-        currentParameters.m_referenceVector.m_basePointer,
-        currentParameters.m_projectionMatrix.m_basePointer,
-        currentParameters.m_referenceResidual,
-        currentParameters.m_errorMeasure,
-        tm,
-        last.m_updatedState.m_basePointer,
-        last.m_updatedCovariance.m_basePointer,
-        current.m_updatedState.m_basePointer,
-        current.m_updatedCovariance.m_basePointer,
-        current.m_chi2
-      );
-
-      // Move requested data out and update data pointers
-      if (out.any()) {
-        for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) {
-          if (out[i]) {
-            auto& node = *(pool[i].node);
-
-            Mem::View::State memslot (m_auxStore.getNextElement());
-            Mem::View::State currentslot (currentVector + i);
-            memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer);
-            memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer);
-            node.get<Op::Backward, Op::StateVector>().setBasePointer(memslot.m_updatedState);
-            node.get<Op::Backward, Op::Covariance>().setBasePointer(memslot.m_updatedCovariance);
-          }
-        }
-      }
-
-      lastVector = currentVector;
-    });
-  } else {
-    // Forward fit
-    std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
-      auto& nodes = track.m_nodes;
-      Node& node = nodes.front();
-      Scalar::fit<Op::Forward>(node, track.m_seedCovariance);
-
-      // Before m_forwardUpstream
-      for (unsigned i=1; i<=track.m_forwardUpstream; ++i) {
-        Node& node = nodes[i];
-        Node& prevnode = nodes[i-1];
-        Scalar::fit<Op::Forward, false>(node, prevnode);
-      }
-
-      // After m_forwardUpstream
-      for (unsigned i=track.m_forwardUpstream+1; i<nodes.size(); ++i) {
-        Node& node = nodes[i];
-        Node& prevnode = nodes[i-1];
-        Scalar::fit<Op::Forward, true>(node, prevnode);
-      }
-    });
-
-    // Backward fit
-    std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
-      auto& nodes = track.m_nodes;
-      Node& node = nodes.back();
-      Scalar::fit<Op::Backward>(node, track.m_seedCovariance);
-
-      // Before m_backwardUpstream
-      for (unsigned i=1; i<=track.m_backwardUpstream; ++i) {
-        const int element = track.m_nodes.size() - i - 1;
-        Node& prevnode = nodes[element + 1];
-        Node& node = nodes[element];
-        Scalar::fit<Op::Backward, false>(node, prevnode);
-      }
-
-      // After m_backwardUpstream
-      for (unsigned i=track.m_backwardUpstream+1; i<nodes.size(); ++i) {
-        const int element = track.m_nodes.size() - i - 1;
-        Node& prevnode = nodes[element + 1];
-        Node& node = nodes[element];
-        Scalar::predict<Op::Backward, true>(node, prevnode);
-
-        // Do the smoother
-        if (i < (track.m_nodes.size() - track.m_forwardUpstream - 1)) {
-          node.m_smoothState.setBasePointer(smoothMainStoreIterator.nextElement());
-          const bool smoothSuccess = Scalar::smoother<true, true>(node);
-          Scalar::updateResiduals(node);
-
-          if (not smoothSuccess) {
-            track.track().setFitStatus(LHCb::Track::FitFailed);
-          }
-        }
-
-        Scalar::update<Op::Backward>(node);
-      }
-    });
-  }
-
-  // Calculate the chi2 a posteriori
-  std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
-    auto& nodes = track.m_nodes;
-    
-    // Reset the values
-    track.m_ndof = -track.m_nTrackParameters;
-    auto ndofBackward = -track.m_nTrackParameters;
-    track.m_forwardFitChi2  = ((TRACKVECTORFIT_PRECISION) 0.0);
-    track.m_backwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0);
-
-    for (unsigned k=0; k<nodes.size(); ++k) {
-      Node& node = nodes[k];
-      if (node.node().type() == LHCb::Node::HitOnTrack) {
-        track.m_forwardFitChi2 += node.get<Op::Forward, Op::Chi2>();
-        ++track.m_ndof;
-      }
-      node.get<Op::Forward, Op::Chi2>() = track.m_forwardFitChi2;
-      node.m_ndof = track.m_ndof;
-
-      // Backwards chi2
-      Node& backwardNode = nodes[nodes.size() - 1 - k];
-      if (backwardNode.node().type() == LHCb::Node::HitOnTrack) {
-        track.m_backwardFitChi2 += backwardNode.get<Op::Backward, Op::Chi2>();
-        ++ndofBackward;
-      }
-      backwardNode.get<Op::Backward, Op::Chi2>() = track.m_backwardFitChi2;
-      backwardNode.m_ndofBackward = ndofBackward;
-    }
-  });
-
-  // Not upstream iterations
-  std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) {
-    for (unsigned j=0; j<=track.m_forwardUpstream; ++j) {
-      track.m_nodes[j].m_smoothState.setBasePointer(smoothMainStoreIterator.nextElement());
-      Scalar::smoother<false, true>(track.m_nodes[j]);
-      Scalar::updateResiduals(track.m_nodes[j]);
-    }
-
-    for (unsigned j=0; j<=track.m_backwardUpstream; ++j) {
-      const unsigned element = track.m_nodes.size() - j - 1;
-      track.m_nodes[element].m_smoothState.setBasePointer(smoothMainStoreIterator.nextElement());
-      Scalar::smoother<true, false>(track.m_nodes[element]);
-      Scalar::updateResiduals(track.m_nodes[element]);
-    }
-  });
-
-  // Set the LHCb::Node information accordingly
-  std::for_each(tracks.begin(), tracks.end(), [] (Track& track) {
-    // TODO - Either the min or max chi2 could be picked up here.
-    track.track().setChi2AndDoF(track.minChi2(), track.ndof());
-  });
-}
-
-/**
- * @brief Populate the nodes with the calculated information.
- */
-void TrackVectorFit::populateNodes (
-  Track& track
-) {
-  std::for_each(track.nodes().begin(), track.nodes().end(), [] (Node& n) {
-    auto& node = n.node();
-
-    node.setErrMeasure(n.get<Op::NodeParameters, Op::ErrMeasure>());
-    node.setProjectionMatrix((Gaudi::TrackProjectionMatrix) n.get<Op::NodeParameters, Op::ProjectionMatrix>());
-    node.setResidual(n.get<Op::Smooth, Op::Residual>());
-    node.setErrResidual(n.get<Op::Smooth, Op::ErrResidual>());
-    
-    const auto smoothStateVector = (Gaudi::TrackVector) n.get<Op::Smooth, Op::StateVector>();
-    node.setRefVector(smoothStateVector);
-    node.setState(
-      smoothStateVector,
-      (Gaudi::TrackSymMatrix) n.get<Op::Smooth, Op::Covariance>(),
-      node.z());
-  });
-}
+#include "TrackVectorFit/TrackVectorFit.h"
\ No newline at end of file