diff --git a/Tr/TrackFitEvent/Event/FitNode.h b/Tr/TrackFitEvent/Event/FitNode.h index b5ba89f04cda6e04e103f4eada1f67f7abbd4971..9d3d95511976140cf8988b61a1fad32739a1625f 100755 --- a/Tr/TrackFitEvent/Event/FitNode.h +++ b/Tr/TrackFitEvent/Event/FitNode.h @@ -340,9 +340,13 @@ namespace LHCb s << " measurement of type " << node.measurement().type() << " with LHCbID " << node.measurement().lhcbID().channelID(); } - s << " at z " << node.z() - << " with chi2 " << node.chi2() << "\n" - << " transport matrix "; + s << " at z " << node.z(); + if (node.hasMeasurement()) { + s << " with chi2 " << node.chi2() << "\n"; + } else { + s << " with chi2 -\n"; + } + s << " transport matrix "; node.print(s, node.transportMatrix()); s << "\n inverse transport matrix "; node.print(s, node.invertTransportMatrix()); @@ -353,7 +357,11 @@ namespace LHCb s << " refVector "; node.print(s, node.refVector().parameters()); s << " refResidual " << node.refResidual() - << " errMeasure " << node.errMeasure(); + << " errMeasure " << node.errMeasure() << "\n" + << " noiseMatrix "; + node.print(s, node.noiseMatrix()); + s << " transportVector "; + node.print(s, node.transportVector()); return s; } diff --git a/Tr/TrackFitter/src/TrackMasterFitter.cpp b/Tr/TrackFitter/src/TrackMasterFitter.cpp index 14c9c87e99679ea1cafe3e1557d6f2b317f0a044..772a853233f3656051e421f2df159be9da7d21cc 100644 --- a/Tr/TrackFitter/src/TrackMasterFitter.cpp +++ b/Tr/TrackFitter/src/TrackMasterFitter.cpp @@ -523,6 +523,11 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const // one. const double totalchi2 = std::max(nodes.front()->totalChi2(LHCb::FitNode::Backward).chi2(), nodes.back()->totalChi2(LHCb::FitNode::Forward).chi2()); + + if (m_debugLevel){ + debug() << "removeWorstOutlier: total chi2 " << totalchi2 << endmsg; + } + using NodeWithChi2 = std::pair<const FitNode*, double>; std::vector< NodeWithChi2 > nodesWithChi2UL; nodesWithChi2UL.reserve(nodes.size()); @@ -538,6 +543,17 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const const auto* tmpnode = node->prevNode(dir); if (tmpnode) chi2MatchAndHit -= tmpnode->totalChi2(dir).chi2(); } + + if (m_debugLevel){ + const auto* prevnode = node->prevNode(0); + const auto* nextnode = node->prevNode(1); + + debug() << "node LHCbID " << node->measurement().lhcbID().channelID() + << " chi2Contribution " << chi2MatchAndHit << " (" << totalchi2 << " - " + << (prevnode ? prevnode->totalChi2(0).chi2() : 0) << " - " + << (nextnode ? nextnode->totalChi2(1).chi2() : 0) << ")" << endmsg; + } + if (chi2MatchAndHit > m_chi2Outliers) { nodesWithChi2UL.emplace_back( node, chi2MatchAndHit); } @@ -549,9 +565,23 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const auto worstChi2 = m_chi2Outliers; std::sort(nodesWithChi2UL.begin(), nodesWithChi2UL.end(), DecreasingChi2); + if (m_debugLevel) { + debug() << "All bad measurements, ordered: " << endmsg; + for (const auto& nodeUL : nodesWithChi2UL ) { + auto& node = nodeUL.first; + debug() << "Measurement of type " << node->measurement().type() + << " LHCbID " << node->measurement().lhcbID().channelID() + << " at z " << node->z() + << " with chi2Contribution " << nodeUL.second + << " and chi2 " << node->chi2() + << endmsg; + } + } + // -- if the first is smaller than worstChi2, and it is sorted in decreasing order // -- the 'if' will never be true and we can return here (M. De Cian) if (!nodesWithChi2UL.empty() && nodesWithChi2UL.front().second < worstChi2) return outlier; + for (const auto& node : nodesWithChi2UL) { if (node.second > worstChi2) { const auto chi2 = node.first->chi2(); @@ -562,14 +592,18 @@ LHCb::Node* TrackMasterFitter::outlierRemoved(Track& track) const } } } - if (outlier && m_debugLevel) { - debug() << "Measurement of type=" - << outlier -> measurement().type() - << " LHCbID=" - << outlier -> measurement().lhcbID().channelID() - << " at z=" << outlier->z() - << " with chi2=" << outlier->chi2() - << " removed." << endmsg; + if (m_debugLevel) { + if (outlier) { + debug() << "Measurement of type " + << outlier -> measurement().type() + << " LHCbID " + << outlier -> measurement().lhcbID().channelID() + << " at z " << outlier->z() + << " with chi2 " << outlier->chi2() + << " removed." << endmsg; + } else { + debug() << "Outlier not found" << endmsg; + } } return outlier; @@ -849,6 +883,10 @@ StatusCode TrackMasterFitter::updateTransport(LHCb::Track& track) const auto inode = nodes.begin(); const LHCb::StateVector* refvector = &((*inode)->refVector()); TrackMatrix F = ROOT::Math::SMatrixIdentity(); + + // Bug: We still need to reset the first node + (*inode)->resetFilterStatus(); + for (++inode; inode!=nodes.end() && sc.isSuccess(); ++inode) { FitNode* node = *inode; diff --git a/Tr/TrackFitter/src/TrackVectorFitter.cpp b/Tr/TrackFitter/src/TrackVectorFitter.cpp index 5fbc381ed8fdc78ef23d698115574537752d10f2..e9684648c3409bac4f868f2d36fe6696fdda40f7 100644 --- a/Tr/TrackFitter/src/TrackVectorFitter.cpp +++ b/Tr/TrackFitter/src/TrackVectorFitter.cpp @@ -7,7 +7,7 @@ DECLARE_TOOL_FACTORY(TrackVectorFitter) namespace Tr { namespace TrackVectorFitter { -thread_local Tr::TrackVectorFit::TrackVectorFit fit; +thread_local Tr::TrackVectorFit::TrackVectorFit fitter; } } @@ -41,7 +41,7 @@ TrackVectorFitter::TrackVectorFitter ( /** * @brief Initializes all tools and debug level. */ -StatusCode TrackVectorFitter::initialize () { +StatusCode TrackVectorFitter::initialize () { // Note: Do we really need to specify the pid, or is it always 211 for the fit? // Count the number of hits of each type @@ -138,7 +138,7 @@ StatusCode TrackVectorFitter::operator() ( // __itt_resume(); // } - Tr::TrackVectorFitter::fit.reset(updatedStatesStore); + Tr::TrackVectorFitter::fitter.reset(updatedStatesStore, msgLevel(MSG::DEBUG)); // Generate the Tr::TrackVectorFit::Tracks std::list<Tr::TrackVectorFit::Track> tList; @@ -177,7 +177,7 @@ StatusCode TrackVectorFitter::operator() ( unsigned tSize = tConvergence.size(); // Run the vector scheduler - Tr::TrackVectorFitter::fit.initializeBasePointers(tConvergence); + Tr::TrackVectorFitter::fitter.initializeBasePointers<false>(tConvergence); std::for_each(tConvergence.begin(), tConvergence.end(), [&] (Tr::TrackVectorFit::Track& t) { // Populate the reference vectors from the defined states @@ -186,12 +186,9 @@ StatusCode TrackVectorFitter::operator() ( // Project measurements, update residuals and materials // for the first time projectAndUpdate(t, pid); - - // Populate with the seed covariance - generateAndSetSeedCovariance(t); }); - Tr::TrackVectorFitter::fit(tConvergence); + Tr::TrackVectorFitter::fitter.smoothFit<false>(tConvergence); eraseFailed(tConvergence); if (msgLevel(MSG::DEBUG)) { @@ -209,16 +206,22 @@ StatusCode TrackVectorFitter::operator() ( const bool sameSize = (tSize == tConvergence.size()); tSize = tConvergence.size(); - if ( !sameSize ) { + if (!sameSize) { // Run the vector scheduler - Tr::TrackVectorFitter::fit.initializeBasePointers(tConvergence); + Tr::TrackVectorFitter::fitter.initializeBasePointers<true>( + tConvergence, + !(m_updateMaterial && m_applyMaterialCorrections) + ); + } else { + // Even though we don't initialize the scheduler and pointers, + // we still need to copy the smoothed state into the ref vectors + std::for_each(tConvergence.begin(), tConvergence.end(), [&] (Tr::TrackVectorFit::Track& t) { + t.updateRefVectors(); + }); } std::for_each(tConvergence.begin(), tConvergence.end(), [&] (Tr::TrackVectorFit::Track& t) { - // Update reference trajectories with smoothed states - t.updateRefVectors(); - - projectAndUpdate(t, pid, m_updateMaterial, m_updateTransport && i <= m_maxUpdateTransports+1); + projectAndUpdate(t, pid, m_updateMaterial); // Note: projectAndUpdate may flaw the track (set its fit status to FitFailed). // We proceed with those nevertheless, otherwise we screw up the scheduler. // We will check them after the fit anyway. @@ -227,7 +230,7 @@ StatusCode TrackVectorFitter::operator() ( t.saveChi2(); }); - Tr::TrackVectorFitter::fit(tConvergence); + Tr::TrackVectorFitter::fitter.smoothFit<false>(tConvergence); if (msgLevel(MSG::DEBUG)) { debug() << "Convergence iteration #" << i << endmsg; @@ -310,15 +313,16 @@ StatusCode TrackVectorFitter::operator() ( // In the outlier removal iterations, // we always need to initialize the base pointers, // as the track size varies - Tr::TrackVectorFitter::fit.initializeBasePointers(tOutliers); + Tr::TrackVectorFitter::fitter.initializeBasePointers<true>( + tOutliers, + !(m_updateMaterial && m_applyMaterialCorrections) + ); std::for_each(tOutliers.begin(), tOutliers.end(), [&] (Tr::TrackVectorFit::Track& t) { - t.updateRefVectors(); - projectAndUpdate(t, pid, m_updateMaterial, m_updateTransport); + projectAndUpdate(t, pid, m_updateMaterial); }); - Tr::TrackVectorFitter::fit(tOutliers); - eraseFailed(tOutliers); + Tr::TrackVectorFitter::fitter.smoothFit<true>(tOutliers); if (msgLevel(MSG::DEBUG)) { debug() << "Outlier removal iteration #" << i << endmsg; @@ -326,6 +330,8 @@ StatusCode TrackVectorFitter::operator() ( printNodes(t); }); } + + eraseFailed(tOutliers); } // Add leftovers @@ -356,7 +362,7 @@ void TrackVectorFitter::populateTracks ( ) const { std::for_each(tracks.begin(), tracks.end(), [&] (Tr::TrackVectorFit::Track& t) { // Populate nodes with states, covariances, etc. - Tr::TrackVectorFitter::fit.populateNodes(t); + Tr::TrackVectorFitter::fitter.populateNodes(t); // Determine states determineStates(t); @@ -382,43 +388,6 @@ void TrackVectorFitter::generateFitResult (LHCb::Track& track) const { } } -/** - * @brief Generates the seed covariance, and populates it. - */ -void TrackVectorFitter::generateAndSetSeedCovariance (Tr::TrackVectorFit::Track& t) const { - const auto& track = t.track(); - const auto& nodes = t.nodes(); - Gaudi::TrackSymMatrix seedCovariance; - - // Set off-diagonal elements to zero - if (m_useSeedStateErrors) { - LHCb::State state0 = track.firstState(); - auto z1 = nodes.front().node().z(); - extrapolator(track.type())->propagate(state0, z1); - seedCovariance = state0.covariance(); - if (msgLevel(MSG::DEBUG)) { - debug() << " state0 at z " << z1 - << " vector " << state0.stateVector() << "\n" - << " covariance " << state0.covariance() << endmsg; - } - } else { - seedCovariance(0,0) = m_errorX * m_errorX; - seedCovariance(1,1) = m_errorY * m_errorY; - seedCovariance(2,2) = m_errorTx * m_errorTx; - seedCovariance(3,3) = m_errorTy * m_errorTy; - - const auto temp = m_errorQoP[0] * track.firstState().qOverP(); - seedCovariance(4,4) = temp * temp + m_errorQoP[1] * m_errorQoP[1]; - } - - if (msgLevel(MSG::DEBUG)) { - debug() << "SeedState: z = " << nodes.front().node().z() - << " covariance = " << seedCovariance << endmsg; - } - - t.setSeedCovariance(std::move(seedCovariance)); -} - /** * @brief Determines the z position of the closest approach * to the beam line by linear extrapolation. @@ -488,28 +457,6 @@ StatusCode TrackVectorFitter::makeNodes (LHCb::Track& track, const LHCb::Tr::PID } ); - // Create reference nodes depending on track type - if (m_addDefaultRefNodes) { - if (track.hasVelo() && !track.checkFlag(LHCb::Track::Flags::Backward)) { - nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZEndVelo, LHCb::State::EndVelo)); - } - if (track.hasTT()) { - nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZBegRich1, LHCb::State::BegRich1)); - nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZEndRich1, LHCb::State::EndRich1)); - } - if (track.hasT()) { - nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZBegT, LHCb::State::AtT)); - nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZBegRich2, LHCb::State::BegRich2)); - nodes.push_back(new Tr::TrackVectorFit::FitNode(StateParameters::ZEndRich2, LHCb::State::EndRich2)); - } - } - - // At a node for the position at the beamline - if (m_stateAtBeamLine && (track.hasTT() || track.hasVelo())) { - const LHCb::State& refstate = *(track.checkFlag(LHCb::Track::Flags::Backward) ? track.states().back() : track.states().front()); - nodes.push_back(new Tr::TrackVectorFit::FitNode(closestToBeamLine(refstate), LHCb::State::ClosestToBeam)); - } - // Sort the nodes in z const bool backward = track.checkFlag(LHCb::Track::Flags::Backward); const bool upstream = (m_upstream && !backward) || (!m_upstream && backward); @@ -543,8 +490,7 @@ void TrackVectorFitter::populateRefVectors (Tr::TrackVectorFit::Track& t) const void TrackVectorFitter::projectAndUpdate ( Tr::TrackVectorFit::Track& t, const LHCb::Tr::PID& pid, - const bool& doUpdateMaterialCorrections, - const bool& doUpdateTransport + const bool& doUpdateMaterialCorrections ) const { // update the projections. need to be done every time ref is updated projectReference(t); @@ -556,10 +502,7 @@ void TrackVectorFitter::projectAndUpdate ( } // create the transport of the reference (after the noise, because it uses energy loss) - if (doUpdateTransport) { - updateTransport(t); - // return failureInfo("Problem updating transport"); - } + updateTransport(t); } /** @@ -689,15 +632,6 @@ void TrackVectorFitter::projectReference (Tr::TrackVectorFit::Track& t) const { for (auto& n : t.nodes()) { auto& node = n.node(); - - // node must have a measurement associated, - // otherwise one doesn't project - if (!node.hasMeasurement()) { - // Reset projection matrix, residual and errMeasure to 0 - n.resetProjection(); - continue; - } - // get the projector object, for the kind of measurement we have // ITrackProjectorSelector m_projectorSelector ITrackProjector* projector = m_projectorSelector->projector(node.measurement()); @@ -882,16 +816,8 @@ void TrackVectorFitter::updateTransport (Tr::TrackVectorFit::Track& t) const // calculate the 'transport vector' (need to replace that) const Gaudi::TrackVector tv = stateVector.parameters() - tm * refVector; n.setTransportVector(tv); - - if (i > t.m_forwardUpstream) { - n.setTransportMatrix(tm); - } - - if (i < (nodes.size() - t.m_backwardUpstream)) { - // Note: Store the inverse transport matrix of this node - // on the datatype of the node that will require it - nodes[i-1].calculateAndSetInverseTransportMatrix(tm); - } + n.setTransportMatrix(tm); + nodes[i-1].calculateAndSetInverseTransportMatrix(tm); // test dtx/dqop to see if the momentum affects this track. if (std::abs(tm(2,4)) != 0) { @@ -917,7 +843,7 @@ void TrackVectorFitter::updateTransport (Tr::TrackVectorFit::Track& t) const */ bool TrackVectorFitter::removeWorstOutlier (Tr::TrackVectorFit::Track& t) const { // return false if outlier chi2 cut < 0 - if (m_chi2Outliers < 0.0) { + if (m_chi2Outliers.value() < 0.0) { return false; } @@ -929,7 +855,9 @@ bool TrackVectorFitter::removeWorstOutlier (Tr::TrackVectorFit::Track& t) const } } - std::vector<std::reference_wrapper<Tr::TrackVectorFit::Node>> badNodes; + using NodeWithChi2 = std::pair<std::reference_wrapper<Tr::TrackVectorFit::Node>, double>; + std::vector<NodeWithChi2> nodesWithChi2UL; + nodesWithChi2UL.reserve(t.nodes().size()); const TRACKVECTORFIT_PRECISION trackChi2 = t.chi2(); @@ -949,59 +877,74 @@ bool TrackVectorFitter::removeWorstOutlier (Tr::TrackVectorFit::Track& t) const if ((it+1) != t.nodes().end()) { chi2Contribution -= (it+1)->get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>(); } if (msgLevel(MSG::DEBUG)){ - debug() << chi2Contribution << " (" << trackChi2 << " - " + debug() << "node LHCbID " << n.node().measurement().lhcbID().channelID() + << " chi2Contribution " << chi2Contribution << " (" << trackChi2 << " - " << ((it != t.nodes().begin()) ? (it-1)->get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Chi2>() : 0) << " - " << (((it+1) != t.nodes().end()) ? (it+1)->get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>() : 0) << ")" << endmsg; } - if (chi2Contribution > m_chi2Outliers) { - badNodes.push_back(n); + if (chi2Contribution > m_chi2Outliers.value()) { + nodesWithChi2UL.emplace_back(n, chi2Contribution); } } } } - // If we didn't find any bad nodes, return false - if (badNodes.empty()) { + // Sort by decreasing chi2 + std::sort(nodesWithChi2UL.begin(), nodesWithChi2UL.end(), + [] (const NodeWithChi2& n0, const NodeWithChi2& n1) { + return n0.second > n1.second; + }); + + if (msgLevel(MSG::DEBUG)) { + debug() << "All bad measurements, ordered: " << endmsg; + for (const NodeWithChi2& node : nodesWithChi2UL) { + Tr::TrackVectorFit::Node& n = node.first; + debug() << "Measurement of type " << n.node().measurement().type() + << " LHCbID " << n.node().measurement().lhcbID().channelID() + << " at z " << n.node().z() + << " with chi2Contribution " << node.second + << " and chi2 " << n.smoothChi2() + << endmsg; + } + } + + if (!nodesWithChi2UL.empty() && nodesWithChi2UL.front().second < m_chi2Outliers.value()) { return false; } - // Sort by decreasing chi2 - std::sort(badNodes.begin(), badNodes.end(), [] (const Tr::TrackVectorFit::Node& n0, const Tr::TrackVectorFit::Node& n1) { - return n0.smoothChi2() > n1.smoothChi2(); - }); + auto worstChi2 = m_chi2Outliers.value(); + auto worstNodeIt = nodesWithChi2UL.end(); + for (auto it=nodesWithChi2UL.begin(); it!=nodesWithChi2UL.end(); ++it) { + if (it->second > worstChi2) { + Tr::TrackVectorFit::Node& n = it->first; + const auto chi2 = n.smoothChi2(); + if (chi2 > worstChi2) { + worstChi2 = chi2; + worstNodeIt = it; + } + } + } - Tr::TrackVectorFit::Node& badNode = badNodes.front(); + if (worstNodeIt == nodesWithChi2UL.end()) { + if (msgLevel(MSG::DEBUG)) { + debug() << "Outlier not found" << endmsg; + } - if (badNode.smoothChi2() <= m_chi2Outliers) { return false; } if (msgLevel(MSG::DEBUG)) { - debug() << "Measurement of type " << badNode.node().measurement().type() - << " LHCbID " << badNode.node().measurement().lhcbID().channelID() - << " at z " << badNode.node().z() - << " node chi2 " << badNode.smoothChi2() - << " residual " << badNode.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Residual>() - << " errResidual " << badNode.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::ErrResidual>() + Tr::TrackVectorFit::Node& n = worstNodeIt->first; + debug() << "Measurement of type " << n.node().measurement().type() + << " LHCbID " << n.node().measurement().lhcbID().channelID() + << " at z " << n.node().z() + << " with chi2 " << worstChi2 << " removed." << endmsg; - - debug() << "All bad measurements, ordered: " << endmsg; - for (const Tr::TrackVectorFit::Node& node : badNodes ) { - debug() << "Measurement of type " << node.node().measurement().type() - << " LHCbID " << node.node().measurement().lhcbID().channelID() - << " at z " << node.node().z() - << " with chi2 " << node.smoothChi2() - << " residual " << node.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Residual>() - << " errResidual " << node.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::ErrResidual>() << endmsg; - } } // Mark the node as an outlier - badNode.node().setType(LHCb::Node::Outlier); - - // Update the track's upstreams, if need be - t.updateUpstream(); + worstNodeIt->first.get().setOutlier(); return true; } @@ -1044,7 +987,7 @@ bool TrackVectorFitter::removeWorstOutlierSimplified (Tr::TrackVectorFit::Track& return n0.smoothChi2() > n1.smoothChi2(); }); - ((Tr::TrackVectorFit::Node) badNodes[0]).node().setType(LHCb::Node::Outlier); + ((Tr::TrackVectorFit::Node) badNodes[0]).setOutlier(); return true; } diff --git a/Tr/TrackFitter/src/TrackVectorFitter.h b/Tr/TrackFitter/src/TrackVectorFitter.h index acd828f305b12d8d9fad71996f9c03150d6c1a25..43500da36f2930f1362a59cc85c01f13a695aaeb 100644 --- a/Tr/TrackFitter/src/TrackVectorFitter.h +++ b/Tr/TrackFitter/src/TrackVectorFitter.h @@ -51,17 +51,11 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> { Gaudi::Property<bool> m_applyEnergyLossCorrections {this, "ApplyEnergyLossCorr", true}; Gaudi::Property<bool> m_applyMaterialCorrections {this, "ApplyMaterialCorrections", true}; Gaudi::Property<double> m_chi2Outliers {this, "Chi2Outliers", 9.0}; - Gaudi::Property<double> m_errorX {this, "ErrorX", 20.0 * Gaudi::Units::mm}; - Gaudi::Property<double> m_errorY {this, "ErrorY", 20.0 * Gaudi::Units::mm}; - Gaudi::Property<double> m_errorTx {this, "ErrorTx", 0.1}; - Gaudi::Property<double> m_errorTy {this, "ErrorTy", 0.1}; - Gaudi::Property<std::vector<double>> m_errorQoP {this, "ErrorQoP", {0.0, 0.01}}; Gaudi::Property<bool> m_fillExtraInfo {this, "FillExtraInfo", true}; Gaudi::Property<bool> m_makeMeasurements {this, "MakeMeasurements", false}; Gaudi::Property<bool> m_makeNodes {this, "MakeNodes", false}; Gaudi::Property<double> m_maxDeltaChi2Converged {this, "MaxDeltaChiSqConverged", 0.01}; Gaudi::Property<double> m_maxMomentumForScattering {this, "MaxMomentumForScattering", 500. * Gaudi::Units::GeV}; - Gaudi::Property<unsigned> m_maxUpdateTransports {this, "MaxUpdateTransports", 10}; Gaudi::Property<unsigned> m_memManagerStorageIncrements {this, "MemManagerStorageIncrements", 4096}; Gaudi::Property<double> m_minMomentumForELossCorr {this, "MinMomentumELossCorr", 10. * Gaudi::Units::MeV}; Gaudi::Property<double> m_minMomentumForScattering {this, "MinMomentumForScattering", 100. * Gaudi::Units::MeV}; @@ -76,7 +70,6 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> { Gaudi::Property<double> m_scatteringPt {this, "TransverseMomentumForScattering", 400. * Gaudi::Units::MeV}; Gaudi::Property<bool> m_stateAtBeamLine {this, "StateAtBeamLine", true}; Gaudi::Property<bool> m_updateMaterial {this, "UpdateMaterial", false}; - Gaudi::Property<bool> m_updateTransport {this, "UpdateTransport", true}; Gaudi::Property<bool> m_upstream {this, "FitUpstream", true}; Gaudi::Property<bool> m_useSeedStateErrors {this, "UseSeedStateErrors", false}; // Tool Handles @@ -93,16 +86,11 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> { debug() << "m_applyEnergyLossCorrections " << m_applyEnergyLossCorrections << endmsg; debug() << "m_applyMaterialCorrections " << m_applyMaterialCorrections << endmsg; debug() << "m_chi2Outliers " << m_chi2Outliers << endmsg; - debug() << "m_errorTx " << m_errorTx << endmsg; - debug() << "m_errorTy " << m_errorTy << endmsg; - debug() << "m_errorX " << m_errorX << endmsg; - debug() << "m_errorY " << m_errorY << endmsg; debug() << "m_fillExtraInfo " << m_fillExtraInfo << endmsg; debug() << "m_makeMeasurements " << m_makeMeasurements << endmsg; debug() << "m_makeNodes " << m_makeNodes << endmsg; debug() << "m_maxDeltaChi2Converged " << m_maxDeltaChi2Converged << endmsg; debug() << "m_maxMomentumForScattering " << m_maxMomentumForScattering << endmsg; - debug() << "m_maxUpdateTransports " << m_maxUpdateTransports << endmsg; debug() << "m_minMomentumForELossCorr " << m_minMomentumForELossCorr << endmsg; debug() << "m_minMomentumForScattering " << m_minMomentumForScattering << endmsg; debug() << "m_minNumMuonHits " << m_minNumMuonHits << endmsg; @@ -116,7 +104,6 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> { debug() << "m_scatteringPt " << m_scatteringPt << endmsg; debug() << "m_stateAtBeamLine " << m_stateAtBeamLine << endmsg; debug() << "m_updateMaterial " << m_updateMaterial << endmsg; - debug() << "m_updateTransport " << m_updateTransport << endmsg; debug() << "m_upstream " << m_upstream << endmsg; debug() << "m_useSeedStateErrors " << m_useSeedStateErrors << endmsg; } @@ -171,8 +158,6 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> { void generateFitResult (LHCb::Track& track) const; - void generateAndSetSeedCovariance (Tr::TrackVectorFit::Track& t) const; - double closestToBeamLine (const LHCb::State& state) const; StatusCode makeNodes (LHCb::Track& track, const LHCb::Tr::PID& pid) const; @@ -182,8 +167,7 @@ struct TrackVectorFitter : public extends<GaudiTool, ITrackFitter> { void projectAndUpdate ( Tr::TrackVectorFit::Track& t, const LHCb::Tr::PID& pid, - const bool& doUpdateMaterialCorrections = true, - const bool& doUpdateTransport = true + const bool& doUpdateMaterialCorrections = true ) const; StatusCode initializeRefStates (LHCb::Track& track, const LHCb::Tr::PID& pid) const; diff --git a/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h b/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h index 0f2e621d533f9dcb706cdce2805f04b416426b12..f8fcc6631fde497cc48e40a49e20e9b7f7673e88 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h +++ b/Tr/TrackVectorFit/TrackVectorFit/ArrayGen.h @@ -4,7 +4,6 @@ #include "Types.h" #include "Scheduler.h" #include "VectorConfiguration.h" -#include "SimdTransposeHelper.h" // ------------------ // Array constructors @@ -19,33 +18,70 @@ struct ArrayGen { static inline constexpr uint16_t mask () { return (W==16 ? 0xFFFF : (W==8 ? 0xFF : (W==4 ? 0x0F : 0x03))); } - - template<size_t W> - static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 5*W> getPreviousTransportVector ( - const std::array<Sch::Item, W>& nodes + + template<size_t W, std::size_t... Is> + static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 5*W> InitialState_helper ( + const std::array<Sch::Item, W>& nodes, + std::index_sequence<Is...> ) { - return transpose_helper<5>::get<W>([&nodes](int j){ return (nodes[j].node+1)->transportVector().Array(); }); + return { + nodes[Is].node->m_nodeParameters.m_referenceVector[0]..., + nodes[Is].node->m_nodeParameters.m_referenceVector[1]..., + nodes[Is].node->m_nodeParameters.m_referenceVector[2]..., + nodes[Is].node->m_nodeParameters.m_referenceVector[3]..., + nodes[Is].node->m_nodeParameters.m_referenceVector[4]... + }; } template<size_t W> - static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 5*W> getCurrentTransportVector ( + static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 5*W> InitialState ( const std::array<Sch::Item, W>& nodes ) { - return transpose_helper<5>::get<W>([&nodes](int j){ return nodes[j].node->transportVector().Array(); }); + return InitialState_helper<W>(nodes, std::make_index_sequence<W>{}); + } + + static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15> InitialCovariance_values () { + return { + 400, + 0, 400, + 0, 0, 0.01, + 0, 0, 0, 0.01, + 0, 0, 0, 0, 0.0001 + }; } - template<size_t W> - static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 15*W> getPreviousNoiseMatrix ( - const std::array<Sch::Item, W>& nodes + static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15> InitialCovariance_values ( + std::size_t ) { - return transpose_helper<15>::get<W>([&nodes](int j){ return (nodes[j].node+1)->noiseMatrix().Array(); }); + return InitialCovariance_values(); } - template<size_t W> - static inline constexpr std::array<TRACKVECTORFIT_PRECISION, 15*W> getCurrentNoiseMatrix ( - const std::array<Sch::Item, W>& nodes + template<size_t W, std::size_t... Is> + static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15*W> InitialCovariance_helper ( + std::index_sequence<Is...> ) { - return transpose_helper<15>::get<W>([&nodes](int j){ return nodes[j].node->noiseMatrix().Array(); }); + return { + InitialCovariance_values(Is)[0]..., + InitialCovariance_values(Is)[1]..., + InitialCovariance_values(Is)[2]..., + InitialCovariance_values(Is)[3]..., + InitialCovariance_values(Is)[4]..., + InitialCovariance_values(Is)[5]..., + InitialCovariance_values(Is)[6]..., + InitialCovariance_values(Is)[7]..., + InitialCovariance_values(Is)[8]..., + InitialCovariance_values(Is)[9]..., + InitialCovariance_values(Is)[10]..., + InitialCovariance_values(Is)[11]..., + InitialCovariance_values(Is)[12]..., + InitialCovariance_values(Is)[13]..., + InitialCovariance_values(Is)[14]... + }; + } + + template<size_t W> + static constexpr inline std::array<TRACKVECTORFIT_PRECISION, 15*W> InitialCovariance () { + return InitialCovariance_helper<W>(std::make_index_sequence<W>{}); } }; diff --git a/Tr/TrackVectorFit/TrackVectorFit/MemView.h b/Tr/TrackVectorFit/TrackVectorFit/MemView.h index 57973e3120eb205c62f398c55dceff6c23502dab..f2261074f69e83acc0047ea02d4e41bad8c8919a 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/MemView.h +++ b/Tr/TrackVectorFit/TrackVectorFit/MemView.h @@ -22,16 +22,18 @@ namespace Op { class ProjectionMatrix; class ReferenceResidual; class ErrMeasure; + class TransportVector; + class NoiseMatrix; // Forward, Backward, Smooth class StateVector; class Covariance; // Forward, Backward + class TransportMatrix; class Chi2; // Smooth - class TransportMatrix; class Residual; class ErrResidual; } @@ -50,10 +52,12 @@ using TrackVector = LHCb::Vector::Mem::View::TrackVector<TRACKVECTORFIT_PRECISIO using TrackSymMatrix = LHCb::Vector::Mem::View::TrackSymMatrix<TRACKVECTORFIT_PRECISION>; struct NodeParameters { - constexpr static size_t size () { return 12; } + constexpr static size_t size () { return 32; } TRACKVECTORFIT_PRECISION* m_basePointer = nullptr; TrackVector m_referenceVector; TrackVector m_projectionMatrix; + TrackVector m_transportVector; + TrackSymMatrix m_noiseMatrix; TRACKVECTORFIT_PRECISION* m_referenceResidual; TRACKVECTORFIT_PRECISION* m_errorMeasure; @@ -65,9 +69,11 @@ struct NodeParameters { void setBasePointer (TRACKVECTORFIT_PRECISION* p) { m_basePointer = p; - m_referenceVector = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); - m_projectionMatrix = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); - m_referenceResidual = p; p += VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); + m_referenceVector = TrackVector(p); p += 5 * vector_width(); + m_projectionMatrix = TrackVector(p); p += 5 * vector_width(); + m_transportVector = TrackVector(p); p += 5 * vector_width(); + m_noiseMatrix = TrackSymMatrix(p); p += 15 * vector_width(); + m_referenceResidual = p; p += vector_width(); m_errorMeasure = p; } @@ -78,6 +84,8 @@ struct NodeParameters { void copy (const NodeParameters& s) { m_referenceVector.copy(s.m_referenceVector); m_projectionMatrix.copy(s.m_projectionMatrix); + m_transportVector.copy(s.m_transportVector); + m_noiseMatrix.copy(s.m_noiseMatrix); *m_referenceResidual = *s.m_referenceResidual; *m_errorMeasure = *s.m_errorMeasure; } @@ -99,9 +107,9 @@ struct SmoothState { void setBasePointer (TRACKVECTORFIT_PRECISION* p) { m_basePointer = p; - m_state = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); - m_covariance = TrackSymMatrix(p); p += 15 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); - m_residual = p; p += VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); + m_state = TrackVector(p); p += 5 * vector_width(); + m_covariance = TrackSymMatrix(p); p += 15 * vector_width(); + m_residual = p; p += vector_width(); m_errResidual = p; } @@ -132,8 +140,8 @@ struct State { void setBasePointer (TRACKVECTORFIT_PRECISION* p) { m_basePointer = p; - m_updatedState = TrackVector(p); p += 5 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); - m_updatedCovariance = TrackSymMatrix(p); p += 15 * VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); + m_updatedState = TrackVector(p); p += 5 * vector_width(); + m_updatedCovariance = TrackSymMatrix(p); p += 15 * vector_width(); m_chi2 = p; } diff --git a/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h b/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h index b0a026129ef56c8183a9f18c382461e69103d4e9..d90d3ac6dfcdc31ac98c331e95ab9ed10ea1944b 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h +++ b/Tr/TrackVectorFit/TrackVectorFit/NodeGetters.h @@ -41,6 +41,16 @@ inline const TrackSymMatrix& Node::get<Op::Forward, Op::Covariance> () const { return m_node->forwardCovariance(); } +template<> +inline const TrackSymMatrix& Node::get<Op::NodeParameters, Op::NoiseMatrix> () const { + return m_nodeParameters.m_noiseMatrix; +} + +template<> +inline const TrackVector& Node::get<Op::NodeParameters, Op::TransportVector> () const { + return m_nodeParameters.m_transportVector; +} + template<> inline const TrackMatrix<25>& Node::get<Op::Forward, Op::TransportMatrix> () const { return m_forwardTransportMatrix; @@ -125,6 +135,16 @@ inline TrackSymMatrix& Node::get<Op::Forward, Op::Covariance> () { return m_node->forwardCovariance(); } +template<> +inline TrackSymMatrix& Node::get<Op::NodeParameters, Op::NoiseMatrix> () { + return m_nodeParameters.m_noiseMatrix; +} + +template<> +inline TrackVector& Node::get<Op::NodeParameters, Op::TransportVector> () { + return m_nodeParameters.m_transportVector; +} + template<> inline TrackMatrix<25>& Node::get<Op::Forward, Op::TransportMatrix> () { return m_forwardTransportMatrix; diff --git a/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h b/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h index 4c5375d1460923679358f1f5e5d66c4c7e0ad25b..0542c63b6d0c492efe788df7b631b523a785f986 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h +++ b/Tr/TrackVectorFit/TrackVectorFit/Scheduler.h @@ -12,20 +12,34 @@ namespace Sch { struct Item { Track* track; - unsigned trackIndex; - std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, ALIGNMENT>>::iterator node; - std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, ALIGNMENT>>::iterator lastnode; + std::vector<Node>::iterator node; + unsigned trackIndex = 0; + unsigned nodeIndex = 0; Item () = default; Item (const Item& copy) = default; - Item ( - Track* track, - const unsigned& nodeIndex, - const unsigned& lastnodeIndex - ) : track(track) { + Item (Track* track) + : track(track) { trackIndex = track->m_index; - node = track->m_nodes.begin() + nodeIndex; - lastnode = track->m_nodes.begin() + lastnodeIndex; + node = track->m_nodes.begin(); + } + + void incrementNodeIndex () { + ++node; + ++nodeIndex; + } + + template<size_t N> + friend inline std::ostream& operator<< ( + std::ostream& cout, + const std::array<Item, N>& pool + ) { + cout << "{ "; + for (unsigned j=0; j<pool.size(); ++j) { + cout << pool[j].trackIndex << "-" << pool[j].nodeIndex << " "; + } + cout << "}"; + return cout; } }; @@ -60,14 +74,8 @@ struct Blueprint { ) { cout << reverse<N>(blueprint.in) << " " << reverse<N>(blueprint.out) << " " - << reverse<N>(blueprint.action) << " "; - - auto& pool = blueprint.pool; - cout << "{ "; - for (unsigned j=0; j<pool.size(); ++j) { - cout << pool[j].trackIndex << "-" << pool[j].node->m_index << " "; - } - cout << "}"; + << reverse<N>(blueprint.action) << " " + << blueprint.pool; return cout; } @@ -84,27 +92,40 @@ struct Blueprint { } }; + template<size_t N> -struct StaticSchedulerCommon { - // Generic generate method - template<class F0, class F1, class F2> +struct StaticScheduler { + StaticScheduler () = default; + StaticScheduler (const StaticScheduler& copy) = default; + static std::list<Blueprint<N>> generate ( - std::list<std::reference_wrapper<Track>>& tracks, - const F0& getSize, - const F1& getCurrentElement, - const F2& getLastElement + std::list<std::reference_wrapper<Track>>& tracks ) { - if (N > tracks.size()) { - std::cout << "Scheduler: N <= tracks.size(), things probably won't work" << std::endl; - } + auto getSize = [] (const Track& t) { return t.m_nodes.size(); }; // Order tracks std::vector<std::reference_wrapper<Track>> ordered_tracks (tracks.begin(), tracks.end()); std::sort(ordered_tracks.begin(), ordered_tracks.end(), [&getSize] (const Track& t0, const Track& t1) { - const size_t size0 = getSize(t0); - const size_t size1 = getSize(t1); - return size0 > size1; + const auto& type0 = t0.track().type(); + const auto& type1 = t1.track().type(); + + // Always prefer the non velo or velor tracks + if (!(type0 == LHCb::Track::Velo || type0 == LHCb::Track::VeloR) + && (type1 == LHCb::Track::Velo || type1 == LHCb::Track::VeloR)) { + return true; + } + else if ((type0 == LHCb::Track::Velo || type0 == LHCb::Track::VeloR) + && !(type1 == LHCb::Track::Velo || type1 == LHCb::Track::VeloR)) { + return false; + } + else { + // Otherwise, compare sizes + const size_t size0 = getSize(t0); + const size_t size1 = getSize(t1); + + return size0 > size1; + } }); // Initialise @@ -126,11 +147,7 @@ struct StaticSchedulerCommon { const unsigned slot = slots.back(); slots.pop_back(); in[slot] = true; - pool[slot] = Item ( - &track, - getCurrentElement(track), - getLastElement(track) - ); + pool[slot] = Item(&track); } while (slots.empty()) { @@ -139,7 +156,7 @@ struct StaticSchedulerCommon { out = false; for (size_t i=0; i<N; ++i) { Item& s = pool[i]; - if (s.node + 1 == s.lastnode) { + if (s.node + 1 == s.track->m_nodes.end()) { out[i] = 1; slots.push_back(i); } @@ -151,8 +168,8 @@ struct StaticSchedulerCommon { // Initialise in, and prepare nodes for next iteration in = false; std::for_each (pool.begin(), pool.end(), [] (Item& s) { - if (s.node + 1 != s.lastnode) { - ++s.node; + if (s.node + 1 != s.track->m_nodes.end()) { + s.incrementNodeIndex(); } }); } @@ -177,7 +194,7 @@ struct StaticSchedulerCommon { std::for_each (processingIndices.begin(), processingIndicesEnd, [&] (const unsigned& i) { Item& s = pool[i]; action[i] = 1; - if (s.node + 1 == s.lastnode) { + if (s.node + 1 == s.track->m_nodes.end()) { out[i] = true; } }); @@ -189,8 +206,8 @@ struct StaticSchedulerCommon { in = false; processingIndicesEnd = std::remove_if(processingIndices.begin(), processingIndicesEnd, [&] (const unsigned& i) { Item& s = pool[i]; - if (s.node + 1 == s.lastnode) return true; - ++s.node; + if (s.node + 1 == s.track->m_nodes.end()) return true; + s.incrementNodeIndex(); return false; }); } @@ -200,78 +217,8 @@ struct StaticSchedulerCommon { } }; -// Specializable generate method -class Pre; -class Main; -class Post; - -template<class T, size_t N> -struct StaticScheduler { - StaticScheduler () = default; - StaticScheduler (const StaticScheduler& copy) = default; - - static std::list<Blueprint<N>> - generate ( - std::list<std::reference_wrapper<Track>>& tracks - ); -}; - -template<size_t N> -struct StaticScheduler<Pre, N> { - static std::list<Blueprint<N>> - generate ( - std::list<std::reference_wrapper<Track>>& tracks - ) { - return StaticSchedulerCommon<N>::generate ( - tracks, - [] (const Track& t) { return (t.m_forwardUpstream + 1); }, - [] (const Track&) { return 0; }, - [] (const Track& t) { return t.m_forwardUpstream + 1; } - ); - } -}; - -template<size_t N> -struct StaticScheduler<Main, N> { - static std::list<Blueprint<N>> - generate ( - std::list<std::reference_wrapper<Track>>& tracks - ) { - return StaticSchedulerCommon<N>::generate ( - tracks, - [] (const Track& t) { return t.m_nodes.size() - t.m_forwardUpstream - t.m_backwardUpstream; }, - [] (const Track& t) { return t.m_forwardUpstream + 1; }, - [] (const Track& t) { return t.m_nodes.size() - t.m_backwardUpstream - 1; } - ); - } -}; - -template<size_t N> -struct StaticScheduler<Post, N> { - static std::list<Blueprint<N>> - generate ( - std::list<std::reference_wrapper<Track>>& tracks - ) { - return StaticSchedulerCommon<N>::generate ( - tracks, - [] (const Track& t) { return (t.m_backwardUpstream + 1); }, - [] (const Track& t) { return t.m_nodes.size() - t.m_backwardUpstream - 1; }, - [] (const Track& t) { return t.m_nodes.size(); } - ); - } -}; - } -struct SwapStore { - Mem::View::State store; - TrackVector& state; - TrackSymMatrix& covariance; - - SwapStore (const Mem::View::State& store, TrackVector& state, TrackSymMatrix& covariance) : - store(store), state(state), covariance(covariance) {} -}; - } } diff --git a/Tr/TrackVectorFit/TrackVectorFit/SimdTranspose.h b/Tr/TrackVectorFit/TrackVectorFit/SimdTranspose.h deleted file mode 100644 index 1bb91518af33c67d7fee99ca527954ed312663d2..0000000000000000000000000000000000000000 --- a/Tr/TrackVectorFit/TrackVectorFit/SimdTranspose.h +++ /dev/null @@ -1,239 +0,0 @@ -#pragma once - -#include "VectorConfiguration.h" - -#ifdef __SSE__ -/** - * Transposition in SSE of 4 PRECISION vectors - */ -//#define _MM_TRANSPOSE4_PS(r0, r1, r2, r3) ... - -/** - * Transposition in SSE of 2 PRECISION vectors - */ -#define ___MM_TRANSPOSE2_PD(in0, in1, out0, out1, __in0, __in1, __out0, __out1) \ - do { \ - __m128d __in0 = (in0), __in1 = (in1); \ - __m128d __out0, __out1; \ - __out0 = _mm_shuffle_pd(__in0, __in1, 0x0); \ - __out1 = _mm_shuffle_pd(__in0, __in1, 0x3); \ - (out0) = __out0, (out1) = __out1; \ - } while (0) - -#define _MM_TRANSPOSE2_PD(in0, in1) \ - ___MM_TRANSPOSE2_PD(in0, in1, in0, in1, __in0##__LINE__, __in1##__LINE__, __out0##__LINE__, __out1##__LINE__) - -#endif //__SSE__ - - -#ifdef __AVX__ -/** - * Transposition in AVX of 8 PRECISION vectors - */ -#define ___MM256_TRANSPOSE8_PS(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5, out6, out7, __in0, __in1, __in2, __in3, __in4, __in5, __in6, __in7, __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7) \ - do { \ - __m256 __in0 = (in0), __in1 = (in1), __in2 = (in2), __in3 = (in3), __in4 = (in4), __in5 = (in5), __in6 = (in6), __in7 = (in7); \ - __m256 __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7; \ - __m256 __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7; \ - __m256 __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7; \ - __tmp0 = _mm256_unpacklo_ps(__in0, __in1); \ - __tmp1 = _mm256_unpackhi_ps(__in0, __in1); \ - __tmp2 = _mm256_unpacklo_ps(__in2, __in3); \ - __tmp3 = _mm256_unpackhi_ps(__in2, __in3); \ - __tmp4 = _mm256_unpacklo_ps(__in4, __in5); \ - __tmp5 = _mm256_unpackhi_ps(__in4, __in5); \ - __tmp6 = _mm256_unpacklo_ps(__in6, __in7); \ - __tmp7 = _mm256_unpackhi_ps(__in6, __in7); \ - __tmpp0 = _mm256_shuffle_ps(__tmp0, __tmp2, 0x44); \ - __tmpp1 = _mm256_shuffle_ps(__tmp0, __tmp2, 0xEE); \ - __tmpp2 = _mm256_shuffle_ps(__tmp1, __tmp3, 0x44); \ - __tmpp3 = _mm256_shuffle_ps(__tmp1, __tmp3, 0xEE); \ - __tmpp4 = _mm256_shuffle_ps(__tmp4, __tmp6, 0x44); \ - __tmpp5 = _mm256_shuffle_ps(__tmp4, __tmp6, 0xEE); \ - __tmpp6 = _mm256_shuffle_ps(__tmp5, __tmp7, 0x44); \ - __tmpp7 = _mm256_shuffle_ps(__tmp5, __tmp7, 0xEE); \ - __out0 = _mm256_permute2f128_ps(__tmpp0, __tmpp4, 0x20); \ - __out1 = _mm256_permute2f128_ps(__tmpp1, __tmpp5, 0x20); \ - __out2 = _mm256_permute2f128_ps(__tmpp2, __tmpp6, 0x20); \ - __out3 = _mm256_permute2f128_ps(__tmpp3, __tmpp7, 0x20); \ - __out4 = _mm256_permute2f128_ps(__tmpp0, __tmpp4, 0x31); \ - __out5 = _mm256_permute2f128_ps(__tmpp1, __tmpp5, 0x31); \ - __out6 = _mm256_permute2f128_ps(__tmpp2, __tmpp6, 0x31); \ - __out7 = _mm256_permute2f128_ps(__tmpp3, __tmpp7, 0x31); \ - (out0) = __out0, (out1) = __out1, (out2) = __out2, (out3) = __out3, (out4) = __out4, (out5) = __out5, (out6) = __out6, (out7) = __out7; \ - } while (0) - -#define _MM256_TRANSPOSE8_PS(in0, in1, in2, in3, in4, in5, in6, in7) \ - ___MM256_TRANSPOSE8_PS(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7, \ - __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, __in4##__LINE__, __in5##__LINE__, __in6##__LINE__, __in7##__LINE__, \ - __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, __out4##__LINE__, __out5##__LINE__, __out6##__LINE__, __out7##__LINE__, \ - __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__, __tmp4##__LINE__, __tmp5##__LINE__, __tmp6##__LINE__, __tmp7##__LINE__, \ - __tmpp0##__LINE__, __tmpp1##__LINE__, __tmpp2##__LINE__, __tmpp3##__LINE__, __tmpp4##__LINE__, __tmpp5##__LINE__, __tmpp6##__LINE__, __tmpp7##__LINE__) - - -/** - * Transposition in AVX of 4 PRECISION vectors - */ -#define ___MM256_TRANSPOSE4_PD(in0, in1, in2, in3, out0, out1, out2, out3, __in0, __in1, __in2, __in3, __out0, __out1, __out2, __out3, __tmp0, __tmp1, __tmp2, __tmp3) \ - do { \ - __m256d __in0 = (in0), __in1 = (in1), __in2 = (in2), __in3 = (in3); \ - __m256d __tmp0, __tmp1, __tmp2, __tmp3; \ - __m256d __out0, __out1, __out2, __out3; \ - __tmp0 = _mm256_shuffle_pd(__in0, __in1, 0x0); \ - __tmp1 = _mm256_shuffle_pd(__in0, __in1, 0xf); \ - __tmp2 = _mm256_shuffle_pd(__in2, __in3, 0x0); \ - __tmp3 = _mm256_shuffle_pd(__in2, __in3, 0xf); \ - __out0 = _mm256_permute2f128_pd(__tmp0, __tmp2, 0x20); \ - __out1 = _mm256_permute2f128_pd(__tmp1, __tmp3, 0x20); \ - __out2 = _mm256_permute2f128_pd(__tmp0, __tmp2, 0x31); \ - __out3 = _mm256_permute2f128_pd(__tmp1, __tmp3, 0x31); \ - (out0) = __out0, (out1) = __out1, (out2) = __out2, (out3) = __out3; \ - } while (0) - -#define _MM256_TRANSPOSE4_PD(in0, in1, in2, in3) \ - ___MM256_TRANSPOSE4_PD(in0, in1, in2, in3, in0, in1, in2, in3, \ - __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, \ - __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, \ - __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__) - -#endif //__AVX__ - -#ifdef __AVX512F__ -/** - * Transposition in AVX512 of 16 PRECISION vectors - */ -#define ___MM512_TRANSPOSE16_PS(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF, out0, out1, out2, out3, out4, out5, out6, out7, out8, out9, outA, outB, outC, outD, outE, outF, __in0, __in1, __in2, __in3, __in4, __in5, __in6, __in7, __in8, __in9, __inA, __inB, __inC, __inD, __inE, __inF, __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __out8, __out9, __outA, __outB, __outC, __outD, __outE, __outF, __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmp8, __tmp9, __tmpA, __tmpB, __tmpC, __tmpD, __tmpE, __tmpF, __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7, __tmpp8, __tmpp9, __tmppA, __tmppB, __tmppC, __tmppD, __tmpppE, __tmppF) \ - do { \ - __m512i __in0 = _mm512_castps_si512(in0), __in1 = _mm512_castps_si512(in1), __in2 = _mm512_castps_si512(in2), __in3 = _mm512_castps_si512(in3), __in4 = _mm512_castps_si512(in4), __in5 = _mm512_castps_si512(in5), __in6 = _mm512_castps_si512(in6), __in7 = _mm512_castps_si512(in7), __in8 = _mm512_castps_si512(in8), __in9 = _mm512_castps_si512(in9), __inA = _mm512_castps_si512(inA), __inB = _mm512_castps_si512(inB), __inC = _mm512_castps_si512(inC), __inD = _mm512_castps_si512(inD), __inE = _mm512_castps_si512(inE), __inF = _mm512_castps_si512(inF); \ - __m512i __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmp8, __tmp9, __tmpA, __tmpB, __tmpC, __tmpD, __tmpE, __tmpF; \ - __m512i __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7, __tmpp8, __tmpp9, __tmppA, __tmppB, __tmppC, __tmppD, __tmppE, __tmppF; \ - __m512i __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __out8, __out9, __outA, __outB, __outC, __outD, __outE, __outF; \ - \ - __tmp0 = _mm512_unpacklo_epi32(__in0,__in1); \ - __tmp1 = _mm512_unpackhi_epi32(__in0,__in1); \ - __tmp2 = _mm512_unpacklo_epi32(__in2,__in3); \ - __tmp3 = _mm512_unpackhi_epi32(__in2,__in3); \ - __tmp4 = _mm512_unpacklo_epi32(__in4,__in5); \ - __tmp5 = _mm512_unpackhi_epi32(__in4,__in5); \ - __tmp6 = _mm512_unpacklo_epi32(__in6,__in7); \ - __tmp7 = _mm512_unpackhi_epi32(__in6,__in7); \ - __tmp8 = _mm512_unpacklo_epi32(__in8,__in9); \ - __tmp9 = _mm512_unpackhi_epi32(__in8,__in9); \ - __tmpA = _mm512_unpacklo_epi32(__inA,__inB); \ - __tmpB = _mm512_unpackhi_epi32(__inA,__inB); \ - __tmpC = _mm512_unpacklo_epi32(__inC,__inD); \ - __tmpD = _mm512_unpackhi_epi32(__inC,__inD); \ - __tmpE = _mm512_unpacklo_epi32(__inE,__inF); \ - __tmpF = _mm512_unpackhi_epi32(__inE,__inF); \ - \ - __tmpp0 = _mm512_unpacklo_epi64(__tmp0,__tmp2); \ - __tmpp1 = _mm512_unpackhi_epi64(__tmp0,__tmp2); \ - __tmpp2 = _mm512_unpacklo_epi64(__tmp1,__tmp3); \ - __tmpp3 = _mm512_unpackhi_epi64(__tmp1,__tmp3); \ - __tmpp4 = _mm512_unpacklo_epi64(__tmp4,__tmp6); \ - __tmpp5 = _mm512_unpackhi_epi64(__tmp4,__tmp6); \ - __tmpp6 = _mm512_unpacklo_epi64(__tmp5,__tmp7); \ - __tmpp7 = _mm512_unpackhi_epi64(__tmp5,__tmp7); \ - __tmpp8 = _mm512_unpacklo_epi64(__tmp8,__tmpA); \ - __tmpp9 = _mm512_unpackhi_epi64(__tmp8,__tmpA); \ - __tmppA = _mm512_unpacklo_epi64(__tmp9,__tmpB); \ - __tmppB = _mm512_unpackhi_epi64(__tmp9,__tmpB); \ - __tmppC = _mm512_unpacklo_epi64(__tmpC,__tmpE); \ - __tmppD = _mm512_unpackhi_epi64(__tmpC,__tmpE); \ - __tmppE = _mm512_unpacklo_epi64(__tmpD,__tmpF); \ - __tmppF = _mm512_unpackhi_epi64(__tmpD,__tmpF); \ - \ - __tmp0 = _mm512_shuffle_i32x4(__tmpp0, __tmpp4, 0x88); \ - __tmp1 = _mm512_shuffle_i32x4(__tmpp1, __tmpp5, 0x88); \ - __tmp2 = _mm512_shuffle_i32x4(__tmpp2, __tmpp6, 0x88); \ - __tmp3 = _mm512_shuffle_i32x4(__tmpp3, __tmpp7, 0x88); \ - __tmp4 = _mm512_shuffle_i32x4(__tmpp0, __tmpp4, 0xdd); \ - __tmp5 = _mm512_shuffle_i32x4(__tmpp1, __tmpp5, 0xdd); \ - __tmp6 = _mm512_shuffle_i32x4(__tmpp2, __tmpp6, 0xdd); \ - __tmp7 = _mm512_shuffle_i32x4(__tmpp3, __tmpp7, 0xdd); \ - __tmp8 = _mm512_shuffle_i32x4(__tmpp8, __tmppC, 0x88); \ - __tmp9 = _mm512_shuffle_i32x4(__tmpp9, __tmppD, 0x88); \ - __tmpA = _mm512_shuffle_i32x4(__tmppA, __tmppE, 0x88); \ - __tmpB = _mm512_shuffle_i32x4(__tmppB, __tmppF, 0x88); \ - __tmpC = _mm512_shuffle_i32x4(__tmpp8, __tmppC, 0xdd); \ - __tmpD = _mm512_shuffle_i32x4(__tmpp9, __tmppD, 0xdd); \ - __tmpE = _mm512_shuffle_i32x4(__tmppA, __tmppE, 0xdd); \ - __tmpF = _mm512_shuffle_i32x4(__tmppB, __tmppF, 0xdd); \ - \ - __out0 = _mm512_shuffle_i32x4(__tmp0, __tmp8, 0x88); \ - __out1 = _mm512_shuffle_i32x4(__tmp1, __tmp9, 0x88); \ - __out2 = _mm512_shuffle_i32x4(__tmp2, __tmpA, 0x88); \ - __out3 = _mm512_shuffle_i32x4(__tmp3, __tmpB, 0x88); \ - __out4 = _mm512_shuffle_i32x4(__tmp4, __tmpC, 0x88); \ - __out5 = _mm512_shuffle_i32x4(__tmp5, __tmpD, 0x88); \ - __out6 = _mm512_shuffle_i32x4(__tmp6, __tmpE, 0x88); \ - __out7 = _mm512_shuffle_i32x4(__tmp7, __tmpF, 0x88); \ - __out8 = _mm512_shuffle_i32x4(__tmp0, __tmp8, 0xdd); \ - __out9 = _mm512_shuffle_i32x4(__tmp1, __tmp9, 0xdd); \ - __outA = _mm512_shuffle_i32x4(__tmp2, __tmpA, 0xdd); \ - __outB = _mm512_shuffle_i32x4(__tmp3, __tmpB, 0xdd); \ - __outC = _mm512_shuffle_i32x4(__tmp4, __tmpC, 0xdd); \ - __outD = _mm512_shuffle_i32x4(__tmp5, __tmpD, 0xdd); \ - __outE = _mm512_shuffle_i32x4(__tmp6, __tmpE, 0xdd); \ - __outF = _mm512_shuffle_i32x4(__tmp7, __tmpF, 0xdd); \ - \ - (out0) = _mm512_castsi512_ps(__out0), (out1) = _mm512_castsi512_ps(__out1), (out2) = _mm512_castsi512_ps(__out2), (out3) = _mm512_castsi512_ps(__out3), (out4) = _mm512_castsi512_ps(__out4), (out5) = _mm512_castsi512_ps(__out5), (out6) = _mm512_castsi512_ps(__out6), (out7) = _mm512_castsi512_ps(__out7), (out8) = _mm512_castsi512_ps(__out8), (out9) = _mm512_castsi512_ps(__out9), (outA) = _mm512_castsi512_ps(__outA), (outB) = _mm512_castsi512_ps(__outB), (outC) = _mm512_castsi512_ps(__outC), (outD) = _mm512_castsi512_ps(__outD), (outE) = _mm512_castsi512_ps(__outE), (outF) = _mm512_castsi512_ps(__outF); \ - } while (0) - -#define _MM512_TRANSPOSE16_PS(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF) \ - ___MM512_TRANSPOSE16_PS(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, inC, inD, inE, inF, \ - __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, __in4##__LINE__, __in5##__LINE__, __in6##__LINE__, __in7##__LINE__, __in8##__LINE__, __in9##__LINE__, __inA##__LINE__, __inB##__LINE__, __inC##__LINE__, __inD##__LINE__, __inE##__LINE__, __inF##__LINE__, \ - __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, __out4##__LINE__, __out5##__LINE__, __out6##__LINE__, __out7##__LINE__, __out8##__LINE__, __out9##__LINE__, __outA##__LINE__, __outB##__LINE__, __outC##__LINE__, __outD##__LINE__, __outE##__LINE__, __outF##__LINE__, \ - __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__, __tmp4##__LINE__, __tmp5##__LINE__, __tmp6##__LINE__, __tmp7##__LINE__, __tmp8##__LINE__, __tmp9##__LINE__, __tmpA##__LINE__, __tmpB##__LINE__, __tmpC##__LINE__, __tmpD##__LINE__, __tmpE##__LINE__, __tmpF##__LINE__, \ - __tmpp0##__LINE__, __tmpp1##__LINE__, __tmpp2##__LINE__, __tmpp3##__LINE__, __tmpp4##__LINE__, __tmpp5##__LINE__, __tmpp6##__LINE__, __tmpp7##__LINE__, __tmpp8##__LINE__, __tmpp9##__LINE__, __tmppA##__LINE__, __tmppB##__LINE__, __tmppC##__LINE__, __tmppD##__LINE__, __tmppE##__LINE__, __tmppF##__LINE__) - - - -/** - * Transposition in AVX512 of 8 PRECISION vectors - */ -#define ___MM512_TRANSPOSE8_PD(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, out5, out6, out7, __in0, __in1, __in2, __in3, __in4, __in5, __in6, __in7, __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7, __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7, __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7) \ - do { \ - __m512i __in0 = _mm512_castpd_si512(in0), __in1 = _mm512_castpd_si512(in1), __in2 = _mm512_castpd_si512(in2), __in3 = _mm512_castpd_si512(in3), __in4 = _mm512_castpd_si512(in4), __in5 = _mm512_castpd_si512(in5), __in6 = _mm512_castpd_si512(in6), __in7 = _mm512_castpd_si512(in7); \ - __m512i __tmp0, __tmp1, __tmp2, __tmp3, __tmp4, __tmp5, __tmp6, __tmp7; \ - __m512i __tmpp0, __tmpp1, __tmpp2, __tmpp3, __tmpp4, __tmpp5, __tmpp6, __tmpp7; \ - __m512i __out0, __out1, __out2, __out3, __out4, __out5, __out6, __out7; \ - \ - __tmp0 = _mm512_unpacklo_epi64(__in0,__in1); \ - __tmp1 = _mm512_unpackhi_epi64(__in0,__in1); \ - __tmp2 = _mm512_unpacklo_epi64(__in2,__in3); \ - __tmp3 = _mm512_unpackhi_epi64(__in2,__in3); \ - __tmp4 = _mm512_unpacklo_epi64(__in4,__in5); \ - __tmp5 = _mm512_unpackhi_epi64(__in4,__in5); \ - __tmp6 = _mm512_unpacklo_epi64(__in6,__in7); \ - __tmp7 = _mm512_unpackhi_epi64(__in6,__in7); \ - \ - __tmpp0 = _mm512_shuffle_i64x2(__tmp0, __tmp2, 0x88); \ - __tmpp1 = _mm512_shuffle_i64x2(__tmp1, __tmp3, 0x88); \ - __tmpp2 = _mm512_shuffle_i64x2(__tmp0, __tmp2, 0xdd); \ - __tmpp3 = _mm512_shuffle_i64x2(__tmp1, __tmp3, 0xdd); \ - __tmpp4 = _mm512_shuffle_i64x2(__tmp4, __tmp6, 0x88); \ - __tmpp5 = _mm512_shuffle_i64x2(__tmp5, __tmp7, 0x88); \ - __tmpp6 = _mm512_shuffle_i64x2(__tmp4, __tmp6, 0xdd); \ - __tmpp7 = _mm512_shuffle_i64x2(__tmp5, __tmp7, 0xdd); \ - \ - __out0 = _mm512_shuffle_i64x2(__tmpp0, __tmpp4, 0x88); \ - __out1 = _mm512_shuffle_i64x2(__tmpp1, __tmpp5, 0x88); \ - __out2 = _mm512_shuffle_i64x2(__tmpp2, __tmpp6, 0x88); \ - __out3 = _mm512_shuffle_i64x2(__tmpp3, __tmpp7, 0x88); \ - __out4 = _mm512_shuffle_i64x2(__tmpp0, __tmpp4, 0xdd); \ - __out5 = _mm512_shuffle_i64x2(__tmpp1, __tmpp5, 0xdd); \ - __out6 = _mm512_shuffle_i64x2(__tmpp2, __tmpp6, 0xdd); \ - __out7 = _mm512_shuffle_i64x2(__tmpp3, __tmpp7, 0xdd); \ - \ - (out0) = _mm512_castsi512_pd(__out0), (out1) = _mm512_castsi512_pd(__out1), (out2) = _mm512_castsi512_pd(__out2), (out3) = _mm512_castsi512_pd(__out3), (out4) = _mm512_castsi512_pd(__out4), (out5) = _mm512_castsi512_pd(__out5), (out6) = _mm512_castsi512_pd(__out6), (out7) = _mm512_castsi512_pd(__out7); \ - } while (0) - -#define _MM512_TRANSPOSE8_PD(in0, in1, in2, in3, in4, in5, in6, in7) \ - ___MM512_TRANSPOSE8_PD(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7, \ - __in0##__LINE__, __in1##__LINE__, __in2##__LINE__, __in3##__LINE__, __in4##__LINE__, __in5##__LINE__, __in6##__LINE__, __in7##__LINE__, \ - __out0##__LINE__, __out1##__LINE__, __out2##__LINE__, __out3##__LINE__, __out4##__LINE__, __out5##__LINE__, __out6##__LINE__, __out7##__LINE__, \ - __tmp0##__LINE__, __tmp1##__LINE__, __tmp2##__LINE__, __tmp3##__LINE__, __tmp4##__LINE__, __tmp5##__LINE__, __tmp6##__LINE__, __tmp7##__LINE__, \ - __tmpp0##__LINE__, __tmpp1##__LINE__, __tmpp2##__LINE__, __tmpp3##__LINE__, __tmpp4##__LINE__, __tmpp5##__LINE__, __tmpp6##__LINE__, __tmpp7##__LINE__) - -#endif //__AVX512F__ diff --git a/Tr/TrackVectorFit/TrackVectorFit/SimdTransposeHelper.h b/Tr/TrackVectorFit/TrackVectorFit/SimdTransposeHelper.h deleted file mode 100644 index 6758aafe4df06a17e8c78a54d4821c22b1070f86..0000000000000000000000000000000000000000 --- a/Tr/TrackVectorFit/TrackVectorFit/SimdTransposeHelper.h +++ /dev/null @@ -1,665 +0,0 @@ -#pragma once - -#ifdef __SSE__ -#include "immintrin.h" -#endif -#include "VectorConfiguration.h" -#include "SimdTranspose.h" - -namespace Tr { - -namespace TrackVectorFit { - -namespace { - template <class T> - static constexpr T round_down(T n, T align) { - return n & -align; - } - template <class T> - static constexpr T round_up(T n, T align) { - return (n + align-1) & -align; - } - template <std::size_t N, class ISA, class T> - struct transpose_helper_soaize { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - for (int i = 0; i < N; i++) { - for (int j = 0; j < W; j++) { - out_rows(i)[j] = in_rows(j)[i]; - } - } - } - }; - template <std::size_t N, class ISA, class T> - struct transpose_helper_get { - template <class F, std::size_t... Is> - static std::array<T, N*sizeof...(Is)> get( - F getter, - std::index_sequence<Is...> - ) { - constexpr std::size_t W = sizeof...(Is); - std::array<T, N*W> soa; - transpose_helper_soaize<N, ISA, T>::template soaize<W>( - getter, - [&soa](int i){ return &soa[i*W]; } - ); - return soa; - } - }; - - template <std::size_t N, class ISA = ISAs::CURRENT, class T = TRACKVECTORFIT_PRECISION> - struct transpose_helper { - template <std::size_t W, class F> - static std::array<T, N*W> get( - F getter - ) { - return transpose_helper_get<N, ISA, T>::get(getter, std::make_index_sequence<W>{}); - } - }; - - template <class T> - struct transpose_helper_get<5, ISAs::SCALAR, T> { - template <class F, std::size_t... Is> - static std::array<T, 5*sizeof...(Is)> get( - F getter, - std::index_sequence<Is...> - ) { - return std::array<TRACKVECTORFIT_PRECISION, 5*sizeof...(Is)> { - getter(Is)[0]... , - getter(Is)[1]... , - getter(Is)[2]... , - getter(Is)[3]... , - getter(Is)[4]... - }; - } - }; - - template <class T> - struct transpose_helper_get<15, ISAs::SCALAR, T> { - template <class F, std::size_t... Is> - static std::array<T, 15*sizeof...(Is)> get( - F getter, - std::index_sequence<Is...> - ) { - return std::array<TRACKVECTORFIT_PRECISION, 15*sizeof...(Is)> { - getter(Is)[0]... , - getter(Is)[1]... , - getter(Is)[2]... , - getter(Is)[3]... , - getter(Is)[4]... , - getter(Is)[5]... , - getter(Is)[6]... , - getter(Is)[7]... , - getter(Is)[8]... , - getter(Is)[9]... , - getter(Is)[10]... , - getter(Is)[11]... , - getter(Is)[12]... , - getter(Is)[13]... , - getter(Is)[14]... - }; - } - }; - - template <class T> - struct transpose_helper_get<25, ISAs::SCALAR, T> { - template <class F, std::size_t... Is> - static std::array<T, 25*sizeof...(Is)> get( - F getter, - std::index_sequence<Is...> - ) { - return std::array<TRACKVECTORFIT_PRECISION, 25*sizeof...(Is)> { - getter(Is)[0]... , - getter(Is)[1]... , - getter(Is)[2]... , - getter(Is)[3]... , - getter(Is)[4]... , - getter(Is)[5]... , - getter(Is)[6]... , - getter(Is)[7]... , - getter(Is)[8]... , - getter(Is)[9]... , - getter(Is)[10]... , - getter(Is)[11]... , - getter(Is)[12]... , - getter(Is)[13]... , - getter(Is)[14]... , - getter(Is)[15]... , - getter(Is)[16]... , - getter(Is)[17]... , - getter(Is)[18]... , - getter(Is)[19]... , - getter(Is)[20]... , - getter(Is)[21]... , - getter(Is)[22]... , - getter(Is)[23]... , - getter(Is)[24]... - }; - } - }; - -#ifdef TRACKVECTORFIT_SINGLE_PRECISION - -#ifdef __SSE__ - template <std::size_t N> - struct transpose_helper_soaize<N, ISAs::SSE, TRACKVECTORFIT_PRECISION> { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - constexpr std::size_t sse_card = ISAs::SSE::card<TRACKVECTORFIT_PRECISION>(); - unsigned i, j; - static_assert(W % sse_card == 0, "Width is not a multiple of SIMD card (4)"); - for (i = 0; i < round_down(N, sse_card); i += sse_card) { - for (j = 0; j < W; j += sse_card) { - __m128 r0, r1, r2, r3; - r0 = _mm_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm_loadu_ps(&in_rows(j+1)[i]); - r3 = _mm_loadu_ps(&in_rows(j+1)[i]); - - _MM_TRANSPOSE4_PS(r0, r1, r2, r3); - - _mm_store_ps(&out_rows(i+0)[j], r0); - _mm_store_ps(&out_rows(i+1)[j], r1); - _mm_store_ps(&out_rows(i+1)[j], r2); - _mm_store_ps(&out_rows(i+1)[j], r3); - } - } /* remainder */ { - i = round_down(N, sse_card); - for (j = 0; j < W; j += sse_card) { - __m128 r0, r1, r2, r3; - if (N - i <= 4) { - r0 = _mm_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm_loadu_ps(&in_rows(j+2)[i]); - r3 = _mm_loadu_ps(&in_rows(j+3)[i]); - } else { - r0 = _mm_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm_loadu_ps(&in_rows(j+2)[i]); - r3 = _mm_loadu_ps(&in_rows(j+3)[i]); - } - - _MM_TRANSPOSE4_PS(r0, r1, r2, r3); - - switch (N - i) { - case 4: - _mm_store_ps(&out_rows(i+3)[j], r3); - case 3: - _mm_store_ps(&out_rows(i+2)[j], r2); - case 2: - _mm_store_ps(&out_rows(i+1)[j], r1); - case 1: - _mm_store_ps(&out_rows(i+0)[j], r0); - } - } - } - } - }; -#endif //__SSE__ - -#ifdef __AVX__ - template <std::size_t N> - struct transpose_helper_soaize<N, ISAs::AVX, TRACKVECTORFIT_PRECISION> { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - constexpr std::size_t avx_card = ISAs::AVX::card<TRACKVECTORFIT_PRECISION>(); - unsigned i, j; - static_assert(W % avx_card == 0, "Width is not a multiple of SIMD card (8)"); - for (i = 0; i < round_down(N, avx_card); i += avx_card) { - for (j = 0; j < W; j += avx_card) { - __m256 r0, r1, r2, r3, r4, r5, r6, r7; - r0 = _mm256_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm256_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm256_loadu_ps(&in_rows(j+2)[i]); - r3 = _mm256_loadu_ps(&in_rows(j+3)[i]); - r4 = _mm256_loadu_ps(&in_rows(j+4)[i]); - r5 = _mm256_loadu_ps(&in_rows(j+5)[i]); - r6 = _mm256_loadu_ps(&in_rows(j+6)[i]); - r7 = _mm256_loadu_ps(&in_rows(j+7)[i]); - - _MM256_TRANSPOSE8_PS(r0, r1, r2, r3, r4, r5, r6, r7); - - _mm256_store_ps(&out_rows(i+0)[j], r0); - _mm256_store_ps(&out_rows(i+1)[j], r1); - _mm256_store_ps(&out_rows(i+2)[j], r2); - _mm256_store_ps(&out_rows(i+3)[j], r3); - _mm256_store_ps(&out_rows(i+4)[j], r4); - _mm256_store_ps(&out_rows(i+5)[j], r5); - _mm256_store_ps(&out_rows(i+6)[j], r6); - _mm256_store_ps(&out_rows(i+7)[j], r7); - } - } /* remainder */ { - i = round_down(N, avx_card); - for (j = 0; j < W; j += avx_card) { - __m256 r0, r1, r2, r3, r4, r5, r6, r7; - if (N - i == 1) { - r0 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+0)[i])); - r1 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+1)[i])); - r2 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+2)[i])); - r3 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+3)[i])); - r4 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+4)[i])); - r5 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+5)[i])); - r6 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+6)[i])); - r7 = _mm256_castps128_ps256(_mm_load_ss(&in_rows(j+7)[i])); - } else if (N - i <= 4) { - r0 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+0)[i])); - r1 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+1)[i])); - r2 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+2)[i])); - r3 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+3)[i])); - r4 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+4)[i])); - r5 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+5)[i])); - r6 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+6)[i])); - r7 = _mm256_castps128_ps256(_mm_loadu_ps(&in_rows(j+7)[i])); - } else { - r0 = _mm256_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm256_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm256_loadu_ps(&in_rows(j+2)[i]); - r3 = _mm256_loadu_ps(&in_rows(j+3)[i]); - r4 = _mm256_loadu_ps(&in_rows(j+4)[i]); - r5 = _mm256_loadu_ps(&in_rows(j+5)[i]); - r6 = _mm256_loadu_ps(&in_rows(j+6)[i]); - r7 = _mm256_loadu_ps(&in_rows(j+7)[i]); - } - - _MM256_TRANSPOSE8_PS(r0, r1, r2, r3, r4, r5, r6, r7); - - switch (N - i) { - case 8: - _mm256_store_ps(&out_rows(i+7)[j], r7); - case 7: - _mm256_store_ps(&out_rows(i+6)[j], r6); - case 6: - _mm256_store_ps(&out_rows(i+5)[j], r5); - case 5: - _mm256_store_ps(&out_rows(i+4)[j], r4); - case 4: - _mm256_store_ps(&out_rows(i+3)[j], r3); - case 3: - _mm256_store_ps(&out_rows(i+2)[j], r2); - case 2: - _mm256_store_ps(&out_rows(i+1)[j], r1); - case 1: - _mm256_store_ps(&out_rows(i+0)[j], r0); - } - } - } - } - }; -#endif //__AVX__ - -#ifdef __AVX512F__ - template <std::size_t N> - struct transpose_helper_soaize<N, ISAs::AVX512, TRACKVECTORFIT_PRECISION> { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - constexpr std::size_t avx512_card = ISAs::AVX512::card<TRACKVECTORFIT_PRECISION>(); - unsigned i, j; - static_assert(W % avx512_card == 0, "Width is not a multiple of SIMD card (16)"); - for (i = 0; i < round_down(N, avx512_card); i += avx512_card) { - for (j = 0; j < W; j += avx512_card) { - __m512 r0, r1, r2, r3, r4, r5, r6, r7, - r8, r9, r10, r11, r12, r13, r14, r15; - r0 = _mm512_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm512_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm512_loadu_ps(&in_rows(j+2)[i]); - r3 = _mm512_loadu_ps(&in_rows(j+3)[i]); - r4 = _mm512_loadu_ps(&in_rows(j+4)[i]); - r5 = _mm512_loadu_ps(&in_rows(j+5)[i]); - r6 = _mm512_loadu_ps(&in_rows(j+6)[i]); - r7 = _mm512_loadu_ps(&in_rows(j+7)[i]); - r8 = _mm512_loadu_ps(&in_rows(j+8)[i]); - r9 = _mm512_loadu_ps(&in_rows(j+9)[i]); - r10 = _mm512_loadu_ps(&in_rows(j+10)[i]); - r11 = _mm512_loadu_ps(&in_rows(j+11)[i]); - r12 = _mm512_loadu_ps(&in_rows(j+12)[i]); - r13 = _mm512_loadu_ps(&in_rows(j+13)[i]); - r14 = _mm512_loadu_ps(&in_rows(j+14)[i]); - r15 = _mm512_loadu_ps(&in_rows(j+15)[i]); - - _MM512_TRANSPOSE16_PS(r0, r1, r2, r3, r4, r5, r6, r7, - r8, r9, r10, r11, r12, r13, r14, r15); - - _mm512_store_ps(&out_rows(i+0)[j], r0); - _mm512_store_ps(&out_rows(i+1)[j], r1); - _mm512_store_ps(&out_rows(i+2)[j], r2); - _mm512_store_ps(&out_rows(i+3)[j], r3); - _mm512_store_ps(&out_rows(i+4)[j], r4); - _mm512_store_ps(&out_rows(i+5)[j], r5); - _mm512_store_ps(&out_rows(i+6)[j], r6); - _mm512_store_ps(&out_rows(i+7)[j], r7); - _mm512_store_ps(&out_rows(i+8)[j], r8); - _mm512_store_ps(&out_rows(i+9)[j], r9); - _mm512_store_ps(&out_rows(i+10)[j], r10); - _mm512_store_ps(&out_rows(i+11)[j], r11); - _mm512_store_ps(&out_rows(i+12)[j], r12); - _mm512_store_ps(&out_rows(i+13)[j], r13); - _mm512_store_ps(&out_rows(i+14)[j], r14); - _mm512_store_ps(&out_rows(i+15)[j], r15); - } - } /* remainder */ { - i = round_down(N, avx512_card); - - for (j = 0; j < W; j += avx512_card) { - __m512 r0, r1, r2, r3, r4, r5, r6, r7, - r8, r9, r10, r11, r12, r13, r14, r15; - if (N - i == 1) { - r0 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+0)[i])); - r1 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+1)[i])); - r2 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+2)[i])); - r3 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+3)[i])); - r4 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+4)[i])); - r5 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+5)[i])); - r6 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+6)[i])); - r7 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+7)[i])); - r8 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+8)[i])); - r9 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+9)[i])); - r10 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+10)[i])); - r11 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+11)[i])); - r12 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+12)[i])); - r13 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+13)[i])); - r14 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+14)[i])); - r15 = _mm512_castps128_ps512(_mm_load_ss(&in_rows(j+15)[i])); - } else if (N - i <= 4) { - r0 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+0)[i])); - r1 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+1)[i])); - r2 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+2)[i])); - r3 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+3)[i])); - r4 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+4)[i])); - r5 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+5)[i])); - r6 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+6)[i])); - r7 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+7)[i])); - r8 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+8)[i])); - r9 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+9)[i])); - r10 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+10)[i])); - r11 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+11)[i])); - r12 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+12)[i])); - r13 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+13)[i])); - r14 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+14)[i])); - r15 = _mm512_castps128_ps512(_mm_loadu_ps(&in_rows(j+15)[i])); - } else if (N - i <= 8) { - r0 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+0)[i])); - r1 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+1)[i])); - r2 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+2)[i])); - r3 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+3)[i])); - r4 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+4)[i])); - r5 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+5)[i])); - r6 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+6)[i])); - r7 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+7)[i])); - r8 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+8)[i])); - r9 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+9)[i])); - r10 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+10)[i])); - r11 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+11)[i])); - r12 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+12)[i])); - r13 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+13)[i])); - r14 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+14)[i])); - r15 = _mm512_castps256_ps512(_mm256_loadu_ps(&in_rows(j+15)[i])); - } else { - r0 = _mm512_loadu_ps(&in_rows(j+0)[i]); - r1 = _mm512_loadu_ps(&in_rows(j+1)[i]); - r2 = _mm512_loadu_ps(&in_rows(j+2)[i]); - r3 = _mm512_loadu_ps(&in_rows(j+3)[i]); - r4 = _mm512_loadu_ps(&in_rows(j+4)[i]); - r5 = _mm512_loadu_ps(&in_rows(j+5)[i]); - r6 = _mm512_loadu_ps(&in_rows(j+6)[i]); - r7 = _mm512_loadu_ps(&in_rows(j+7)[i]); - r8 = _mm512_loadu_ps(&in_rows(j+8)[i]); - r9 = _mm512_loadu_ps(&in_rows(j+9)[i]); - r10 = _mm512_loadu_ps(&in_rows(j+10)[i]); - r11 = _mm512_loadu_ps(&in_rows(j+11)[i]); - r12 = _mm512_loadu_ps(&in_rows(j+12)[i]); - r13 = _mm512_loadu_ps(&in_rows(j+13)[i]); - r14 = _mm512_loadu_ps(&in_rows(j+14)[i]); - r15 = _mm512_loadu_ps(&in_rows(j+15)[i]); - } - - _MM512_TRANSPOSE16_PS(r0, r1, r2, r3, r4, r5, r6, r7, - r8, r9, r10, r11, r12, r13, r14, r15); - - switch (N - i) { - case 16: - _mm512_store_ps(&out_rows(i+15)[j], r15); - case 15: - _mm512_store_ps(&out_rows(i+14)[j], r14); - case 14: - _mm512_store_ps(&out_rows(i+13)[j], r13); - case 13: - _mm512_store_ps(&out_rows(i+12)[j], r12); - case 12: - _mm512_store_ps(&out_rows(i+11)[j], r11); - case 11: - _mm512_store_ps(&out_rows(i+10)[j], r10); - case 10: - _mm512_store_ps(&out_rows(i+9)[j], r9); - case 9: - _mm512_store_ps(&out_rows(i+8)[j], r8); - case 8: - _mm512_store_ps(&out_rows(i+7)[j], r7); - case 7: - _mm512_store_ps(&out_rows(i+6)[j], r6); - case 6: - _mm512_store_ps(&out_rows(i+5)[j], r5); - case 5: - _mm512_store_ps(&out_rows(i+4)[j], r4); - case 4: - _mm512_store_ps(&out_rows(i+3)[j], r3); - case 3: - _mm512_store_ps(&out_rows(i+2)[j], r2); - case 2: - _mm512_store_ps(&out_rows(i+1)[j], r1); - case 1: - _mm512_store_ps(&out_rows(i+0)[j], r0); - } - } - } - } - }; -#endif //__AVX512F__ - -#else - -#ifdef __SSE__ - template <std::size_t N> - struct transpose_helper_soaize<N, ISAs::SSE, double> { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - constexpr std::size_t sse_card = ISAs::SSE::card<double>(); - unsigned i, j; - static_assert(W % sse_card == 0, "Width is not a multiple of SIMD card (2)"); - for (i = 0; i < round_down(N, sse_card); i += sse_card) { - for (j = 0; j < W; j += sse_card) { - __m128d r0, r1; - r0 = _mm_loadu_pd(&in_rows(j+0)[i]); - r1 = _mm_loadu_pd(&in_rows(j+1)[i]); - - _MM_TRANSPOSE2_PD(r0, r1); - - _mm_store_pd(&out_rows(i+0)[j], r0); - _mm_store_pd(&out_rows(i+1)[j], r1); - } - } /* remainder */ { - i = round_down(N, sse_card); - if (i == N-1) { - for (j = 0; j < W; j += sse_card) { - __m128d r0, r1; - r0 = _mm_loadu_pd(&in_rows(j+0)[i]); - r1 = _mm_loadu_pd(&in_rows(j+1)[i]); - - _MM_TRANSPOSE2_PD(r0, r1); - - _mm_store_pd(&out_rows(i+0)[j], r0); - } - } - } - } - }; -#endif //__SSE__ - -#ifdef __AVX__ - template <std::size_t N> - struct transpose_helper_soaize<N, ISAs::AVX, double> { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - constexpr std::size_t avx_card = ISAs::AVX::card<double>(); - unsigned i, j; - static_assert(W % avx_card == 0, "Width is not a multiple of SIMD card (4)"); - for (i = 0; i < round_down(N, avx_card); i += avx_card) { - for (j = 0; j < W; j += avx_card) { - __m256d r0, r1, r2, r3; - r0 = _mm256_loadu_pd(&in_rows(j+0)[i]); - r1 = _mm256_loadu_pd(&in_rows(j+1)[i]); - r2 = _mm256_loadu_pd(&in_rows(j+2)[i]); - r3 = _mm256_loadu_pd(&in_rows(j+3)[i]); - - _MM256_TRANSPOSE4_PD(r0, r1, r2, r3); - - _mm256_store_pd(&out_rows(i+0)[j], r0); - _mm256_store_pd(&out_rows(i+1)[j], r1); - _mm256_store_pd(&out_rows(i+2)[j], r2); - _mm256_store_pd(&out_rows(i+3)[j], r3); - } - } /* remainder */ { - i = round_down(N, avx_card); - for (j = 0; j < W; j += avx_card) { - __m256d r0, r1, r2, r3; - /*if (N - i == 1) { - r0 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+0)[i])); - r1 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+1)[i])); - r2 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+2)[i])); - r3 = _mm256_castpd128_pd256(_mm_load_sd(&in_rows(j+3)[i])); - } else*/ if (N - i <= 2) { - r0 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+0)[i])); - r1 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+1)[i])); - r2 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+2)[i])); - r3 = _mm256_castpd128_pd256(_mm_loadu_pd(&in_rows(j+3)[i])); - } else { - r0 = _mm256_loadu_pd(&in_rows(j+0)[i]); - r1 = _mm256_loadu_pd(&in_rows(j+1)[i]); - r2 = _mm256_loadu_pd(&in_rows(j+2)[i]); - r3 = _mm256_loadu_pd(&in_rows(j+3)[i]); - } - - _MM256_TRANSPOSE4_PD(r0, r1, r2, r3); - - switch (N - i) { - case 4: - _mm256_store_pd(&out_rows(i+3)[j], r3); - case 3: - _mm256_store_pd(&out_rows(i+2)[j], r2); - case 2: - _mm256_store_pd(&out_rows(i+1)[j], r1); - case 1: - _mm256_store_pd(&out_rows(i+0)[j], r0); - } - } - } - } - }; -#endif //__AVX__ - -#ifdef __AVX512F__ - template <std::size_t N> - struct transpose_helper_soaize<N, ISAs::AVX512, double> { - template <std::size_t W, class F, class G> - static void soaize(F in_rows, G out_rows) { - //constexpr std::size_t N = 15; - constexpr std::size_t avx512_card = ISAs::AVX512::card<double>(); - unsigned i, j; - static_assert(W % avx512_card == 0, "Width is not a multiple of SIMD card (8)"); - for (i = 0; i < round_down(N, avx512_card); i += avx512_card) { - for (j = 0; j < W; j += avx512_card) { - __m512d r0, r1, r2, r3, r4, r5, r6, r7; - r0 = _mm512_loadu_pd(&in_rows(j+0)[i]); - r1 = _mm512_loadu_pd(&in_rows(j+1)[i]); - r2 = _mm512_loadu_pd(&in_rows(j+2)[i]); - r3 = _mm512_loadu_pd(&in_rows(j+3)[i]); - r4 = _mm512_loadu_pd(&in_rows(j+4)[i]); - r5 = _mm512_loadu_pd(&in_rows(j+5)[i]); - r6 = _mm512_loadu_pd(&in_rows(j+6)[i]); - r7 = _mm512_loadu_pd(&in_rows(j+7)[i]); - - _MM512_TRANSPOSE8_PD(r0, r1, r2, r3, r4, r5, r6, r7); - - _mm512_store_pd(&out_rows(i+0)[j], r0); - _mm512_store_pd(&out_rows(i+1)[j], r1); - _mm512_store_pd(&out_rows(i+2)[j], r2); - _mm512_store_pd(&out_rows(i+3)[j], r3); - _mm512_store_pd(&out_rows(i+4)[j], r4); - _mm512_store_pd(&out_rows(i+5)[j], r5); - _mm512_store_pd(&out_rows(i+6)[j], r6); - _mm512_store_pd(&out_rows(i+7)[j], r7); - } - } /* remainder */ { - i = round_down(N, avx512_card); - - for (j = 0; j < W; j += avx512_card) { - __m512d r0, r1, r2, r3, r4, r5, r6, r7; - if (N - i == 1) { - r0 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+0)[i])); - r1 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+1)[i])); - r2 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+2)[i])); - r3 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+3)[i])); - r4 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+4)[i])); - r5 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+5)[i])); - r6 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+6)[i])); - r7 = _mm512_castpd128_pd512(_mm_load_sd(&in_rows(j+7)[i])); - } else if (N - i <= 2) { - r0 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+0)[i])); - r1 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+1)[i])); - r2 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+2)[i])); - r3 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+3)[i])); - r4 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+4)[i])); - r5 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+5)[i])); - r6 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+6)[i])); - r7 = _mm512_castpd128_pd512(_mm_loadu_pd(&in_rows(j+7)[i])); - } else if (N - i <= 4) { - r0 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+0)[i])); - r1 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+1)[i])); - r2 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+2)[i])); - r3 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+3)[i])); - r4 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+4)[i])); - r5 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+5)[i])); - r6 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+6)[i])); - r7 = _mm512_castpd256_pd512(_mm256_loadu_pd(&in_rows(j+7)[i])); - } else { - r0 = _mm512_loadu_pd(&in_rows(j+0)[i]); - r1 = _mm512_loadu_pd(&in_rows(j+1)[i]); - r2 = _mm512_loadu_pd(&in_rows(j+2)[i]); - r3 = _mm512_loadu_pd(&in_rows(j+3)[i]); - r4 = _mm512_loadu_pd(&in_rows(j+4)[i]); - r5 = _mm512_loadu_pd(&in_rows(j+5)[i]); - r6 = _mm512_loadu_pd(&in_rows(j+6)[i]); - r7 = _mm512_loadu_pd(&in_rows(j+7)[i]); - } - - _MM512_TRANSPOSE8_PD(r0, r1, r2, r3, r4, r5, r6, r7); - - switch (N - i) { - case 8: - _mm512_store_pd(&out_rows(i+7)[j], r7); - case 7: - _mm512_store_pd(&out_rows(i+6)[j], r6); - case 6: - _mm512_store_pd(&out_rows(i+5)[j], r5); - case 5: - _mm512_store_pd(&out_rows(i+4)[j], r4); - case 4: - _mm512_store_pd(&out_rows(i+3)[j], r3); - case 3: - _mm512_store_pd(&out_rows(i+2)[j], r2); - case 2: - _mm512_store_pd(&out_rows(i+1)[j], r1); - case 1: - _mm512_store_pd(&out_rows(i+0)[j], r0); - } - } - } - } - }; -#endif //__AVX512F__ - -#endif -} - -} - -} diff --git a/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h index ae55e65a6583db2e5b3903bea9a55a6960ad8f7c..546b66c268ce1d5eed6df01701333cb3ef9ed2d8 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h +++ b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h @@ -26,30 +26,67 @@ namespace TrackVectorFit { struct TrackVectorFit { unsigned m_memManagerStorageIncrements; bool m_debug; - // The three schedulers - std::list<Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>> m_scheduler_pre; - std::list<Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>> m_scheduler_main; - std::list<Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>> m_scheduler_post; + bool m_verbose; + // The scheduler + std::list<Sch::Blueprint<vector_width()>> m_scheduler; + // VectorFit MemManagers local to this TrackVectorFit Mem::Store m_smoothStore; Mem::Store m_nodeParametersStore; Mem::Store m_transportForwardStore; Mem::Store m_transportBackwardStore; - Mem::Store m_auxStore; Mem::Iterator m_forwardNodeIterator; Mem::Iterator m_forwardStoreIterator; - Mem::Iterator m_smoothStoreIterator; + Mem::ReverseIterator m_smoothStoreIterator; Mem::ReverseIterator m_forwardMainStoreIterator; + // Stores with TES ownership std::vector<Mem::Store>::iterator m_forwardStore; std::vector<Mem::Store>::iterator m_backwardStore; + /** + * @brief Constructor + * + * @param memManagerStorageIncrements Storage increment for each new chunk of data + * @param debug Debug flag + */ TrackVectorFit ( const unsigned& memManagerStorageIncrements = 4096, - const bool& debug = false - ); + const bool& debug = false, + const bool& verbose = false + ) : m_memManagerStorageIncrements(memManagerStorageIncrements), + m_debug(debug), m_verbose(verbose) { + m_smoothStore = Mem::Store(Mem::View::SmoothState::size(), memManagerStorageIncrements); + m_nodeParametersStore = Mem::Store(Mem::View::NodeParameters::size(), memManagerStorageIncrements); + m_transportForwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements); + m_transportBackwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements); + } + + /** + * @brief Reference getter for m_scheduler + * + * @return The scheduler + */ + std::list<Sch::Blueprint<vector_width()>>& scheduler () { + return m_scheduler; + } + + /** + * @brief Resets the stores, add new stores to updatedStatesStore + * that will contain the forward and backward stores. + * + * @param updatedStatesStore Stores for keeping the updated states, + * both forward and backward. + * @param debug Sets the debug flag across the fitter + */ + void reset ( + std::vector<Mem::Store>& updatedStatesStore, + const bool& debug=false, + const bool& verbose=false + ) { + m_debug = debug; + m_verbose = verbose; - void reset (std::vector<Mem::Store>& updatedStatesStore) { // Reserve updated states stores updatedStatesStore.emplace_back(Mem::View::State::size(), m_memManagerStorageIncrements); updatedStatesStore.emplace_back(Mem::View::State::size(), m_memManagerStorageIncrements); @@ -59,117 +96,136 @@ struct TrackVectorFit { // Only reset stores not resetted in initializeBasePointers m_smoothStore.reset(); m_nodeParametersStore.reset(); - m_auxStore.reset(); } - void operator() ( - Track& track - ); - - void operator() ( - std::list<std::reference_wrapper<Track>>& tracks - ); - + /** + * @brief Initializes the scheduler and base pointers + * for all tracks + * + * @details A static scheduler schedules the execution of all nodes in the tracks + * for a machine configured with vector_width() SIMD width. The scheduler + * follows a DTA (Decreasing Time Algorithm) in order to minimize the + * number of iterations. + * + * Additionally, all base pointers are initialized for all nodes. + * Consequent accesses to any data member in the Nodes is possible. + * Iterators are also initialized in preparation of the upcoming fit. + * + * @param tracks Tracks to be scheduled and initialized. + * @tparam updateRefVectors Boolean to indicate whether to update reference vectors + * prior to populating the smooth pointer. + */ + template<bool updateRefVectors> void initializeBasePointers ( - std::list<std::reference_wrapper<Track>>& tracks + std::list<std::reference_wrapper<Track>>& tracks, + bool copyNoiseMatrix=false ) { m_transportForwardStore.reset(); m_transportBackwardStore.reset(); - if (tracks.size() >= VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()) { - // Schedule what we are going to do - m_scheduler_pre = Sch::StaticScheduler<Sch::Pre, VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>::generate(tracks); - m_scheduler_main = Sch::StaticScheduler<Sch::Main, VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>::generate(tracks); - m_scheduler_post = Sch::StaticScheduler<Sch::Post, VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>::generate(tracks); - - // Populates the base pointers with the scheduler provided - auto populateBasePointers = - [&] ( - decltype(m_scheduler_pre)& sch, - const bool& populateForward, - const bool& populateBackward - ) { - std::for_each(sch.begin(), sch.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - auto& pool = s.pool; - const auto& action = s.action; - - m_nodeParametersStore.getNewVector(); - m_forwardStore->getNewVector(); - m_backwardStore->getNewVector(); - m_smoothStore.getNewVector(); - if (populateForward) { m_transportForwardStore.getNewVector(); } - if (populateBackward) { m_transportBackwardStore.getNewVector(); } - - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (action[i]) { - pool[i].node->m_nodeParameters.setBasePointer(m_nodeParametersStore.getLastVector() + i); - pool[i].node->node().m_forwardState.setBasePointer(m_forwardStore->getLastVector() + i); - pool[i].node->node().m_backwardState.setBasePointer(m_backwardStore->getLastVector() + i); - // Note: Do NOT update smooth pointer here (we still need that) - - if (populateForward) { - pool[i].node->get<Op::Forward, Op::TransportMatrix>().setBasePointer(m_transportForwardStore.getLastVector() + i); - } - - if (populateBackward) { - pool[i].node->get<Op::Backward, Op::TransportMatrix>().setBasePointer(m_transportBackwardStore.getLastVector() + i); - } + if (m_debug) { + std::cout << "Initializing base pointers of " << tracks.size() << " tracks" << std::endl; + } + + // Schedule what we are going to do + m_scheduler = Sch::StaticScheduler<vector_width()>::generate(tracks); + + // Populates the base pointers with the scheduler provided + auto populateBasePointers = + [&] ( + std::list<Sch::Blueprint<vector_width()>>& sch + ) { + std::for_each(sch.begin(), sch.end(), [&] (Sch::Blueprint<vector_width()>& s) { + auto& pool = s.pool; + const auto& action = s.action; + + m_nodeParametersStore.getNewVector(); + m_forwardStore->getNewVector(); + m_backwardStore->getNewVector(); + m_smoothStore.getNewVector(); + m_transportForwardStore.getNewVector(); + m_transportBackwardStore.getNewVector(); + + for (unsigned i=0; i<vector_width(); ++i) { + if (action[i]) { + Node& n = *(pool[i].node); + + // Note: We should evaluate the impact of this + // It is probably worth copying, as now we iterate forward and + // backward through the set + if (copyNoiseMatrix) { + auto noiseMatrix = n.get<Op::NodeParameters, Op::NoiseMatrix>(); + n.m_nodeParameters.setBasePointer(m_nodeParametersStore.getLastVector() + i); + n.get<Op::NodeParameters, Op::NoiseMatrix>().copy(noiseMatrix); + } else { + n.m_nodeParameters.setBasePointer(m_nodeParametersStore.getLastVector() + i); } + + // Note: In c++17 we can even do + // if constexpr (updateRefVectors) { ... } + if (updateRefVectors) { + n.get<Op::NodeParameters, Op::ReferenceVector>().copy(n.get<Op::Smooth, Op::StateVector>()); + } + + n.node().m_forwardState.setBasePointer(m_forwardStore->getLastVector() + i); + n.node().m_backwardState.setBasePointer(m_backwardStore->getLastVector() + i); + n.m_smoothState.setBasePointer(m_smoothStore.getLastVector() + i); + n.get<Op::Forward, Op::TransportMatrix>().setBasePointer(m_transportForwardStore.getLastVector() + i); + n.get<Op::Backward, Op::TransportMatrix>().setBasePointer(m_transportBackwardStore.getLastVector() + i); } - }); - }; - - // Align the stores to point to new vectors, and initialize iterator attributes - m_forwardStore->align(); - m_backwardStore->align(); - m_smoothStore.align(); - m_nodeParametersStore.align(); - m_forwardNodeIterator = Mem::Iterator(m_nodeParametersStore, true); - m_forwardStoreIterator = Mem::Iterator(*m_forwardStore, true); - m_smoothStoreIterator = Mem::Iterator(m_smoothStore, true); - - // Populate all pointers - populateBasePointers(m_scheduler_pre, false, true); - // Add one extra vector to avoid swapping out elements - m_forwardStore->getNewVector(); - m_backwardStore->getNewVector(); - populateBasePointers(m_scheduler_main, true, true); - // Initialize the reverse mainstore iterators here - m_forwardMainStoreIterator = Mem::ReverseIterator(*m_forwardStore); - // Add one extra vector to avoid swapping out elements - m_forwardStore->getNewVector(); - m_backwardStore->getNewVector(); - populateBasePointers(m_scheduler_post, true, false); - } else { - // Align the stores to point to new vectors, and initialize iterator attributes - m_forwardStore->align(); - m_backwardStore->align(); - m_smoothStore.align(); - m_nodeParametersStore.align(); - m_forwardNodeIterator = Mem::Iterator(m_nodeParametersStore, true); - m_forwardStoreIterator = Mem::Iterator(*m_forwardStore, true); - m_smoothStoreIterator = Mem::Iterator(m_smoothStore, true); - - // Just populate the base pointers of all nodes - std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { - std::for_each(track.m_nodes.begin(), track.m_nodes.end(), [&] (Node& node) { - node.m_nodeParameters.setBasePointer(m_nodeParametersStore.getNextElement()); - node.get<Op::Forward, Op::TransportMatrix>().setBasePointer(m_transportForwardStore.getNextElement()); - node.get<Op::Backward, Op::TransportMatrix>().setBasePointer(m_transportBackwardStore.getNextElement()); - node.node().m_forwardState.setBasePointer(m_forwardStore->getNextElement()); - node.node().m_backwardState.setBasePointer(m_backwardStore->getNextElement()); - m_smoothStore.getNextElement(); - // Note: Do NOT update the smooth pointer here (we still need that) - }); + } }); - } + }; + + // Align the stores to point to new vectors, and initialize iterator attributes + m_forwardStore->align(); + m_backwardStore->align(); + m_smoothStore.align(); + m_nodeParametersStore.align(); + m_forwardNodeIterator = Mem::Iterator(m_nodeParametersStore, true); + m_forwardStoreIterator = Mem::Iterator(*m_forwardStore, true); + + populateBasePointers(m_scheduler); + + // Initialize the reverse mainstore iterators here + m_forwardMainStoreIterator = Mem::ReverseIterator(*m_forwardStore); + m_smoothStoreIterator = Mem::ReverseIterator(m_smoothStore); } + + template<bool checkNodeType> + void smoothFit ( + std::list<std::reference_wrapper<Track>>& tracks + ); + /** + * @brief Populate the nodes with the calculated fit information. + * This performs the AOSOA -> AOS conversion. + */ void populateNodes ( Track& track - ); + ) { + std::for_each(track.nodes().begin(), track.nodes().end(), [] (Node& n) { + auto& node = n.node(); + + if (node.type() == LHCb::Node::HitOnTrack) { + node.setErrMeasure(n.get<Op::NodeParameters, Op::ErrMeasure>()); + node.setProjectionMatrix((Gaudi::TrackProjectionMatrix) n.get<Op::NodeParameters, Op::ProjectionMatrix>()); + node.setResidual(n.get<Op::Smooth, Op::Residual>()); + node.setErrResidual(n.get<Op::Smooth, Op::ErrResidual>()); + + const auto smoothStateVector = (Gaudi::TrackVector) n.get<Op::Smooth, Op::StateVector>(); + node.setRefVector(smoothStateVector); + node.setState( + smoothStateVector, + (Gaudi::TrackSymMatrix) n.get<Op::Smooth, Op::Covariance>(), + node.z()); + } + }); + } }; } } + +#include "TrackVectorFit.h_impl" diff --git a/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h_impl b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h_impl new file mode 100644 index 0000000000000000000000000000000000000000..fb6dbe423fcab9de25f41cb0cadd3e2ad8dc5265 --- /dev/null +++ b/Tr/TrackVectorFit/TrackVectorFit/TrackVectorFit.h_impl @@ -0,0 +1,403 @@ +namespace Tr { + +namespace TrackVectorFit { + +/** + * @brief Performs the forward fit, backward fit and smoother + * over all designated tracks. + * + * @details The vectorized fit is activated if invoked with at least + * vector_width() tracks. A fallback sequential fit is invoked + * otherwise. In either case, the method relies on the structures generated by + * initializedBasePointers to perform the fit. + * + * The vectorized fit strictly follows the scheduler to + * determine what is computed at each iteration. In particular, + * tracks are processed in the following order: + * + * * Forward fit: All tracks are fitted in the forward direction. + * Only updated states are kept. + * + * * Backward fit and smoother: All tracks are fitted in the backward + * direction. The smoothed state is a result of averaging the + * forward updated state and the backward predicted state. + * After its execution, the backward updated state and the smoothed + * state are kept. + * + * No conversion from / to AOSOA is performed in the fit. For that, + * check populateNodes. + * + * @param tracks Tracks to be fitted + */ +template<bool checkNodeType> +void TrackVectorFit::smoothFit ( + std::list<std::reference_wrapper<Track>>& tracks +) { + if (m_debug) { + std::cout << "Fitting " << tracks.size() << " tracks" << std::endl; + } + + // Iterators for all stores + Mem::Iterator forwardTransportIterator (m_transportForwardStore); + Mem::ReverseIterator backwardTransportIterator (m_transportBackwardStore); + Mem::ReverseIterator backwardNodeIterator (m_nodeParametersStore); + Mem::ReverseIterator backwardStoreIterator (*m_backwardStore); + + // Iterators initialized from the object iterators + Mem::Iterator forwardNodeIterator (m_forwardNodeIterator); + Mem::Iterator forwardStoreIterator (m_forwardStoreIterator); + Mem::ReverseIterator forwardMainStoreIterator (m_forwardMainStoreIterator); + Mem::ReverseIterator smoothMainStoreIterator (m_smoothStoreIterator); + + if (m_debug) { + std::cout << "Scheduler iterations:" << std::endl + << m_scheduler << std::endl << std::endl + << "Iterators:" << std::endl + << " forwardTransportIterator: " << forwardTransportIterator + << " backwardTransportIterator: " << backwardTransportIterator + << " backwardNodeIterator: " << backwardNodeIterator + << " backwardStoreIterator: " << backwardStoreIterator + << " forwardNodeIterator: " << forwardNodeIterator + << " forwardStoreIterator: " << forwardStoreIterator + << " forwardMainStoreIterator: " << forwardMainStoreIterator + << " smoothMainStoreIterator: " << smoothMainStoreIterator + << std::endl + << "Calculating forward fit iterations" << std::endl; + } + + // Pointers to keep track of the lastState state and node parameters pointers + TRACKVECTORFIT_PRECISION* lastStatePointer = nullptr; + TRACKVECTORFIT_PRECISION* lastNodeParametersPointer = nullptr; + + std::for_each (m_scheduler.begin(), m_scheduler.end(), [&] (Sch::Blueprint<vector_width()>& s) { + const auto& in = s.in; + const auto& action = s.action; + auto& pool = s.pool; + + // Increment iterators, prepare views + const auto& currentStatePointer = forwardStoreIterator.nextVector(); + const auto& transportMatrixPointer = forwardTransportIterator.nextVector(); + const auto& currentParametersPointer = forwardNodeIterator.nextVector(); + Mem::View::NodeParameters currentParameters (currentParametersPointer); + Mem::View::State lastState (lastStatePointer); + Mem::View::State currentState (currentStatePointer); + + if (m_verbose) { + std::cout << s << std::endl + << " pool[0] np " << (*(pool[0].node)).m_nodeParameters.m_basePointer + << " currentNP " << currentParametersPointer << std::endl + << " pool[0] fwd state " << (*(pool[0].node)).node().m_forwardState.m_basePointer + << " currentFwdState " << currentStatePointer << std::endl + << " pool[0] tm " << (*(pool[0].node)).m_forwardTransportMatrix.m_basePointer + << " current tm " << transportMatrixPointer << std::endl; + } + + if (action.all()) { + if (in.all()) { + // All of the nodes are starting now + // Just prepare data and do "update" + + // Fetch the reference vectors for the state + std::copy_n(ArrayGen::InitialState<vector_width()>(pool).begin(), + 5*vector_width(), + currentState.m_updatedState.m_basePointer); + + // Fetch the initial covariance matrix for the cov matrix + std::copy_n(ArrayGen::InitialCovariance<vector_width()>().begin(), + 15*vector_width(), + currentState.m_updatedCovariance.m_basePointer); + + // Update + Vector::update<vector_width(), checkNodeType> ( + currentParameters.m_referenceVector.m_basePointer, + currentParameters.m_projectionMatrix.m_basePointer, + currentParameters.m_referenceResidual, + currentParameters.m_errorMeasure, + currentState.m_updatedState.m_basePointer, + currentState.m_updatedCovariance.m_basePointer, + currentState.m_chi2, + pool + ); + } else if (in.any()) { + // Some states are starting now, but others continue their execution + // Do a vectorized fit, and follow up with + // an initial sequential fit for the starting nodes + + // Fit + Vector::fit<Op::Forward>::op<vector_width(), checkNodeType> ( + currentParameters.m_referenceVector.m_basePointer, + currentParameters.m_projectionMatrix.m_basePointer, + currentParameters.m_referenceResidual, + currentParameters.m_errorMeasure, + currentParameters.m_transportVector.m_basePointer, + currentParameters.m_noiseMatrix.m_basePointer, + transportMatrixPointer, + lastState.m_updatedState.m_basePointer, + lastState.m_updatedCovariance.m_basePointer, + currentState.m_updatedState.m_basePointer, + currentState.m_updatedCovariance.m_basePointer, + currentState.m_chi2, + pool + ); + + // Replace data with those from starting nodes + for (unsigned i=0; i<vector_width(); ++i) { + if (in[i]) { + Scalar::fit<Op::Forward>(*(pool[i].node), ArrayGen::InitialCovariance_values()); + } + } + } + else { + // Fit + Vector::fit<Op::Forward>::op<vector_width(), checkNodeType> ( + currentParameters.m_referenceVector.m_basePointer, + currentParameters.m_projectionMatrix.m_basePointer, + currentParameters.m_referenceResidual, + currentParameters.m_errorMeasure, + currentParameters.m_transportVector.m_basePointer, + currentParameters.m_noiseMatrix.m_basePointer, + transportMatrixPointer, + lastState.m_updatedState.m_basePointer, + lastState.m_updatedCovariance.m_basePointer, + currentState.m_updatedState.m_basePointer, + currentState.m_updatedCovariance.m_basePointer, + currentState.m_chi2, + pool + ); + } + } else { + // Since !action.all(), we couldn't allocate enough + // nodes to process in parallel - so let's do them + // individually + for (unsigned i=0; i<vector_width(); ++i) { + if (action[i]) { + Node& node = *(pool[i].node); + if (in[i]) { + Scalar::fit<Op::Forward>(node, ArrayGen::InitialCovariance_values()); + } else { + Node& prevnode = *(pool[i].node-1); + Scalar::fit<Op::Forward, true>(node, prevnode); + } + } + } + } + + lastStatePointer = currentStatePointer; + }); + + // Backward fit and smoother iterations + if (m_debug) { + std::cout << "Calculating backward fit and smoother iterations" << std::endl; + } + std::for_each (m_scheduler.rbegin(), m_scheduler.rend(), [&] (Sch::Blueprint<vector_width()>& s) { + const auto& in = s.out; + const auto& out = s.in; + const auto& action = s.action; + auto& pool = s.pool; + + const auto& currentStatePointer = backwardStoreIterator.previousVector(); + const auto& currentNodeParametersPointer = backwardNodeIterator.previousVector(); + const auto& transportMatrixPointer = backwardTransportIterator.previousVector(); + const auto& smootherStatePointer = smoothMainStoreIterator.previousVector(); + Mem::View::State lastState (lastStatePointer); + Mem::View::State currentState (currentStatePointer); + Mem::View::NodeParameters lastParameters (lastNodeParametersPointer); + Mem::View::NodeParameters currentParameters (currentNodeParametersPointer); + Mem::View::State forwardState (forwardMainStoreIterator.previousVector()); + + if (m_verbose) { + std::cout << s << std::endl + << " pool[0] np " << (*(pool[0].node)).m_nodeParameters.m_basePointer + << " currentNP " << currentNodeParametersPointer << std::endl + << " pool[0] bwd state " << (*(pool[0].node)).node().m_backwardState.m_basePointer + << " currentBwdState " << currentStatePointer << std::endl + << " pool[0] tm " << (*(pool[0].node)).m_backwardTransportMatrix.m_basePointer + << " current tm " << transportMatrixPointer << std::endl + << " pool[0] smooth state " << (*(pool[0].node)).m_smoothState.m_basePointer + << " currentSmoothState " << smootherStatePointer << std::endl; + } + + if (action.all()) { + if (in.all()) { + // All of the nodes are starting now + // Just prepare data and do "update" + + // Fetch the reference vectors for the state + std::copy_n(ArrayGen::InitialState<vector_width()>(pool).begin(), + 5*vector_width(), + currentState.m_updatedState.m_basePointer); + + // Fetch the initial covariance matrix for the cov matrix + std::copy_n(ArrayGen::InitialCovariance<vector_width()>().begin(), + 15*vector_width(), + currentState.m_updatedCovariance.m_basePointer); + } else { + if (in.any() || out.any()) { + // Some states are starting or finishing now, but others continue their execution + // Treat all nodes individually, and then smooth and update them together + // Note: The smoothed state for the first and lastState item will be redone afterwards + for (unsigned i=0; i<vector_width(); ++i) { + Node& node = *(pool[i].node); + + if (in[i]) { + Scalar::initialize<Op::Backward>(node, ArrayGen::InitialCovariance_values()); + } else { + Node& prevnode = *(pool[i].node+1); + Scalar::predict<Op::Backward, true>(node, prevnode); + } + } + } else { + // Predict + Vector::predict<Op::Backward>::op<vector_width()> ( + lastParameters.m_transportVector.m_basePointer, + lastParameters.m_noiseMatrix.m_basePointer, + transportMatrixPointer, + lastState.m_updatedState.m_basePointer, + lastState.m_updatedCovariance.m_basePointer, + currentState.m_updatedState.m_basePointer, + currentState.m_updatedCovariance.m_basePointer + ); + } + + // Do the smoother *here* + // This benefits from cache locality, + // and removes the need of doing swapping to restore + // the updated states + + // Fetch the corresponding forward and smooth state, and update it + Mem::View::SmoothState smoothState (smootherStatePointer); + + // Smoother + const uint16_t smoother_result = Vector::smoother<vector_width()> ( + forwardState.m_updatedState.m_basePointer, + forwardState.m_updatedCovariance.m_basePointer, + currentState.m_updatedState.m_basePointer, // These are still the predicted state + currentState.m_updatedCovariance.m_basePointer, // and covariance + smoothState.m_state.m_basePointer, + smoothState.m_covariance.m_basePointer + ); + + // Update residuals + Vector::updateResiduals<vector_width(), checkNodeType> ( + currentParameters.m_referenceVector.m_basePointer, + currentParameters.m_projectionMatrix.m_basePointer, + currentParameters.m_referenceResidual, + currentParameters.m_errorMeasure, + smoothState.m_state.m_basePointer, + smoothState.m_covariance.m_basePointer, + smoothState.m_residual, + smoothState.m_errResidual, + pool + ); + + // Set errors + if (smoother_result != ArrayGen::mask<vector_width()>()) { + for (unsigned i=0; i<vector_width(); ++i) { + if (action[i] && ~(smoother_result >> i) & 0x01) { + pool[i].track->track().setFitStatus(LHCb::Track::FitFailed); + } + } + } + + if (m_debug) { + if (smoother_result != ArrayGen::mask<vector_width()>()) { + std::cout << "Smoother errors: " << smoother_result << std::endl; + } + } + } + + // Update + Vector::update<vector_width(), checkNodeType> ( + currentParameters.m_referenceVector.m_basePointer, + currentParameters.m_projectionMatrix.m_basePointer, + currentParameters.m_referenceResidual, + currentParameters.m_errorMeasure, + currentState.m_updatedState.m_basePointer, + currentState.m_updatedCovariance.m_basePointer, + currentState.m_chi2, + pool + ); + } else { + // Since !action.all(), we couldn't allocate enough + // nodes to process in parallel - so let's do them + // individually + for (unsigned i=0; i<vector_width(); ++i) { + if (action[i]) { + Node& node = *(pool[i].node); + + if (in[i]) { + Scalar::fit<Op::Backward>(node, ArrayGen::InitialCovariance_values()); + } else { + Node& prevnode = *(pool[i].node+1); + + if (out[i]) { + Scalar::fit<Op::Backward, true>(node, prevnode); + } else { + Scalar::predict<Op::Backward, true>(node, prevnode); + const bool smoothSuccess = Scalar::smoother<true, true>(node); + Scalar::updateResiduals(node); + if (!smoothSuccess) { + pool[i].track->track().setFitStatus(LHCb::Track::FitFailed); + } + Scalar::update<Op::Backward>(node); + } + } + } + } + } + + lastStatePointer = currentStatePointer; + lastNodeParametersPointer = currentNodeParametersPointer; + }); + + // Calculate the chi2 a posteriori + std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { + auto& nodes = track.m_nodes; + + // Reset the values + track.m_ndof = -track.m_nTrackParameters; + auto ndofBackward = -track.m_nTrackParameters; + track.m_forwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0); + track.m_backwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0); + + for (unsigned k=0; k<nodes.size(); ++k) { + Node& node = nodes[k]; + if (node.node().type() == LHCb::Node::HitOnTrack) { + track.m_forwardFitChi2 += node.get<Op::Forward, Op::Chi2>(); + ++track.m_ndof; + } + node.get<Op::Forward, Op::Chi2>() = track.m_forwardFitChi2; + node.m_ndof = track.m_ndof; + + // Backwards chi2 + Node& backwardNode = nodes[nodes.size() - 1 - k]; + if (backwardNode.node().type() == LHCb::Node::HitOnTrack) { + track.m_backwardFitChi2 += backwardNode.get<Op::Backward, Op::Chi2>(); + ++ndofBackward; + } + backwardNode.get<Op::Backward, Op::Chi2>() = track.m_backwardFitChi2; + backwardNode.m_ndofBackward = ndofBackward; + } + }); + + std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { + // Copy first element + Scalar::smoother<false, true>(track.m_nodes[0]); + Scalar::updateResiduals(track.m_nodes[0]); + + // and lastState + const unsigned element = track.m_nodes.size() - 1; + Scalar::smoother<true, false>(track.m_nodes[element]); + Scalar::updateResiduals(track.m_nodes[element]); + }); + + // Set the LHCb::Node information accordingly + std::for_each(tracks.begin(), tracks.end(), [] (Track& track) { + // TODO - Either the min or max chi2 could be picked up here. + track.track().setChi2AndDoF(track.minChi2(), track.ndof()); + }); +} + +} + +} diff --git a/Tr/TrackVectorFit/TrackVectorFit/Types.h b/Tr/TrackVectorFit/TrackVectorFit/Types.h index 030f6763bbdc4ad8625c5eb427382e002c2f19bd..5edac3e7df04a30464a1bd2bcfc22bcfa52eed55 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/Types.h +++ b/Tr/TrackVectorFit/TrackVectorFit/Types.h @@ -117,8 +117,6 @@ struct FitNode : public LHCb::Node { }; struct Node { - _aligned Gaudi::TrackVector m_transportVector; // Transport vector for propagation from previous node to this one - _aligned Gaudi::TrackSymMatrix m_noiseMatrix; // Noise in propagation from previous node to this one Mem::View::TrackMatrix<25> m_forwardTransportMatrix; Mem::View::TrackMatrix<25> m_backwardTransportMatrix; Mem::View::SmoothState m_smoothState; @@ -135,8 +133,6 @@ struct Node { Node (const Node& copy) = default; // Const getters - inline const Gaudi::TrackVector& transportVector () const { return m_transportVector; } - inline const Gaudi::TrackSymMatrix& noiseMatrix () const { return m_noiseMatrix; } inline const TRACKVECTORFIT_PRECISION& refResidual () const { assert(m_nodeParameters.m_basePointer != nullptr); return *m_nodeParameters.m_referenceResidual; @@ -158,8 +154,14 @@ struct Node { inline FitNode& node () { return *m_node; } // Setters - inline void setTransportVector (const Gaudi::TrackVector& transportVector) { m_transportVector = transportVector; } - inline void setNoiseMatrix (const Gaudi::TrackSymMatrix& noiseMatrix) { m_noiseMatrix = noiseMatrix; } + inline void setTransportVector (const Gaudi::TrackVector& transportVector) { + assert(m_nodeParameters.m_basePointer != nullptr); + m_nodeParameters.m_transportVector.copy(transportVector); + } + inline void setNoiseMatrix (const Gaudi::TrackSymMatrix& noiseMatrix) { + assert(m_nodeParameters.m_basePointer != nullptr); + m_nodeParameters.m_noiseMatrix.copy(noiseMatrix); + } inline void setRefResidual (const TRACKVECTORFIT_PRECISION& res) { assert(m_nodeParameters.m_basePointer != nullptr); (*m_nodeParameters.m_referenceResidual) = res; @@ -200,32 +202,52 @@ struct Node { template<class R, class S, class U = std::conditional_t<std::is_same<S, Op::Covariance>::value, Mem::View::TrackSymMatrix, + std::conditional_t<std::is_same<S, Op::NoiseMatrix>::value, Mem::View::TrackSymMatrix, std::conditional_t<std::is_same<S, Op::StateVector>::value, Mem::View::TrackVector, std::conditional_t<std::is_same<S, Op::ReferenceVector>::value, Mem::View::TrackVector, std::conditional_t<std::is_same<S, Op::ProjectionMatrix>::value, Mem::View::TrackVector, - std::conditional_t<std::is_same<S, Op::TransportMatrix>::value, Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>> + std::conditional_t<std::is_same<S, Op::TransportVector>::value, Mem::View::TrackVector, + std::conditional_t<std::is_same<S, Op::TransportMatrix>::value, Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>>>> > inline const U& get () const; template<class R, class S, class U = std::conditional_t<std::is_same<S, Op::Covariance>::value, Mem::View::TrackSymMatrix, + std::conditional_t<std::is_same<S, Op::NoiseMatrix>::value, Mem::View::TrackSymMatrix, std::conditional_t<std::is_same<S, Op::StateVector>::value, Mem::View::TrackVector, std::conditional_t<std::is_same<S, Op::ReferenceVector>::value, Mem::View::TrackVector, std::conditional_t<std::is_same<S, Op::ProjectionMatrix>::value, Mem::View::TrackVector, - std::conditional_t<std::is_same<S, Op::TransportMatrix>::value, Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>> + std::conditional_t<std::is_same<S, Op::TransportVector>::value, Mem::View::TrackVector, + std::conditional_t<std::is_same<S, Op::TransportMatrix>::value, Mem::View::TrackMatrix<25>, TRACKVECTORFIT_PRECISION>>>>>>> > inline U& get (); + + inline void setOutlier (); }; #include "NodeGetters.h" -struct Track { - std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, VectorConfiguration::bytewidth()>> m_nodes; +void Node::setOutlier () { + // Set this node as outlier + // Store its state before it dissappears + auto& node = this->node(); + node.setErrMeasure(this->get<Op::NodeParameters, Op::ErrMeasure>()); + node.setProjectionMatrix((Gaudi::TrackProjectionMatrix) this->get<Op::NodeParameters, Op::ProjectionMatrix>()); + node.setResidual(this->get<Op::Smooth, Op::Residual>()); + node.setErrResidual(this->get<Op::Smooth, Op::ErrResidual>()); + const auto smoothStateVector = (Gaudi::TrackVector) this->get<Op::Smooth, Op::StateVector>(); + node.setRefVector(smoothStateVector); + node.setState( + smoothStateVector, + (Gaudi::TrackSymMatrix) this->get<Op::Smooth, Op::Covariance>(), + node.z()); + + node.setType(LHCb::Node::Outlier); +} - // Active measurements from this node - unsigned m_forwardUpstream; - unsigned m_backwardUpstream; +struct Track { + std::vector<Node> m_nodes; // Chi square of track TRACKVECTORFIT_PRECISION m_forwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0); @@ -240,41 +262,16 @@ struct Track { unsigned m_iterationsToConverge; unsigned m_numberOfOutlierIterations; - // Initial covariances - Gaudi::TrackSymMatrix m_seedCovariance; - LHCb::Track* m_track; unsigned m_index; - // TODO Move this to the fit level: One can just issue one std::copy_n, and just copy a big chunk of memory - // from a container of just smoothed states to a container of just refvectors + // Note: This will be invoked only if the size of the scheduler is the same inline void updateRefVectors () { std::for_each(m_nodes.begin(), m_nodes.end(), [] (Node& n) { n.get<Op::NodeParameters, Op::ReferenceVector>().copy(n.get<Op::Smooth, Op::StateVector>()); }); } - inline void initializeForwardUpstream () { - m_forwardUpstream = 0; - for (unsigned i=0; i<m_nodes.size(); ++i) { - if (m_nodes[i].node().type() == LHCb::Node::HitOnTrack) { - m_forwardUpstream = i; - return; - } - } - } - - inline void initializeBackwardUpstream () { - m_backwardUpstream = m_nodes.size() - 1; - for (unsigned i=0; i<m_nodes.size(); ++i) { - const unsigned reverse_i = m_nodes.size() - 1 - i; - if (m_nodes[reverse_i].node().type() == LHCb::Node::HitOnTrack) { - m_backwardUpstream = i; - return; - } - } - } - Track (LHCb::Track& track, const unsigned& trackIndex) : m_track(&track), m_index(trackIndex) { // Generate VectorFit nodes @@ -283,28 +280,12 @@ struct Track { for (auto*& node : m_track->fitResult()->nodes()) { m_nodes.emplace_back(static_cast<FitNode*>(node), nodeIndex++); } - - // Find the first HitOnTrack node for forward and backward - initializeForwardUpstream(); - initializeBackwardUpstream(); } Track (LHCb::Track& track) : Track(track, 0) {} Track (const Track& track) = default; - inline void updateUpstream () { - // A node became an outlier. Find out if this - // has any impact on the upstream defined nodes. - if (m_nodes[m_forwardUpstream].node().type() == LHCb::Node::Outlier) { - initializeForwardUpstream(); - } - - if (m_nodes[m_nodes.size() - 1 - m_backwardUpstream].node().type() == LHCb::Node::Outlier) { - initializeBackwardUpstream(); - } - } - inline const LHCb::Track& track () const { return *m_track; } @@ -313,20 +294,16 @@ struct Track { return *m_track; } - inline std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, VectorConfiguration::bytewidth()>>& + inline std::vector<Node>& nodes () { return m_nodes; } - inline const std::vector<Node, aligned_allocator<TRACKVECTORFIT_PRECISION, VectorConfiguration::bytewidth()>>& + inline const std::vector<Node>& nodes () const { return m_nodes; } - inline void setSeedCovariance (const Gaudi::TrackSymMatrix& seedCovariance) { - m_seedCovariance = seedCovariance; - } - inline void setNTrackParameters (const int& nTrackParameters) { m_nTrackParameters = nTrackParameters; } @@ -503,20 +480,20 @@ inline std::ostream& operator<< (std::ostream& s, const Node& n) { s << "Node " << "#" << n.m_index << "\n"; if (n.node().m_forwardState.m_basePointer != nullptr) { - s << " Forward state: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::StateVector>() - << ", covariance: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Covariance>() - << ", chi2: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Chi2>() << "\n"; + s << " Forward state: " << n.get<Op::Forward, Op::StateVector>() + << ", covariance: " << n.get<Op::Forward, Op::Covariance>() + << ", chi2: " << n.get<Op::Forward, Op::Chi2>() << "\n"; } if (n.node().m_backwardState.m_basePointer != nullptr) { - s << " Backward state: " << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::StateVector>() - << ", covariance: " << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Covariance>() - << ", chi2: " << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>() << "\n"; + s << " Backward state: " << n.get<Op::Backward, Op::StateVector>() + << ", covariance: " << n.get<Op::Backward, Op::Covariance>() + << ", chi2: " << n.get<Op::Backward, Op::Chi2>() << "\n"; } if (n.m_smoothState.m_basePointer != nullptr) { - s << " Smoothed state: " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::StateVector>() - << ", covariance: " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Covariance>() << "\n" - << " Total chi2: " << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::Chi2>() << ", " - << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::Chi2>() << "\n"; + s << " Smoothed state: " << n.get<Op::Smooth, Op::StateVector>() + << ", covariance: " << n.get<Op::Smooth, Op::Covariance>() << "\n" + << " Total chi2: " << n.get<Op::Forward, Op::Chi2>() << ", " + << n.get<Op::Backward, Op::Chi2>() << "\n"; } s << " hasInfoUpstream: -, -\n"; @@ -528,23 +505,25 @@ inline std::ostream& operator<< (std::ostream& s, const Node& n) { } s << " at z " << node.z() << "\n" << " transport matrix "; - if (n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::TransportMatrix>().m_basePointer != nullptr) { - s << n.get<Tr::TrackVectorFit::Op::Forward, Tr::TrackVectorFit::Op::TransportMatrix>(); + if (n.get<Op::Forward, Op::TransportMatrix>().m_basePointer != nullptr) { + s << n.get<Op::Forward, Op::TransportMatrix>(); } s << "\n inverse transport matrix "; - if (n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::TransportMatrix>().m_basePointer != nullptr) { - s << n.get<Tr::TrackVectorFit::Op::Backward, Tr::TrackVectorFit::Op::TransportMatrix>(); + if (n.get<Op::Backward, Op::TransportMatrix>().m_basePointer != nullptr) { + s << n.get<Op::Backward, Op::TransportMatrix>(); } if (n.m_smoothState.m_basePointer != nullptr) { - s << "\n residual " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::Residual>() - << " errResidual " << n.get<Tr::TrackVectorFit::Op::Smooth, Tr::TrackVectorFit::Op::ErrResidual>(); + s << "\n residual " << n.get<Op::Smooth, Op::Residual>() + << " errResidual " << n.get<Op::Smooth, Op::ErrResidual>(); } if (n.m_nodeParameters.m_basePointer != nullptr) { - s << " projectionMatrix " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ProjectionMatrix>() - << " refVector " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ReferenceVector>() - << " refResidual " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ReferenceResidual>() - << " errMeasure " << n.get<Tr::TrackVectorFit::Op::NodeParameters, Tr::TrackVectorFit::Op::ErrMeasure>(); + s << " projectionMatrix " << n.get<Op::NodeParameters, Op::ProjectionMatrix>() + << " refVector " << n.get<Op::NodeParameters, Op::ReferenceVector>() + << " refResidual " << n.get<Op::NodeParameters, Op::ReferenceResidual>() + << " errMeasure " << n.get<Op::NodeParameters, Op::ErrMeasure>(); } + s << "\n noiseMatrix " << n.get<Op::NodeParameters, Op::NoiseMatrix>() + << " transportVector " << n.get<Op::NodeParameters, Op::TransportVector>(); return s; } diff --git a/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h b/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h index 7b508b69dfe5ffed3b3b4784ccf086134180e99b..8751376c322eb1599578e37f257c1404e06b5777 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h +++ b/Tr/TrackVectorFit/TrackVectorFit/VectorConfiguration.h @@ -3,41 +3,18 @@ #include "Kernel/VectorConfiguration.h" #include "TrackInterfaces/ITrackFitter.h" -#define TO_STRING(s) WRAPPED_TO_STRING(s) -#define WRAPPED_TO_STRING(s) #s +namespace Tr { +namespace TrackVectorFit { -namespace ISAs { - struct DEFAULT { - template <class F> - static constexpr std::size_t card() { return 1; } - }; - struct SCALAR { - template <class F> - static constexpr std::size_t card() { return 1; } - }; - struct SSE { - template <class F> - static constexpr std::size_t card() { return 16 / sizeof(F); } - }; - struct AVX { - template <class F> - static constexpr std::size_t card() { return 32 / sizeof(F); } - }; - struct AVX512 { - template <class F> - static constexpr std::size_t card() { return 64 / sizeof(F); } - }; +constexpr std::size_t vector_width () { + return VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); +} -#if defined(__AVX512F__) - using CURRENT = AVX512; -#elif defined(__AVX__) - using CURRENT = AVX; -#elif defined(__SSE__) - using CURRENT = SSE; -#else - using CURRENT = SCALAR; -#endif } +} + +#define TO_STRING(s) WRAPPED_TO_STRING(s) +#define WRAPPED_TO_STRING(s) #s // Include vectorclass #if defined(__AVX512F__) diff --git a/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h b/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h index 6678cc5be79ddf1787ad3cd50b15a3c669f48389..b9b1fd679612d4dd2edcb00cf38c85f430967144 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h +++ b/Tr/TrackVectorFit/TrackVectorFit/scalar/Combined.h @@ -21,6 +21,17 @@ inline void fit ( update<T>(node); } +template<class T> +inline void fit ( + Node& node, + const std::array<TRACKVECTORFIT_PRECISION, 15>& covariance +) { + node.get<T, Op::StateVector>().copy(node.get<Op::NodeParameters, Op::ReferenceVector>()); + node.get<T, Op::Covariance>().copy(covariance); + + update<T>(node); +} + template<class T, bool U> inline void fit ( Node& node, diff --git a/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h b/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h index 27eac37fe2ca527013e54d28884d2dedefd539ad..f3ac073e9557eaefd83d476a4c6be34e6b7b8268 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h +++ b/Tr/TrackVectorFit/TrackVectorFit/scalar/Predict.h @@ -21,13 +21,13 @@ inline void transportCovariance ( pc.copy(uc); if (tm[2] != 0 or tm[8] != 0) { - pc(0,0) += 2 * uc(2,0) * tm[2] + uc(2,2) * tm[2] * tm[2]; - pc(2,0) += uc(2,2) * tm[2]; - pc(1,1) += 2 * uc(3,1) * tm[8] + uc(3,3) * tm[8] * tm[8]; - pc(3,1) += uc(3,3) * tm[8]; - pc(1,0) += uc(2,1) * tm[2] + uc(3,0) * tm[8] + uc(3,2) * tm[2] * tm[8]; - pc(2,1) += uc(3,2) * tm[8]; - pc(3,0) += uc(3,2) * tm[2]; + pc[0] += 2 * uc[3] * tm[2] + uc[5] * tm[2] * tm[2]; + pc[3] += uc[5] * tm[2]; + pc[2] += 2 * uc[7] * tm[8] + uc[9] * tm[8] * tm[8]; + pc[7] += uc[9] * tm[8]; + pc[1] += uc[4] * tm[2] + uc[6] * tm[8] + uc[8] * tm[2] * tm[8]; + pc[4] += uc[8] * tm[8]; + pc[6] += uc[8] * tm[2]; } } } @@ -44,22 +44,21 @@ inline void transportCovariance ( pc.copy(uc); if (tm[2] != 0 or tm[8] != 0) { - pc(0,0) += 2 * uc(2,0) * tm[2] + uc(2,2) * tm[2] * tm[2]; - pc(2,0) += uc(2,2) * tm[2]; - pc(1,1) += 2 * uc(3,1) * tm[8] + uc(3,3) * tm[8] * tm[8]; - pc(3,1) += uc(3,3) * tm[8]; - pc(1,0) += uc(2,1) * tm[2] + uc(3,0) * tm[8] + uc(3,2) * tm[2] * tm[8]; - pc(2,1) += uc(3,2) * tm[8]; - pc(3,0) += uc(3,2) * tm[2]; + pc[0] += 2 * uc(2,0) * tm[2] + uc(2,2) * tm[2] * tm[2]; + pc[3] += uc(2,2) * tm[2]; + pc[2] += 2 * uc(3,1) * tm[8] + uc(3,3) * tm[8] * tm[8]; + pc[7] += uc(3,3) * tm[8]; + pc[1] += uc(2,1) * tm[2] + uc(3,0) * tm[8] + uc(3,2) * tm[2] * tm[8]; + pc[4] += uc(3,2) * tm[8]; + pc[6] += uc(3,2) * tm[2]; } } } -// Initialise template<class T> -inline void initialise ( +inline void initialize ( Node& node, - const Gaudi::TrackSymMatrix& covariance + const std::array<TRACKVECTORFIT_PRECISION, 15>& covariance ) { node.get<T, Op::StateVector>().copy(node.get<Op::NodeParameters, Op::ReferenceVector>()); node.get<T, Op::Covariance>().copy(covariance); @@ -85,7 +84,7 @@ inline void predict<Op::Forward, true> ( Node& node, const Node& prevnode ) { - node.get<Op::Forward, Op::StateVector>().copy(node.transportVector()); + node.get<Op::Forward, Op::StateVector>().copy(node.get<Op::NodeParameters, Op::TransportVector>()); for (int i=0; i<5; ++i) { for (int j=0; j<5; ++j) { node.get<Op::Forward, Op::StateVector>()[i] += @@ -98,7 +97,7 @@ inline void predict<Op::Forward, true> ( node.get<Op::Forward, Op::TransportMatrix>(), prevnode.get<Op::Forward, Op::Covariance>(), node.get<Op::Forward, Op::Covariance>()); - node.get<Op::Forward, Op::Covariance>() += node.noiseMatrix(); + node.get<Op::Forward, Op::Covariance>() += node.get<Op::NodeParameters, Op::NoiseMatrix>(); } template<> @@ -117,7 +116,7 @@ inline void predict<Op::Backward, true> ( ) { Gaudi::TrackVector temp_sub; for (int i=0; i<5; ++i) { - temp_sub[i] = prevnode.get<Op::Backward, Op::StateVector>()[i] - prevnode.transportVector().Array()[i]; + temp_sub[i] = prevnode.get<Op::Backward, Op::StateVector>()[i] - prevnode.get<Op::NodeParameters, Op::TransportVector>()[i]; }; for (int i=0; i<5; ++i) { @@ -129,7 +128,7 @@ inline void predict<Op::Backward, true> ( Gaudi::TrackSymMatrix temp_cov; for (int i=0; i<15; ++i) { - temp_cov.Array()[i] = prevnode.get<Op::Backward, Op::Covariance>()[i] + prevnode.noiseMatrix().Array()[i]; + temp_cov.Array()[i] = prevnode.get<Op::Backward, Op::Covariance>()[i] + prevnode.get<Op::NodeParameters, Op::NoiseMatrix>()[i]; } transportCovariance ( diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h index fd26c67d23e49160818f1441ab87122082699247..7f7b727a526a7a46b3c252aaae4c51be2189d018 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h +++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Combined.h @@ -12,22 +12,25 @@ namespace Vector { template<class T> struct fit { - template<unsigned W> + template<unsigned W, bool checkNodeType> static inline void op ( - std::array<Sch::Item, W>& n, fp_ptr_64_const rv, fp_ptr_64_const pm, fp_ptr_64_const rr, fp_ptr_64_const em, + fp_ptr_64_const tv, + fp_ptr_64_const nm, fp_ptr_64_const tm, fp_ptr_64_const last_us, fp_ptr_64_const last_uc, fp_ptr_64 us, fp_ptr_64 uc, - fp_ptr_64 chi2 + fp_ptr_64 chi2, + std::array<Sch::Item, W>& n ) { predict<T>::template op<W> ( - n, + tv, + nm, tm, last_us, last_uc, @@ -35,15 +38,15 @@ struct fit { uc ); - update<W> ( - n, + update<W, checkNodeType> ( rv, pm, rr, em, us, uc, - chi2 + chi2, + n ); } }; diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h index 7e081909d3cb9c7673842b255545eebd8e7b64d0..63dfad08c941815422822d0a979cd87c585baf1b 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h +++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Math.h @@ -12,26 +12,30 @@ namespace Vector { template<unsigned W> struct MathCommon { - static constexpr inline unsigned symid (const int row, const int col) { - return ((row * (row + 1)) / 2) + col; + /** + * @brief Cast for values into TRACKVECTORFIT_PRECISION + * mostly to avoid warnings with certain libraries + * + * It should be resolved in compile time to the proper + * type and not produce any code + */ + template<class T> + static constexpr TRACKVECTORFIT_PRECISION cast (const T& value) { + return ((TRACKVECTORFIT_PRECISION) value); } + /** + * @brief Creates a boolvectype populated with true if the node type + * is HitOnTrack and false otherwise + */ template <size_t... Is> - static constexpr inline typename Vectype<W>::booltype makeBoolHitOnTrack ( + static constexpr typename Vectype<W>::booltype makeBoolHitOnTrack ( const std::array<Sch::Item, W>& nodes, std::index_sequence<Is...> ) { return typename Vectype<W>::booltype(nodes[Is].node->node().type() == LHCb::Node::HitOnTrack...); } - template <size_t... Is> - static constexpr inline typename Vectype<W>::booltype makeBoolMeasurement ( - const std::array<Sch::Item, W>& nodes, - std::index_sequence<Is...> - ) { - return typename Vectype<W>::booltype(nodes[Is].node->node().hasMeasurement()...); - } - /** * @brief Similarity transform, manually horizontally vectorised * @@ -176,6 +180,7 @@ struct MathCommon { * @param[in] errorMeas2 Measurement error squared * @param[in] nodes Nodes */ + template<bool checkNodeType> static inline void update ( fp_ptr_64_const rv_p, fp_ptr_64_const pm_p, @@ -184,7 +189,7 @@ struct MathCommon { fp_ptr_64 us_p, fp_ptr_64 uc_p, fp_ptr_64 chi2_p, - const std::array<Sch::Item, W>& nodes_p + const std::array<Sch::Item, W>& n ) { using vectype = typename Vectype<W>::type; using boolvectype = typename Vectype<W>::booltype; @@ -259,11 +264,16 @@ struct MathCommon { + H[2] * cht[2] + H[3] * cht[3] + H[4] * cht[4]; - const vectype denNotZero = select(denominator != 0, denominator, ((TRACKVECTORFIT_PRECISION) 1.)); - const boolvectype maskedVector = makeBoolHitOnTrack(nodes_p, std::make_index_sequence<W>()); - const vectype errorResInv = select(maskedVector, - ((TRACKVECTORFIT_PRECISION) 1.) / denNotZero, - ((TRACKVECTORFIT_PRECISION) 0.)); + // const vectype denNotZero = select(denominator != 0, denominator, 1.); + // const vectype errorResInv = 1. / denNotZero; + vectype errorResInv = cast(1.) / denominator; + + // This should be optimized at compile time + // With c++17: if constexpr (checkNodeType) { ... } + if (checkNodeType) { + const boolvectype maskedVector = makeBoolHitOnTrack(n, std::make_index_sequence<W>()); + errorResInv = select(maskedVector, errorResInv, cast(0.)); + } // update the state vector and cov matrix const vectype w = res * errorResInv; @@ -412,8 +422,9 @@ struct MathCommon { // Factorization L[0] = Csum[0]; - success &= L[0] > 0.0; - L[0] = sqrt(1.0 / select(success, L[0], ((TRACKVECTORFIT_PRECISION) 1.0))); + success &= L[0] > cast(0.); + // L[0] = sqrt(cast(1.) / L[0]); + L[0] = sqrt(cast(1.) / select(success, L[0], cast(1.))); L[1] = Csum[1] * L[0]; L[3] = Csum[3] * L[0]; @@ -421,29 +432,33 @@ struct MathCommon { L[10] = Csum[10] * L[0]; L[2] = Csum[2] - L[1]*L[1]; - success &= L[2] > 0.0; - L[2] = sqrt(1.0 / select(success, L[2], ((TRACKVECTORFIT_PRECISION) 1.0))); + success &= L[2] > cast(0.0); + // L[2] = sqrt(1. / L[2]); + L[2] = sqrt(cast(1.) / select(success, L[2], cast(1.))); L[4] = (Csum[4] - L[3] *L[1]) * L[2]; L[7] = (Csum[7] - L[6] *L[1]) * L[2]; L[11] = (Csum[11] - L[10]*L[1]) * L[2]; L[5] = Csum[5] - L[3]*L[3] - L[4]*L[4]; - success &= L[5] > 0.0; - L[5] = sqrt(1.0 / select(success, L[5], ((TRACKVECTORFIT_PRECISION) 1.0))); + success &= L[5] > cast(0.); + // L[5] = sqrt(1. / L[5]); + L[5] = sqrt(cast(1.) / select(success, L[5], cast(1.))); L[8] = (Csum[8] - L[6] *L[3] - L[7] *L[4]) * L[5]; L[12] = (Csum[12] - L[10]*L[3] - L[11]*L[4]) * L[5]; L[9] = Csum[9] - L[6]*L[6] - L[7]*L[7] - L[8]*L[8]; - success &= L[9] > 0.0; - L[9] = sqrt(1.0 / select(success, L[9], ((TRACKVECTORFIT_PRECISION) 1.0))); + success &= L[9] > cast(0.); + // L[9] = sqrt(1. / L[9]); + L[9] = sqrt(cast(1.) / select(success, L[9], cast(1.))); L[13] = (Csum[13] - L[10]*L[6] - L[11]*L[7] - L[12]*L[8]) * L[9]; L[14] = Csum[14] - L[10]*L[10] - L[11]*L[11] - L[12]*L[12] - L[13]*L[13]; - success &= L[14] > 0.0; - L[14] = sqrt(1.0 / select(success, L[14], ((TRACKVECTORFIT_PRECISION) 1.0))); + success &= L[14] > cast(0.); + // L[14] = sqrt(1. / L[14]); + L[14] = sqrt(cast(1.) / select(success, L[14], cast(1.))); // Inversion @@ -571,8 +586,8 @@ struct MathCommon { return to_bits(success); } + template<bool checkNodeType> static inline void updateResiduals ( - const std::array<Sch::Item, W>& nodes_p, fp_ptr_64_const rv_p, fp_ptr_64_const pm_p, fp_ptr_64_const rr_p, @@ -580,7 +595,8 @@ struct MathCommon { fp_ptr_64_const ss_p, fp_ptr_64_const sc_p, fp_ptr_64 res_p, - fp_ptr_64 errRes_p + fp_ptr_64 errRes_p, + const std::array<Sch::Item, W>& n ) { using vectype = typename Vectype<W>::type; using boolvectype = typename Vectype<W>::booltype; @@ -590,7 +606,7 @@ struct MathCommon { std::array<vectype, 5> ss; std::array<vectype, 15> sc; std::array<vectype, 5> v; - vectype res, errRes, em, rr, HCH, sign, value, error; + vectype errRes, em, rr, error; // Load rv rv[0].load_a(rv_p + 0*W); @@ -642,16 +658,11 @@ struct MathCommon { v[2] = sc[3] *pm[0]+sc[4] *pm[1]+sc[5] *pm[2]+sc[8] *pm[3]+sc[12]*pm[4]; v[3] = sc[6] *pm[0]+sc[7] *pm[1]+sc[8] *pm[2]+sc[9] *pm[3]+sc[13]*pm[4]; v[4] = sc[10]*pm[0]+sc[11]*pm[1]+sc[12]*pm[2]+sc[13]*pm[3]+sc[14]*pm[4]; - HCH = pm[0]*v[0] + pm[1]*v[1] + pm[2]*v[2] + pm[3]*v[3] + pm[4]*v[4]; - - // const TRACKVECTORFIT_PRECISION V = node.m_errMeasure * node.m_errMeasure; - // const TRACKVECTORFIT_PRECISION sign = node.m_type == LHCb::Node::HitOnTrack ? -1 : 1; - const boolvectype maskedHitOnTrack = makeBoolHitOnTrack(nodes_p, std::make_index_sequence<W>()); - sign = select(maskedHitOnTrack, -1.0, 1.0); + const vectype HCH = -(pm[0]*v[0] + pm[1]*v[1] + pm[2]*v[2] + pm[3]*v[3] + pm[4]*v[4]); // const TrackVectorContiguous& refX = node.m_refVector.m_parameters; // value = node.m_refResidual + (pm * (rv - node.get<Op::Smooth, Op::StateVector>())); - value = rr + ( + const vectype value = rr + ( pm[0] * (rv[0] - ss[0]) + pm[1] * (rv[1] - ss[1]) + pm[2] * (rv[2] - ss[2]) + @@ -659,18 +670,26 @@ struct MathCommon { pm[4] * (rv[4] - ss[4]) ); - // error = V + sign * HCH; - error = em * em + sign * HCH; - // Bring back the changes - const boolvectype maskedMeasurement = makeBoolMeasurement(nodes_p, std::make_index_sequence<W>()); - res = select(maskedMeasurement, value, 0.0); - res.store_a(res_p); + value.store_a(res_p); + + // This should be optimized at compile time + // With c++17: if constexpr (checkNodeType) { ... } + if (checkNodeType) { + // const TRACKVECTORFIT_PRECISION V = node.m_errMeasure * node.m_errMeasure; + // const TRACKVECTORFIT_PRECISION sign = node.m_type == LHCb::Node::HitOnTrack ? -1 : 1; + const boolvectype maskedHitOnTrack = makeBoolHitOnTrack(n, std::make_index_sequence<W>()); + const vectype sign = select(maskedHitOnTrack, cast(-1.), cast(1.)); + + // error = V + sign * HCH; + error = em * em + sign * HCH; + } else { + // error = V + HCH; + error = em * em + HCH; + } errRes = sqrt(abs(error)); - errRes = select(error >= 0, errRes, -errRes); - - errRes = select(maskedMeasurement, errRes, 0.0); + errRes = select(error >= cast(0.), errRes, -errRes); errRes.store_a(errRes_p); } }; @@ -680,18 +699,18 @@ struct Math { template<unsigned W> static inline void predictState ( fp_ptr_64_const tm, - const std::array<TRACKVECTORFIT_PRECISION, 5*W>& tv, + fp_ptr_64_const tv, fp_ptr_64_const us, fp_ptr_64 ps ); - // template<unsigned W> - // static inline void predictCovariance ( - // fp_ptr_64_const tm, - // std::array<TRACKVECTORFIT_PRECISION, 15*W>& nm, - // fp_ptr_64_const uc, - // fp_ptr_64 pc - // ); + template<unsigned W> + static inline void predictCovariance ( + fp_ptr_64_const tm, + fp_ptr_64_const nm, + fp_ptr_64_const uc, + fp_ptr_64 pc + ); }; template<> @@ -699,7 +718,7 @@ struct Math<Op::Forward> { template<unsigned W> static inline void predictState ( fp_ptr_64_const tm_p, - const std::array<TRACKVECTORFIT_PRECISION, 5*W>& tv_p, + fp_ptr_64_const tv_p, fp_ptr_64_const us_p, fp_ptr_64 ps_p ) { @@ -738,11 +757,11 @@ struct Math<Op::Forward> { tm[24].load_a(tm_p + 24*W); // Load tv - tv[0].load_a(tv_p.data() + 0*W); - tv[1].load_a(tv_p.data() + 1*W); - tv[2].load_a(tv_p.data() + 2*W); - tv[3].load_a(tv_p.data() + 3*W); - tv[4].load_a(tv_p.data() + 4*W); + tv[0].load_a(tv_p + 0*W); + tv[1].load_a(tv_p + 1*W); + tv[2].load_a(tv_p + 2*W); + tv[3].load_a(tv_p + 3*W); + tv[4].load_a(tv_p + 4*W); // Load us us[0].load_a(us_p + 0*W); @@ -768,7 +787,7 @@ struct Math<Op::Forward> { template<unsigned W> static inline void predictCovariance ( fp_ptr_64_const tm, - const std::array<TRACKVECTORFIT_PRECISION, 15*W>& nm, + fp_ptr_64_const nm, fp_ptr_64_const uc, fp_ptr_64 pc ) { @@ -776,21 +795,21 @@ struct Math<Op::Forward> { MathCommon<W>::similarity_5_5(tm, uc, pc); using vectype = typename Vectype<W>::type; - (vectype().load_a(pc + 0*W) + vectype().load_a(nm.data() + 0*W)).store_a(pc + 0*W); - (vectype().load_a(pc + 1*W) + vectype().load_a(nm.data() + 1*W)).store_a(pc + 1*W); - (vectype().load_a(pc + 2*W) + vectype().load_a(nm.data() + 2*W)).store_a(pc + 2*W); - (vectype().load_a(pc + 3*W) + vectype().load_a(nm.data() + 3*W)).store_a(pc + 3*W); - (vectype().load_a(pc + 4*W) + vectype().load_a(nm.data() + 4*W)).store_a(pc + 4*W); - (vectype().load_a(pc + 5*W) + vectype().load_a(nm.data() + 5*W)).store_a(pc + 5*W); - (vectype().load_a(pc + 6*W) + vectype().load_a(nm.data() + 6*W)).store_a(pc + 6*W); - (vectype().load_a(pc + 7*W) + vectype().load_a(nm.data() + 7*W)).store_a(pc + 7*W); - (vectype().load_a(pc + 8*W) + vectype().load_a(nm.data() + 8*W)).store_a(pc + 8*W); - (vectype().load_a(pc + 9*W) + vectype().load_a(nm.data() + 9*W)).store_a(pc + 9*W); - (vectype().load_a(pc + 10*W) + vectype().load_a(nm.data() + 10*W)).store_a(pc + 10*W); - (vectype().load_a(pc + 11*W) + vectype().load_a(nm.data() + 11*W)).store_a(pc + 11*W); - (vectype().load_a(pc + 12*W) + vectype().load_a(nm.data() + 12*W)).store_a(pc + 12*W); - (vectype().load_a(pc + 13*W) + vectype().load_a(nm.data() + 13*W)).store_a(pc + 13*W); - (vectype().load_a(pc + 14*W) + vectype().load_a(nm.data() + 14*W)).store_a(pc + 14*W); + (vectype().load_a(pc + 0*W) + vectype().load_a(nm + 0*W)).store_a(pc + 0*W); + (vectype().load_a(pc + 1*W) + vectype().load_a(nm + 1*W)).store_a(pc + 1*W); + (vectype().load_a(pc + 2*W) + vectype().load_a(nm + 2*W)).store_a(pc + 2*W); + (vectype().load_a(pc + 3*W) + vectype().load_a(nm + 3*W)).store_a(pc + 3*W); + (vectype().load_a(pc + 4*W) + vectype().load_a(nm + 4*W)).store_a(pc + 4*W); + (vectype().load_a(pc + 5*W) + vectype().load_a(nm + 5*W)).store_a(pc + 5*W); + (vectype().load_a(pc + 6*W) + vectype().load_a(nm + 6*W)).store_a(pc + 6*W); + (vectype().load_a(pc + 7*W) + vectype().load_a(nm + 7*W)).store_a(pc + 7*W); + (vectype().load_a(pc + 8*W) + vectype().load_a(nm + 8*W)).store_a(pc + 8*W); + (vectype().load_a(pc + 9*W) + vectype().load_a(nm + 9*W)).store_a(pc + 9*W); + (vectype().load_a(pc + 10*W) + vectype().load_a(nm + 10*W)).store_a(pc + 10*W); + (vectype().load_a(pc + 11*W) + vectype().load_a(nm + 11*W)).store_a(pc + 11*W); + (vectype().load_a(pc + 12*W) + vectype().load_a(nm + 12*W)).store_a(pc + 12*W); + (vectype().load_a(pc + 13*W) + vectype().load_a(nm + 13*W)).store_a(pc + 13*W); + (vectype().load_a(pc + 14*W) + vectype().load_a(nm + 14*W)).store_a(pc + 14*W); } }; @@ -799,7 +818,7 @@ struct Math<Op::Backward> { template<unsigned W> static inline void predictState ( fp_ptr_64_const tm_p, - const std::array<TRACKVECTORFIT_PRECISION, 5*W>& tv_p, + fp_ptr_64_const tv_p, fp_ptr_64_const us_p, fp_ptr_64 ps_p ) { @@ -839,11 +858,11 @@ struct Math<Op::Backward> { tm[24].load_a(tm_p + 24*W); // Load tv - tv[0].load_a(tv_p.data() + 0*W); - tv[1].load_a(tv_p.data() + 1*W); - tv[2].load_a(tv_p.data() + 2*W); - tv[3].load_a(tv_p.data() + 3*W); - tv[4].load_a(tv_p.data() + 4*W); + tv[0].load_a(tv_p + 0*W); + tv[1].load_a(tv_p + 1*W); + tv[2].load_a(tv_p + 2*W); + tv[3].load_a(tv_p + 3*W); + tv[4].load_a(tv_p + 4*W); // Load us us[0].load_a(us_p + 0*W); @@ -875,29 +894,31 @@ struct Math<Op::Backward> { template<unsigned W> static inline void predictCovariance ( fp_ptr_64_const tm, - std::array<TRACKVECTORFIT_PRECISION, 15*W>& nm, + fp_ptr_64_const nm, fp_ptr_64_const uc, fp_ptr_64 pc ) { using vectype = typename Vectype<W>::type; - (vectype().load_a(nm.data() + 0*W) + vectype().load_a(uc + 0*W)).store_a(nm.data() + 0*W); - (vectype().load_a(nm.data() + 1*W) + vectype().load_a(uc + 1*W)).store_a(nm.data() + 1*W); - (vectype().load_a(nm.data() + 2*W) + vectype().load_a(uc + 2*W)).store_a(nm.data() + 2*W); - (vectype().load_a(nm.data() + 3*W) + vectype().load_a(uc + 3*W)).store_a(nm.data() + 3*W); - (vectype().load_a(nm.data() + 4*W) + vectype().load_a(uc + 4*W)).store_a(nm.data() + 4*W); - (vectype().load_a(nm.data() + 5*W) + vectype().load_a(uc + 5*W)).store_a(nm.data() + 5*W); - (vectype().load_a(nm.data() + 6*W) + vectype().load_a(uc + 6*W)).store_a(nm.data() + 6*W); - (vectype().load_a(nm.data() + 7*W) + vectype().load_a(uc + 7*W)).store_a(nm.data() + 7*W); - (vectype().load_a(nm.data() + 8*W) + vectype().load_a(uc + 8*W)).store_a(nm.data() + 8*W); - (vectype().load_a(nm.data() + 9*W) + vectype().load_a(uc + 9*W)).store_a(nm.data() + 9*W); - (vectype().load_a(nm.data() + 10*W) + vectype().load_a(uc + 10*W)).store_a(nm.data() + 10*W); - (vectype().load_a(nm.data() + 11*W) + vectype().load_a(uc + 11*W)).store_a(nm.data() + 11*W); - (vectype().load_a(nm.data() + 12*W) + vectype().load_a(uc + 12*W)).store_a(nm.data() + 12*W); - (vectype().load_a(nm.data() + 13*W) + vectype().load_a(uc + 13*W)).store_a(nm.data() + 13*W); - (vectype().load_a(nm.data() + 14*W) + vectype().load_a(uc + 14*W)).store_a(nm.data() + 14*W); + + _aligned std::array<TRACKVECTORFIT_PRECISION, 15*W> temp; + (vectype().load_a(nm + 0*W) + vectype().load_a(uc + 0*W)).store_a(temp.data() + 0*W); + (vectype().load_a(nm + 1*W) + vectype().load_a(uc + 1*W)).store_a(temp.data() + 1*W); + (vectype().load_a(nm + 2*W) + vectype().load_a(uc + 2*W)).store_a(temp.data() + 2*W); + (vectype().load_a(nm + 3*W) + vectype().load_a(uc + 3*W)).store_a(temp.data() + 3*W); + (vectype().load_a(nm + 4*W) + vectype().load_a(uc + 4*W)).store_a(temp.data() + 4*W); + (vectype().load_a(nm + 5*W) + vectype().load_a(uc + 5*W)).store_a(temp.data() + 5*W); + (vectype().load_a(nm + 6*W) + vectype().load_a(uc + 6*W)).store_a(temp.data() + 6*W); + (vectype().load_a(nm + 7*W) + vectype().load_a(uc + 7*W)).store_a(temp.data() + 7*W); + (vectype().load_a(nm + 8*W) + vectype().load_a(uc + 8*W)).store_a(temp.data() + 8*W); + (vectype().load_a(nm + 9*W) + vectype().load_a(uc + 9*W)).store_a(temp.data() + 9*W); + (vectype().load_a(nm + 10*W) + vectype().load_a(uc + 10*W)).store_a(temp.data() + 10*W); + (vectype().load_a(nm + 11*W) + vectype().load_a(uc + 11*W)).store_a(temp.data() + 11*W); + (vectype().load_a(nm + 12*W) + vectype().load_a(uc + 12*W)).store_a(temp.data() + 12*W); + (vectype().load_a(nm + 13*W) + vectype().load_a(uc + 13*W)).store_a(temp.data() + 13*W); + (vectype().load_a(nm + 14*W) + vectype().load_a(uc + 14*W)).store_a(temp.data() + 14*W); // Delegate to similarity_5_5 implementation - MathCommon<W>::similarity_5_5(tm, nm.data(), pc); + MathCommon<W>::similarity_5_5(tm, temp.data(), pc); } }; diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h index 4aa3c847283791120a8629c2e1bcf30aaf06ee6e..a482d25663b037853a458bf0e9bb3f22f0928e4f 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h +++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Predict.h @@ -14,7 +14,8 @@ template<class T> struct predict { template<unsigned W> static inline void op ( - const std::array<Sch::Item, W>& n, + fp_ptr_64_const tv, + fp_ptr_64_const nm, fp_ptr_64_const tm, fp_ptr_64_const us, fp_ptr_64_const uc, @@ -27,16 +28,14 @@ template<> struct predict<Op::Forward> { template<unsigned W> static inline void op ( - const std::array<Sch::Item, W>& n, + fp_ptr_64_const tv, + fp_ptr_64_const nm, fp_ptr_64_const tm, fp_ptr_64_const us, fp_ptr_64_const uc, fp_ptr_64 ps, fp_ptr_64 pc ) { - _aligned const std::array<TRACKVECTORFIT_PRECISION, 15*W> nm = ArrayGen::getCurrentNoiseMatrix(n); - _aligned const std::array<TRACKVECTORFIT_PRECISION, 5*W> tv = ArrayGen::getCurrentTransportVector(n); - Math<Op::Forward>::predictState<W> ( tm, tv, @@ -57,16 +56,14 @@ template<> struct predict<Op::Backward> { template<unsigned W> static inline void op ( - const std::array<Sch::Item, W>& n, + fp_ptr_64_const tv, + fp_ptr_64_const nm, fp_ptr_64_const tm, fp_ptr_64_const us, fp_ptr_64_const uc, fp_ptr_64 ps, fp_ptr_64 pc ) { - _aligned const std::array<TRACKVECTORFIT_PRECISION, 5*W> tv = ArrayGen::getPreviousTransportVector(n); - _aligned std::array<TRACKVECTORFIT_PRECISION, 15*W> nm = ArrayGen::getPreviousNoiseMatrix(n); - Math<Op::Backward>::predictState<W> ( tm, tv, diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h index 47bb4e6a0049aeba772e0f35c4a709523b1ad380..7e1b9b5715641bf98e3ed84e5983d84642089830 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h +++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Smoother.h @@ -28,9 +28,8 @@ inline uint16_t smoother ( ); } -template<long unsigned W> +template<long unsigned W, bool checkNodeType> inline void updateResiduals ( - const std::array<Sch::Item, W>& n, fp_ptr_64_const rv, fp_ptr_64_const pm, fp_ptr_64_const rr, @@ -38,10 +37,10 @@ inline void updateResiduals ( fp_ptr_64_const ss, fp_ptr_64_const sc, fp_ptr_64 res, - fp_ptr_64 errRes + fp_ptr_64 errRes, + const std::array<Sch::Item, W>& n ) { - MathCommon<W>::updateResiduals ( - n, + MathCommon<W>::template updateResiduals<checkNodeType> ( rv, pm, rr, @@ -49,7 +48,8 @@ inline void updateResiduals ( ss, sc, res, - errRes + errRes, + n ); } diff --git a/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h b/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h index d8ec226d3761b51399c4d6ba17f9cf707afc4c67..550b327e30cb1962b85b2692797a90100fa31da4 100644 --- a/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h +++ b/Tr/TrackVectorFit/TrackVectorFit/vector/Update.h @@ -9,18 +9,18 @@ namespace TrackVectorFit { namespace Vector { -template<unsigned W> +template<unsigned W, bool checkNodeType> inline void update ( - const std::array<Sch::Item, W>& n, fp_ptr_64_const rv, fp_ptr_64_const pm, fp_ptr_64_const rr, fp_ptr_64_const em, fp_ptr_64 us, fp_ptr_64 uc, - fp_ptr_64 chi2 + fp_ptr_64 chi2, + const std::array<Sch::Item, W>& n ) { - MathCommon<W>::update ( + MathCommon<W>::template update<checkNodeType> ( rv, pm, rr, diff --git a/Tr/TrackVectorFit/src/TrackVectorFit.cpp b/Tr/TrackVectorFit/src/TrackVectorFit.cpp index 2e3bbc40a151d4bbb0c912b2fca0130177e6d400..1df23278655ccc1a5dfb34964ccbd53b882a19fa 100644 --- a/Tr/TrackVectorFit/src/TrackVectorFit.cpp +++ b/Tr/TrackVectorFit/src/TrackVectorFit.cpp @@ -1,595 +1 @@ -#include "TrackVectorFit/TrackVectorFit.h" - -using namespace Tr::TrackVectorFit; - -TrackVectorFit::TrackVectorFit ( - const unsigned& memManagerStorageIncrements, - const bool& debug -) : m_memManagerStorageIncrements(memManagerStorageIncrements), - m_debug(debug) { - m_smoothStore = Mem::Store(Mem::View::SmoothState::size(), memManagerStorageIncrements); - m_nodeParametersStore = Mem::Store(Mem::View::NodeParameters::size(), memManagerStorageIncrements); - m_transportForwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements); - m_transportBackwardStore = Mem::Store(Mem::View::TrackMatrix<25>::size(), memManagerStorageIncrements); - m_auxStore = Mem::Store(Mem::View::TrackSymMatrix::size() + Mem::View::TrackVector::size(), memManagerStorageIncrements); -} - -void TrackVectorFit::operator() ( - std::list<std::reference_wrapper<Track>>& tracks -) { - // Initialize the smooth state iterator here - Mem::Iterator smoothMainStoreIterator (m_smoothStoreIterator); - - if (tracks.size() >= VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()) { - if (m_debug) { - std::cout << "Scheduler pre iterations:" << std::endl - << m_scheduler_pre << std::endl - << "Scheduler main iterations:" << std::endl - << m_scheduler_main << std::endl - << "Scheduler post iterations:" << std::endl - << m_scheduler_post << std::endl; - } - - // Iterators for all stores - Mem::Iterator forwardTransportIterator (m_transportForwardStore); - Mem::ReverseIterator backwardTransportIterator (m_transportBackwardStore); - Mem::ReverseIterator backwardNodeIterator (m_nodeParametersStore); - Mem::ReverseIterator backwardStoreIterator (*m_backwardStore); - - // Iterators initialized from the object state - Mem::Iterator forwardNodeIterator (m_forwardNodeIterator); - Mem::Iterator forwardStoreIterator (m_forwardStoreIterator); - Mem::ReverseIterator forwardMainStoreIterator (m_forwardMainStoreIterator); - - // Forward fit - // Pre iterations - if (m_debug) { - std::cout << std::endl << "Calculating forward fit pre iterations" << std::endl; - } - std::for_each (m_scheduler_pre.begin(), m_scheduler_pre.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - const auto& out = s.out; - const auto& action = s.action; - auto& pool = s.pool; - - if (m_debug) { - std::cout << s << std::endl; - } - - // Update - Mem::View::State current (forwardStoreIterator.nextVector()); - Mem::View::NodeParameters currentParameters (forwardNodeIterator.nextVector()); - - // Predict (always in a new vector, no need to swap) - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (action[i]) { - auto& node = *(pool[i].node); - Scalar::initialise<Op::Forward>(node, pool[i].track->m_seedCovariance); - } - } - - if (out.any()) { - Vector::update<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer, - current.m_chi2 - ); - } - }); - - // Forward fit - // Main iterations - if (m_debug) { - std::cout << std::endl << "Calculating forward fit main iterations" << std::endl; - } - std::list<SwapStore> swaps; - auto lastVector = forwardStoreIterator.nextVector(); - std::for_each (m_scheduler_main.begin(), m_scheduler_main.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - const auto& in = s.in; - const auto& out = s.out; - auto& pool = s.pool; - - if (m_debug) { - std::cout << s << std::endl; - } - - // Feed the data we need in our vector - if (in.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (in[i]) { - Mem::View::State memslot (lastVector + i); - memslot.m_updatedState.copy((pool[i].node-1)->get<Op::Forward, Op::StateVector>()); - memslot.m_updatedCovariance.copy((pool[i].node-1)->get<Op::Forward, Op::Covariance>()); - } - } - } - - // predict and update - const auto& currentVector = forwardStoreIterator.nextVector(); - Mem::View::State last (lastVector); - Mem::View::State current (currentVector); - const auto& tm = forwardTransportIterator.nextVector(); - Mem::View::NodeParameters currentParameters (forwardNodeIterator.nextVector()); - - Vector::fit<Op::Forward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - tm, - last.m_updatedState.m_basePointer, - last.m_updatedCovariance.m_basePointer, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer, - current.m_chi2 - ); - - // Move requested data out - if (out.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (out[i]) { - auto& node = *(pool[i].node); - - Mem::View::State memslot (m_auxStore.getNextElement()); - Mem::View::State currentslot (currentVector + i); - memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer); - memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer); - swaps.push_back( - SwapStore( - memslot, - node.get<Op::Forward, Op::StateVector>(), - node.get<Op::Forward, Op::Covariance>() - ) - ); - } - } - } - - lastVector = currentVector; - }); - - // Restore updated state - for (auto& swap : swaps) { - swap.state.copy(swap.store.m_updatedState); - swap.covariance.copy(swap.store.m_updatedCovariance); - } - - // Forward fit - // Post iterations - if (m_debug) { - std::cout << std::endl << "Calculating forward fit post iterations" << std::endl; - } - lastVector = forwardStoreIterator.nextVector(); - std::for_each (m_scheduler_post.begin(), m_scheduler_post.end(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - const auto& in = s.in; - const auto& out = s.out; - auto& pool = s.pool; - - if (m_debug) { - std::cout << s << std::endl; - } - - // Feed the data we need in our vector - if (in.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (in[i]) { - Mem::View::State memslot (lastVector + i); - memslot.m_updatedState.copy((pool[i].node-1)->get<Op::Forward, Op::StateVector>()); - memslot.m_updatedCovariance.copy((pool[i].node-1)->get<Op::Forward, Op::Covariance>()); - } - } - } - - // predict and update - const auto& currentVector = forwardStoreIterator.nextVector(); - Mem::View::State last (lastVector); - Mem::View::State current (currentVector); - const auto& tm = forwardTransportIterator.nextVector(); - Mem::View::NodeParameters currentParameters (forwardNodeIterator.nextVector()); - - Vector::fit<Op::Forward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - tm, - last.m_updatedState.m_basePointer, - last.m_updatedCovariance.m_basePointer, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer, - current.m_chi2 - ); - - // Move requested data out and update data pointers - if (out.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (out[i]) { - auto& node = *(pool[i].node); - - Mem::View::State memslot (m_auxStore.getNextElement()); - Mem::View::State currentslot (currentVector + i); - memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer); - memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer); - node.get<Op::Forward, Op::StateVector>().setBasePointer(memslot.m_updatedState); - node.get<Op::Forward, Op::Covariance>().setBasePointer(memslot.m_updatedCovariance); - } - } - } - - lastVector = currentVector; - }); - - // Backward fit and smoother - // Pre iterations - if (m_debug) { - std::cout << "Calculating backward fit pre iterations" << std::endl; - } - std::for_each (m_scheduler_post.rbegin(), m_scheduler_post.rend(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - const auto& out = s.in; - const auto& action = s.action; - auto& pool = s.pool; - - if (m_debug) { - std::cout << s << std::endl; - } - - // Update - Mem::View::State current (backwardStoreIterator.previousVector()); - Mem::View::NodeParameters currentParameters (backwardNodeIterator.previousVector()); - - // Predict (always in a new vector, no need to swap) - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (action[i]) { - auto& node = *(pool[i].node); - Scalar::initialise<Op::Backward>(node, pool[i].track->m_seedCovariance); - } - } - - if (out.any()) { - Vector::update<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer, - current.m_chi2 - ); - } - }); - - // Backward fit and smoother - // Main iterations - if (m_debug) { - std::cout << "Calculating backward fit and smoother main iterations" << std::endl; - } - lastVector = backwardStoreIterator.previousVector(); - std::for_each (m_scheduler_main.rbegin(), m_scheduler_main.rend(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - const auto& in = s.out; - const auto& out = s.in; - const auto& action = s.action; - auto& pool = s.pool; - - if (m_debug) { - std::cout << s << std::endl; - } - - // Feed the data we need in our vector - if (in.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (in[i]) { - Mem::View::State memslot (lastVector + i); - memslot.m_updatedState.copy((pool[i].node+1)->get<Op::Backward, Op::StateVector>()); - memslot.m_updatedCovariance.copy((pool[i].node+1)->get<Op::Backward, Op::Covariance>()); - } - } - } - - // Update pointers of smooth state - const auto& smootherVector = smoothMainStoreIterator.nextVector(); - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (action[i]) { - pool[i].node->m_smoothState.setBasePointer(smootherVector + i); - } - } - - // predict and update - const auto& currentVector = backwardStoreIterator.previousVector(); - Mem::View::State last (lastVector); - Mem::View::State current (currentVector); - const auto& tm = backwardTransportIterator.previousVector(); - Mem::View::NodeParameters currentParameters (backwardNodeIterator.previousVector()); - - Vector::predict<Op::Backward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - tm, - last.m_updatedState.m_basePointer, - last.m_updatedCovariance.m_basePointer, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer - ); - - // Do the smoother *here* - // This benefits from cache locality, - // and removes the need of doing swapping to restore - // the updated states - - // Fetch the corresponding forward and smooth state, and update it - Mem::View::State forward (forwardMainStoreIterator.previousVector()); - Mem::View::SmoothState smooth (smootherVector); - - // Smoother - const uint16_t smoother_result = Vector::smoother<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - forward.m_updatedState.m_basePointer, - forward.m_updatedCovariance.m_basePointer, - current.m_updatedState.m_basePointer, // These are still the predicted state - current.m_updatedCovariance.m_basePointer, // and covariance - smooth.m_state.m_basePointer, - smooth.m_covariance.m_basePointer - ); - - // Update residuals - Vector::updateResiduals ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - smooth.m_state.m_basePointer, - smooth.m_covariance.m_basePointer, - smooth.m_residual, - smooth.m_errResidual - ); - - // Update - Vector::update<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer, - current.m_chi2 - ); - - // Set errors - if (smoother_result != ArrayGen::mask<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (action[i] && !(smoother_result >> i) & 0x01) { - pool[i].track->track().setFitStatus(LHCb::Track::FitFailed); - } - } - } - - if (m_debug) { - if (smoother_result != ArrayGen::mask<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>()) { - std::cout << "Smoother errors: " << smoother_result << std::endl; - } - } - - // Move requested data out and update data pointers - if (out.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (out[i]) { - auto& node = *(pool[i].node); - - Mem::View::State memslot (m_auxStore.getNextElement()); - Mem::View::State currentslot (currentVector + i); - memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer); - memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer); - node.get<Op::Backward, Op::StateVector>().setBasePointer(memslot.m_updatedState); - node.get<Op::Backward, Op::Covariance>().setBasePointer(memslot.m_updatedCovariance); - } - } - } - - lastVector = currentVector; - }); - - // Backward fit - // Post iterations - if (m_debug) { - std::cout << "Calculating backward fit post iterations" << std::endl; - } - lastVector = backwardStoreIterator.previousVector(); - std::for_each (m_scheduler_pre.rbegin(), m_scheduler_pre.rend(), [&] (Sch::Blueprint<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()>& s) { - const auto& in = s.out; - const auto& out = s.in; - auto& pool = s.pool; - - if (m_debug) { - std::cout << s << std::endl; - } - - // Feed the data we need in our vector - if (in.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (in[i]) { - Mem::View::State memslot (lastVector + i); - memslot.m_updatedState.copy((pool[i].node+1)->get<Op::Backward, Op::StateVector>()); - memslot.m_updatedCovariance.copy((pool[i].node+1)->get<Op::Backward, Op::Covariance>()); - } - } - } - - // predict and update - const auto& currentVector = backwardStoreIterator.previousVector(); - Mem::View::State last (lastVector); - Mem::View::State current (currentVector); - const auto& tm = backwardTransportIterator.previousVector(); - Mem::View::NodeParameters currentParameters (backwardNodeIterator.previousVector()); - - Vector::fit<Op::Backward>::op<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>()> ( - pool, - currentParameters.m_referenceVector.m_basePointer, - currentParameters.m_projectionMatrix.m_basePointer, - currentParameters.m_referenceResidual, - currentParameters.m_errorMeasure, - tm, - last.m_updatedState.m_basePointer, - last.m_updatedCovariance.m_basePointer, - current.m_updatedState.m_basePointer, - current.m_updatedCovariance.m_basePointer, - current.m_chi2 - ); - - // Move requested data out and update data pointers - if (out.any()) { - for (unsigned i=0; i<VectorConfiguration::width<TRACKVECTORFIT_PRECISION>(); ++i) { - if (out[i]) { - auto& node = *(pool[i].node); - - Mem::View::State memslot (m_auxStore.getNextElement()); - Mem::View::State currentslot (currentVector + i); - memslot.m_updatedState.copy(currentslot.m_updatedState.m_basePointer); - memslot.m_updatedCovariance.copy(currentslot.m_updatedCovariance.m_basePointer); - node.get<Op::Backward, Op::StateVector>().setBasePointer(memslot.m_updatedState); - node.get<Op::Backward, Op::Covariance>().setBasePointer(memslot.m_updatedCovariance); - } - } - } - - lastVector = currentVector; - }); - } else { - // Forward fit - std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { - auto& nodes = track.m_nodes; - Node& node = nodes.front(); - Scalar::fit<Op::Forward>(node, track.m_seedCovariance); - - // Before m_forwardUpstream - for (unsigned i=1; i<=track.m_forwardUpstream; ++i) { - Node& node = nodes[i]; - Node& prevnode = nodes[i-1]; - Scalar::fit<Op::Forward, false>(node, prevnode); - } - - // After m_forwardUpstream - for (unsigned i=track.m_forwardUpstream+1; i<nodes.size(); ++i) { - Node& node = nodes[i]; - Node& prevnode = nodes[i-1]; - Scalar::fit<Op::Forward, true>(node, prevnode); - } - }); - - // Backward fit - std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { - auto& nodes = track.m_nodes; - Node& node = nodes.back(); - Scalar::fit<Op::Backward>(node, track.m_seedCovariance); - - // Before m_backwardUpstream - for (unsigned i=1; i<=track.m_backwardUpstream; ++i) { - const int element = track.m_nodes.size() - i - 1; - Node& prevnode = nodes[element + 1]; - Node& node = nodes[element]; - Scalar::fit<Op::Backward, false>(node, prevnode); - } - - // After m_backwardUpstream - for (unsigned i=track.m_backwardUpstream+1; i<nodes.size(); ++i) { - const int element = track.m_nodes.size() - i - 1; - Node& prevnode = nodes[element + 1]; - Node& node = nodes[element]; - Scalar::predict<Op::Backward, true>(node, prevnode); - - // Do the smoother - if (i < (track.m_nodes.size() - track.m_forwardUpstream - 1)) { - node.m_smoothState.setBasePointer(smoothMainStoreIterator.nextElement()); - const bool smoothSuccess = Scalar::smoother<true, true>(node); - Scalar::updateResiduals(node); - - if (not smoothSuccess) { - track.track().setFitStatus(LHCb::Track::FitFailed); - } - } - - Scalar::update<Op::Backward>(node); - } - }); - } - - // Calculate the chi2 a posteriori - std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { - auto& nodes = track.m_nodes; - - // Reset the values - track.m_ndof = -track.m_nTrackParameters; - auto ndofBackward = -track.m_nTrackParameters; - track.m_forwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0); - track.m_backwardFitChi2 = ((TRACKVECTORFIT_PRECISION) 0.0); - - for (unsigned k=0; k<nodes.size(); ++k) { - Node& node = nodes[k]; - if (node.node().type() == LHCb::Node::HitOnTrack) { - track.m_forwardFitChi2 += node.get<Op::Forward, Op::Chi2>(); - ++track.m_ndof; - } - node.get<Op::Forward, Op::Chi2>() = track.m_forwardFitChi2; - node.m_ndof = track.m_ndof; - - // Backwards chi2 - Node& backwardNode = nodes[nodes.size() - 1 - k]; - if (backwardNode.node().type() == LHCb::Node::HitOnTrack) { - track.m_backwardFitChi2 += backwardNode.get<Op::Backward, Op::Chi2>(); - ++ndofBackward; - } - backwardNode.get<Op::Backward, Op::Chi2>() = track.m_backwardFitChi2; - backwardNode.m_ndofBackward = ndofBackward; - } - }); - - // Not upstream iterations - std::for_each(tracks.begin(), tracks.end(), [&] (Track& track) { - for (unsigned j=0; j<=track.m_forwardUpstream; ++j) { - track.m_nodes[j].m_smoothState.setBasePointer(smoothMainStoreIterator.nextElement()); - Scalar::smoother<false, true>(track.m_nodes[j]); - Scalar::updateResiduals(track.m_nodes[j]); - } - - for (unsigned j=0; j<=track.m_backwardUpstream; ++j) { - const unsigned element = track.m_nodes.size() - j - 1; - track.m_nodes[element].m_smoothState.setBasePointer(smoothMainStoreIterator.nextElement()); - Scalar::smoother<true, false>(track.m_nodes[element]); - Scalar::updateResiduals(track.m_nodes[element]); - } - }); - - // Set the LHCb::Node information accordingly - std::for_each(tracks.begin(), tracks.end(), [] (Track& track) { - // TODO - Either the min or max chi2 could be picked up here. - track.track().setChi2AndDoF(track.minChi2(), track.ndof()); - }); -} - -/** - * @brief Populate the nodes with the calculated information. - */ -void TrackVectorFit::populateNodes ( - Track& track -) { - std::for_each(track.nodes().begin(), track.nodes().end(), [] (Node& n) { - auto& node = n.node(); - - node.setErrMeasure(n.get<Op::NodeParameters, Op::ErrMeasure>()); - node.setProjectionMatrix((Gaudi::TrackProjectionMatrix) n.get<Op::NodeParameters, Op::ProjectionMatrix>()); - node.setResidual(n.get<Op::Smooth, Op::Residual>()); - node.setErrResidual(n.get<Op::Smooth, Op::ErrResidual>()); - - const auto smoothStateVector = (Gaudi::TrackVector) n.get<Op::Smooth, Op::StateVector>(); - node.setRefVector(smoothStateVector); - node.setState( - smoothStateVector, - (Gaudi::TrackSymMatrix) n.get<Op::Smooth, Op::Covariance>(), - node.z()); - }); -} +#include "TrackVectorFit/TrackVectorFit.h" \ No newline at end of file