diff --git a/Pr/PrVeloUT/src/PrVeloUT.cpp b/Pr/PrVeloUT/src/PrVeloUT.cpp index fb9a7d99b6502dc63a5de885ab3e297bc9fa5b2c..d4e1d6fd06f8b5cbdd158a8c1f92f7b04f6b82ee 100644 --- a/Pr/PrVeloUT/src/PrVeloUT.cpp +++ b/Pr/PrVeloUT/src/PrVeloUT.cpp @@ -1,5 +1,5 @@ /***************************************************************************** \ -* (c) Copyright 2000-2022 CERN for the benefit of the LHCb Collaboration * +* (c) Copyright 2024 CERN for the benefit of the LHCb Collaboration * * * * This software is distributed under the terms of the GNU General Public * * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". * @@ -8,299 +8,119 @@ * granted to it by virtue of its status as an Intergovernmental Organization * * or submit itself to any jurisdiction. * \*****************************************************************************/ + #include "PrUTMagnetTool.h" -#include <Core/FloatComparison.h> -#include <DetDesc/GenericConditionAccessorHolder.h> -#include <Event/PrHits.h> -#include <Event/PrUpstreamTracks.h> -#include <Event/PrVeloTracks.h> -#include <Event/UTSectorHelper.h> -#include <Event/UTTrackUtils.h> -#include <GaudiAlg/ISequencerTimerTool.h> -#include <LHCbAlgs/Transformer.h> -#include <LHCbMath/GeomFun.h> -#include <Magnet/DeMagnet.h> -#include <PrKernel/PrMutUTHits.h> -#include <UTDAQ/UTDAQHelper.h> -#include <UTDAQ/UTInfo.h> -#include <vdt/sqrt.h> - -/** @class PrVeloUT PrVeloUT.h - * - * PrVeloUT algorithm. This is just a wrapper, - * the actual pattern recognition is done in the 'PrVeloUTTool'. - * - * - InputTracksName: Input location for Velo tracks - * - OutputTracksName: Output location for VeloTT tracks - * - TimingMeasurement: Do a timing measurement? - * - * @author Mariusz Witek - * @date 2007-05-08 - * @update for A-Team framework 2007-08-20 SHM - * - * 2017-03-01: Christoph Hasse (adapt to future framework) - * 2019-04-26: Arthur Hennequin (change data Input/Output) - * 2020-08-26: Peilian Li (change data Input/Output to SOACollection) - */ - -namespace LHCb::Pr { - - using simd = SIMDWrapper::best::types; - using scalar = SIMDWrapper::scalar::types; - - constexpr auto nanMomentum = std::numeric_limits<float>::quiet_NaN(); - - constexpr static int batchSize = 2 * simd::size; - const int totalUTLayers = static_cast<int>( UTInfo::DetectorNumbers::TotalLayers ); - - struct ProtoTracks final { - - std::array<float, simd::size> wbs; - std::array<float, simd::size> xMidFields; - std::array<float, simd::size> invKinkVeloDists; - - // -- this is for the hits - // -- this does _not_ include overlap hits, so only 4 per track - std::array<float, 4 * batchSize> xs; - std::array<float, 4 * batchSize> zs; - std::array<float, 4 * batchSize> weightss{}; // this needs to be zero-initialized - std::array<float, 4 * batchSize> sins; - std::array<int, 4 * batchSize> ids; - std::array<int, 4 * batchSize> hitIndexs; - - // -- this is the output of the fit - std::array<float, batchSize> qps; - std::array<float, batchSize> chi2TTs; - std::array<float, batchSize> xTTs; - std::array<float, batchSize> xSlopeTTs; - std::array<float, batchSize> ys; - - // -- and this the original state (in the Velo) - std::array<float, 3 * batchSize> statePoss; - std::array<float, 2 * batchSize> stateDirs; - std::array<int, batchSize> ancestorIndexs; - - // -- and this an index to find the hit containers - std::array<int, batchSize> hitContIndexs; - - std::size_t size{ 0 }; - SOA_ACCESSOR_VAR( x, &( xs[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( z, &( zs[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( weight, &( weightss[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( sin, &( sins[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( id, &( ids[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( hitIndex, &( hitIndexs[pos * batchSize] ), int pos ) - - SOA_ACCESSOR( qp, qps.data() ) - SOA_ACCESSOR( chi2TT, chi2TTs.data() ) - SOA_ACCESSOR( xTT, xTTs.data() ) - SOA_ACCESSOR( xSlopeTT, xSlopeTTs.data() ) - SOA_ACCESSOR( y, ys.data() ) - - SOA_ACCESSOR( ancestorIndex, ancestorIndexs.data() ) - SOA_ACCESSOR( hitContIndex, hitContIndexs.data() ) - VEC3_SOA_ACCESSOR( pos, (float*)&( statePoss[0] ), (float*)&( statePoss[batchSize] ), - (float*)&( statePoss[2 * batchSize] ) ) - VEC3_XY_SOA_ACCESSOR( dir, (float*)&( stateDirs[0] ), (float*)&( stateDirs[batchSize] ), 1.0f ) - - SOA_ACCESSOR( wb, wbs.data() ) - SOA_ACCESSOR( xMidField, xMidFields.data() ) - SOA_ACCESSOR( invKinkVeloDist, invKinkVeloDists.data() ) - - void initTracks( int indexVal, float maxPseudoChi2 ) { - hitIndexs.fill( indexVal ); - chi2TTs.fill( maxPseudoChi2 ); - size = 0; - } - template <typename dType> - void fillHelperParams( Vec3<typename dType::float_v> pos, Vec3<typename dType::float_v> dir, const float zKink, - const float sigmaVeloSlope ) { +#include <algorithm> +#include <array> +#include <limits> +#include <vector> + +#include "Gaudi/Accumulators.h" +#include "GaudiKernel/EventContext.h" +#include "GaudiKernel/SystemOfUnits.h" + +#include "DetDesc/GenericConditionAccessorHolder.h" +#include "Magnet/DeMagnet.h" +#include "UTDet/DeUTDetector.h" + +#include "Event/PrHits.h" +#include "Event/PrUpstreamTracks.h" +#include "Event/PrVeloTracks.h" +#include "Event/SOACollection.h" +#include "Event/UTSectorHelper.h" +#include "Event/UTTrackUtils.h" +#include "Event/ZipUtils.h" +#include "Kernel/STLExtensions.h" +#include "LHCbAlgs/Transformer.h" +#include "LHCbMath/GeomFun.h" +#include "LHCbMath/SIMDWrapper.h" + +#include "PrKernel/PrMutUTHits.h" +#include "UTDAQ/UTDAQHelper.h" +#include "UTDAQ/UTInfo.h" + +#include "vdt/sqrt.h" + +namespace LHCb::Pr::VeloUT { + namespace { + using simd = SIMDWrapper::best::types; + using scalar = SIMDWrapper::scalar::types; + using TracksTag = Upstream::Tag; - using F = typename dType::float_v; + namespace HitTag = UT::Mut::HitTag; + namespace TU = LHCb::UT::TrackUtils; - F( pos.x + dir.x * ( zKink - pos.z ) ).store( xMidFields.data() ); - F a = sigmaVeloSlope * ( zKink - pos.z ); - F( 1.0f / ( a * a ) ).store( wbs.data() ); - F( 1.0f / ( zKink - pos.z ) ).store( invKinkVeloDists.data() ); - } - - template <typename dType> - void storeSimpleFitInfo( typename dType::float_v chi2TT, typename dType::float_v qp, typename dType::float_v xTT, - typename dType::float_v xSlopeTT, unsigned int trackIndex ) { + constexpr auto EndVelo = Event::Enum::State::Location::EndVelo; + constexpr auto nanMomentum = std::numeric_limits<float>::quiet_NaN(); + constexpr auto totalUTLayers = static_cast<int>( UTInfo::DetectorNumbers::TotalLayers ); + // -- Used for the calculation of the size of the search windows + constexpr std::array<float, totalUTLayers> normFact{ 0.95f, 1.0f, 1.36f, 1.41f }; + using LooseBounds = std::array<TU::BoundariesLoose, totalUTLayers>; + using NominalBounds = std::array<TU::BoundariesNominal, totalUTLayers>; + + struct ProtoTrackTag { + struct InitialWeight : Event::float_field {}; + struct XMidField : Event::float_field {}; + struct InvKinkVeloDist : Event::float_field {}; + // -- this is for the hits + // -- this does _not_ include overlap hits, so only 4 per track + struct X : Event::floats_field<4> {}; + struct Z : Event::floats_field<4> {}; + struct Weight : Event::floats_field<4> {}; + struct Sin : Event::floats_field<4> {}; + struct ID : Event::ints_field<4> {}; + struct HitIndex : Event::ints_field<4> {}; + // fit output + struct QOverP : Event::float_field {}; + struct Chi2 : Event::float_field {}; + struct XOffset : Event::float_field {}; + struct XSlope : Event::float_field {}; + struct YSlope : Event::float_field {}; + // -- and this the original state (in the Velo) + struct VeloState : Event::pos_dir_field {}; + // -- and this an index to find the hit containers + struct Ancestor : Event::int_field {}; + struct HitIndexContainer : Event::int_field {}; + + template <typename T> + using prototrack_t = + Event::SOACollection<T, InitialWeight, XMidField, InvKinkVeloDist, X, Z, Weight, Sin, ID, HitIndex, QOverP, + Chi2, XOffset, XSlope, YSlope, VeloState, Ancestor, HitIndexContainer>; + }; - using F = typename dType::float_v; + struct ProtoTracks : ProtoTrackTag::prototrack_t<ProtoTracks> { + using base_t = typename ProtoTrackTag::prototrack_t<ProtoTracks>; + using base_t::allocator_type; + using base_t::base_t; - F( chi2TT ).store( &chi2TTs[trackIndex] ); - F( qp ).store( &qps[trackIndex] ); - F( xTT ).store( &xTTs[trackIndex] ); - F( xSlopeTT ).store( &xSlopeTTs[trackIndex] ); - } + template <SIMDWrapper::InstructionSet simd, ProxyBehaviour behaviour, typename ContainerType> + struct ProtoTrackProxy : Event::Proxy<simd, behaviour, ContainerType> { + using base_t = typename Event::Proxy<simd, behaviour, ContainerType>; + using base_t::base_t; + auto isInvalid() const { return this->template field<ProtoTrackTag::HitIndex>( 0 ).get() == -1; } + }; - // -- this runs over all 4 layers, even if no hit was found - // -- but it fills a weight of 0 - // -- Note: These are not "physical" layers, as the hits are ordered such that only - // -- the last one can be not filled. - template <typename dType> - void fillHitInfo( const LHCb::Event::Zip<SIMDWrapper::Scalar, LHCb::Pr::UT::Mut::Hits>& hitsInL, - unsigned int trackIndex ) { + template <SIMDWrapper::InstructionSet simd, ProxyBehaviour behaviour, typename ContainerType> + using proxy_type = ProtoTrackProxy<simd, behaviour, ContainerType>; + }; - using F = typename dType::float_v; - using I = typename dType::int_v; - auto nValid{ 0 }; - if ( hitsInL.size() != 0 ) { - for ( int i = 0; i < totalUTLayers; ++i ) { - int hitI = hitIndexs[trackIndex + i * batchSize]; - const auto valid = ( hitI != -1 ); - const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; - nValid += valid; - hitI = std::max( 0, hitI ); - LHCb::LHCbID id( LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() ) ); - - F( weight ).store( &weightss[trackIndex + i * batchSize] ); - F( hitsInL[hitI].x() ).store( &xs[trackIndex + i * batchSize] ); - F( hitsInL[hitI].z() ).store( &zs[trackIndex + i * batchSize] ); - F( hitsInL[hitI].sin() ).store( &sins[trackIndex + i * batchSize] ); - I( hitsInL[hitI].index() ).store( &hitIndexs[trackIndex + i * batchSize] ); - I( id.lhcbID() ).store( &ids[trackIndex + i * batchSize] ); - } - } - // this is useful in filterMode - if ( !nValid ) { F( nanMomentum ).store( &qps[trackIndex] ); } - } - }; + struct VeloState { + float x, y, z, tx, ty; + }; - namespace { - struct VeloUTGeomCache final { - VeloUTGeomCache( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) : common( det ) { + struct VeloUTGeomCache { + VeloUTGeomCache( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) : common{ det } { // m_zMidUT is a position of normalization plane which should to be close to z middle of UT ( +- 5 cm ). // Cached once in VeloUTTool at initialization. No need to update with small UT movement. zMidUT = magtoolcache.zCenterUT; // zMidField and distToMomentum is properly recalculated in PrUTMagnetTool when B field changes distToMomentum = 1.0f / magtoolcache.dist2mom; } - VeloUTGeomCache() = default; UTDAQ::GeomCache common; - float zMidUT{ 0. }, distToMomentum{ 0. }; + float zMidUT{ 0.f }; + float distToMomentum{ 0.f }; }; - } // namespace - - class VeloUT - : public LHCb::Algorithm::Transformer<Upstream::Tracks( const EventContext&, const Velo::Tracks&, - const LHCb::Pr::UT::Hits&, const VeloUTGeomCache&, - const DeMagnet& ), - LHCb::Algorithm::Traits::usesConditions<VeloUTGeomCache, DeMagnet>> { - - public: - /// Standard constructor - using base_class_t = - LHCb::Algorithm::Transformer<Upstream::Tracks( const EventContext&, const Velo::Tracks&, - const LHCb::Pr::UT::Hits&, const VeloUTGeomCache&, - const DeMagnet& ), - LHCb::Algorithm::Traits::usesConditions<VeloUTGeomCache, DeMagnet>>; - - VeloUT( const std::string& name, ISvcLocator* pSvcLocator ) - : base_class_t( name, pSvcLocator, - { KeyValue{ "InputTracksName", "Rec/Track/Velo" }, KeyValue{ "UTHits", UTInfo::HitLocation }, - KeyValue{ "GeometryInfo", "AlgorithmSpecific-" + name + "-UTGeometryInfo" }, - KeyValue{ "Magnet", LHCb::Det::Magnet::det_path } }, - KeyValue{ "OutputTracksName", "Rec/Track/UT" } ) {} - - StatusCode initialize() override { - return Transformer::initialize().andThen( [&] { - addConditionDerivation( { DeUTDetLocation::location(), m_PrUTMagnetTool->cacheLocation() }, - inputLocation<VeloUTGeomCache>(), - []( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) { - return VeloUTGeomCache( det, magtoolcache ); - } ); - } ); - } - - Upstream::Tracks operator()( const EventContext&, const Velo::Tracks&, const LHCb::Pr::UT::Hits&, - const VeloUTGeomCache&, const DeMagnet& ) const override final; - - private: - Gaudi::Property<float> m_minMomentum{ this, "MinMomentum", 1500.f * Gaudi::Units::MeV }; - Gaudi::Property<float> m_minPT{ this, "MinPT", 300.f * Gaudi::Units::MeV }; - Gaudi::Property<float> m_minMomentumFinal{ this, "MinMomentumFinal", 2500.f * Gaudi::Units::MeV }; - Gaudi::Property<float> m_minPTFinal{ this, "MinPTFinal", 425.f * Gaudi::Units::MeV }; - Gaudi::Property<float> m_maxPseudoChi2{ this, "MaxPseudoChi2", 1280. }; - Gaudi::Property<float> m_yTol{ this, "YTolerance", 0.5 * Gaudi::Units::mm }; // 0.8 - Gaudi::Property<float> m_yTolSlope{ this, "YTolSlope", 0.08 }; // 0.2 - Gaudi::Property<float> m_hitTol{ this, "HitTol", 0.8 * Gaudi::Units::mm }; // 0.8 - Gaudi::Property<float> m_deltaTx{ this, "DeltaTx", 0.018 }; // 0.02 - Gaudi::Property<float> m_maxXSlope{ this, "MaxXSlope", 0.350 }; - Gaudi::Property<float> m_maxYSlope{ this, "MaxYSlope", 0.300 }; - Gaudi::Property<float> m_centralHoleSize{ this, "CentralHoleSize", 33. * Gaudi::Units::mm }; - Gaudi::Property<float> m_intraLayerDist{ this, "IntraLayerDist", 15.0 * Gaudi::Units::mm }; - Gaudi::Property<float> m_overlapTol{ this, "OverlapTol", 0.5 * Gaudi::Units::mm }; - Gaudi::Property<float> m_passHoleSize{ this, "PassHoleSize", 40. * Gaudi::Units::mm }; - Gaudi::Property<float> m_LD3Hits{ this, "LD3HitsMin", -0.5 }; - Gaudi::Property<float> m_LD4Hits{ this, "LD4HitsMin", -0.5 }; - Gaudi::Property<int> m_minLayers{ this, "MinLayers", 3 }; - - Gaudi::Property<bool> m_printVariables{ this, "PrintVariables", false }; - Gaudi::Property<bool> m_passTracks{ this, "PassTracks", false }; - Gaudi::Property<bool> m_finalFit{ this, "FinalFit", true }; - Gaudi::Property<bool> m_fiducialCuts{ this, "FiducialCuts", true }; - Gaudi::Property<bool> m_looseSectorSearch{ this, "LooseSectorSearch", false }; - Gaudi::Property<bool> m_filterMode{ this, "FilterMode", false }; - - mutable Gaudi::Accumulators::SummingCounter<unsigned int> m_seedsCounter{ this, "#seeds" }; - mutable Gaudi::Accumulators::SummingCounter<unsigned int> m_tracksCounter{ this, "#tracks" }; - - LHCb::UT::TrackUtils::MiniStates getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, - float zMidUT ) const; - - template <typename Boundaries, typename BoundariesTypes> - bool getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, - const std::array<Boundaries, 4>& compBoundsArray, LHCb::Pr::UT::Mut::Hits& hitsInLayers, - const std::size_t t, float zMidUT, int minLayers = totalUTLayers - 1 ) const; - - inline void findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, - const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, - const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, - const simd::float_v& yTol, const int firstIndex, const int lastIndex ) const; - - template <bool forward> - bool formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, ProtoTracks& pTracks, const int trackIndex, - float zMidUT ) const; - - template <typename BdlTable> - void prepareOutputTrackSIMD( const ProtoTracks& protoTracks, - const std::array<LHCb::Pr::UT::Mut::Hits, batchSize>& hitsInLayers, - Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, - const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const; - - /// Multipurpose tool for Bdl and deflection - ToolHandle<UTMagnetTool> m_PrUTMagnetTool{ this, "PrUTMagnetTool", "PrUTMagnetTool" }; - - mutable Gaudi::Accumulators::MsgCounter<MSG::ERROR> m_too_much_in_filtered{ - this, "Reached the maximum number of tracks in filteredStates!!" }; - mutable Gaudi::Accumulators::MsgCounter<MSG::ERROR> m_too_much_in_boundaries{ - this, "Reached the maximum number of tracks in Boundaries!!" }; - - constexpr static float c_zKink{ 1780.0 }; - constexpr static float c_sigmaVeloSlope{ 0.10 * Gaudi::Units::mrad }; - constexpr static float c_invSigmaVeloSlope{ 10.0 / Gaudi::Units::mrad }; - }; -} // namespace LHCb::Pr - -//----------------------------------------------------------------------------- -// Implementation file for class : PrVeloUT -// -// 2007-05-08: Mariusz Witek -// 2017-03-01: Christoph Hasse (adapt to future framework) -// 2019-04-26: Arthur Hennequin (change data Input/Output) -//----------------------------------------------------------------------------- - -// Declaration of the Algorithm Factory -DECLARE_COMPONENT_WITH_ID( LHCb::Pr::VeloUT, "PrVeloUT" ) - -using TracksTag = LHCb::Pr::Upstream::Tag; -namespace HitTag = LHCb::Pr::UT::Mut::HitTag; -namespace LHCb::Pr { - namespace { simd::mask_v CholeskyDecomposition3( const std::array<simd::float_v, 6>& mat, std::array<simd::float_v, 3>& rhs ) { // -- copied from Root::Math::CholeskyDecomp @@ -334,51 +154,49 @@ namespace LHCb::Pr { return mask; } - // -- parameters that describe the z position of the kink point as a function of ty in a 4th order polynomial (even - // terms only) - constexpr auto magFieldParams = std::array{ 2010.0f, -2240.0f, -71330.f }; - // perform a fit using trackhelper's best hits with y correction, improve qop estimate - simd::float_v fastfitterSIMD( std::array<simd::float_v, 4>& improvedParams, const ProtoTracks& protoTracks, - const float zMidUT, const simd::float_v qpxz2p, const int t, - simd::mask_v& goodFitMask ) { - - const Vec3<simd::float_v> pos = protoTracks.pos<simd::float_v>( t ); - const Vec3<simd::float_v> dir = protoTracks.dir<simd::float_v>( t ); - - const simd::float_v x = pos.x; - const simd::float_v y = pos.y; - const simd::float_v z = pos.z; - const simd::float_v tx = dir.x; - const simd::float_v ty = dir.y; - const simd::float_v zKink = - magFieldParams[0] - ty * ty * magFieldParams[1] - ty * ty * ty * ty * magFieldParams[2]; - const simd::float_v xMidField = x + tx * ( zKink - z ); - - const simd::float_v zDiff = 0.001f * ( zKink - zMidUT ); + template <typename Proxy> + simd::float_v fastfitterSIMD( std::array<simd::float_v, 4>& improvedParams, Proxy pTrack, const float zMidUT, + const simd::float_v qpxz2p, simd::mask_v& goodFitMask ) { + // -- parameters that describe the z position of the kink point as a function of ty in a 4th order polynomial + // (even + // terms only) + constexpr auto magFieldParams = std::array{ 2010.0f, -2240.0f, -71330.f }; + const auto ty = pTrack.template field<ProtoTrackTag::VeloState>().ty(); + const auto tx = pTrack.template field<ProtoTrackTag::VeloState>().tx(); + const auto z = pTrack.template field<ProtoTrackTag::VeloState>().z(); + const auto y = pTrack.template field<ProtoTrackTag::VeloState>().y(); + const auto x = pTrack.template field<ProtoTrackTag::VeloState>().x(); + const auto ty2 = ty * ty; + const auto zKink = magFieldParams[0] - ty2 * magFieldParams[1] - ty2 * ty2 * magFieldParams[2]; + const auto xMidField = x + tx * ( zKink - z ); + const auto zDiff = 0.001f * ( zKink - zMidUT ); // -- This is to avoid division by zero... - const simd::float_v pHelper = max( abs( protoTracks.qp<simd::float_v>( t ) * qpxz2p ), 1e-9f ); - const simd::float_v invP = pHelper * 1.f / sqrt( 1.0f + ty * ty ); + const auto qop = pTrack.template field<ProtoTrackTag::QOverP>().get(); + const auto pHelper = max( abs( qop * qpxz2p ), 1e-9f ); + const auto invP = pHelper * 1.f / sqrt( 1.0f + ty2 ); // these resolution are semi-empirical, could be tuned and might not be correct for low momentum. - const simd::float_v error1 = - 0.14f + 10000.0f * invP; // this is the resolution due to multiple scattering between Velo and UT - const simd::float_v error2 = 0.12f + 3000.0f * invP; // this is the resolution due to the finite Velo resolution - const simd::float_v error = error1 * error1 + error2 * error2; - const simd::float_v weight = 1.0f / error; + // this is the resolution due to multiple scattering between Velo and UT + const auto error1 = 0.14f + 10000.0f * invP; + // this is the resolution due to the finite Velo resolution + const auto error2 = 0.12f + 3000.0f * invP; + + const auto error = error1 * error1 + error2 * error2; + const auto weight = 1.0f / error; std::array<simd::float_v, 6> mat = { weight, weight * zDiff, weight * zDiff * zDiff, 0.0f, 0.0f, 0.0f }; std::array<simd::float_v, 3> rhs = { weight * xMidField, weight * xMidField * zDiff, 0.0f }; - for ( int i = 0; i < 4; ++i ) { + for ( int i = 0; i < totalUTLayers; ++i ) { // -- there are 3-hit candidates, but we'll // -- just treat them like 4-hit candidates // -- with 0 weight for the last hit - const simd::float_v ui = protoTracks.x<simd::float_v>( t, i ); - const simd::float_v dz = 0.001f * ( protoTracks.z<simd::float_v>( t, i ) - zMidUT ); - const simd::float_v w = protoTracks.weight<simd::float_v>( t, i ); - const simd::float_v ta = protoTracks.sin<simd::float_v>( t, i ); + const auto ui = pTrack.template field<ProtoTrackTag::X>( i ).get(); + const auto dz = 0.001f * ( pTrack.template field<ProtoTrackTag::Z>( i ).get() - zMidUT ); + const auto w = pTrack.template field<ProtoTrackTag::Weight>( i ).get(); + const auto ta = pTrack.template field<ProtoTrackTag::Sin>( i ).get(); mat[0] += w; mat[1] += w * dz; mat[2] += w * dz * dz; @@ -389,46 +207,43 @@ namespace LHCb::Pr { rhs[1] += w * ui * dz; rhs[2] += w * ui * ta; } - + // TODO: this is overkill for dimension 3, vanilla matrix inversion should be fine goodFitMask = !CholeskyDecomposition3( mat, rhs ); - const simd::float_v xUTFit = rhs[0]; - const simd::float_v xSlopeUTFit = 0.001f * rhs[1]; - const simd::float_v offsetY = rhs[2]; + const auto xUTFit = rhs[0]; + const auto xSlopeUTFit = 0.001f * rhs[1]; + const auto offsetY = rhs[2]; - const simd::float_v distX = ( xMidField - xUTFit - xSlopeUTFit * ( zKink - zMidUT ) ); + const auto distX = xMidField - xUTFit - xSlopeUTFit * ( zKink - zMidUT ); // -- This takes into account that the distance between a point and track is smaller than the distance on the // x-axis - const simd::float_v distCorrectionX2 = 1.0f / ( 1 + xSlopeUTFit * xSlopeUTFit ); - simd::float_v chi2 = weight * ( distX * distX * distCorrectionX2 + offsetY * offsetY / ( 1.0f + ty * ty ) ); - - for ( int i = 0; i < 4; ++i ) { - - const simd::float_v dz = protoTracks.z<simd::float_v>( t, i ) - zMidUT; - const simd::float_v w = protoTracks.weight<simd::float_v>( t, i ); - const simd::float_v dist = ( protoTracks.x<simd::float_v>( t, i ) - xUTFit - xSlopeUTFit * dz - - offsetY * protoTracks.sin<simd::float_v>( t, i ) ); - + const auto distCorrectionX2 = 1.0f / ( 1 + xSlopeUTFit * xSlopeUTFit ); + auto chi2 = weight * ( distX * distX * distCorrectionX2 + offsetY * offsetY / ( 1.0f + ty2 ) ); + for ( int i = 0; i < totalUTLayers; ++i ) { + const auto dz = pTrack.template field<ProtoTrackTag::Z>( i ).get() - zMidUT; + const auto w = pTrack.template field<ProtoTrackTag::Weight>( i ).get(); + const auto x = pTrack.template field<ProtoTrackTag::X>( i ).get(); + const auto sin = pTrack.template field<ProtoTrackTag::Sin>( i ).get(); + const auto dist = x - xUTFit - xSlopeUTFit * dz - offsetY * sin; chi2 += w * dist * dist * distCorrectionX2; } // new VELO slope x - const simd::float_v xb = - 0.5f * ( ( xUTFit + xSlopeUTFit * ( zKink - zMidUT ) ) + xMidField ); // the 0.5 is empirical - const simd::float_v xSlopeVeloFit = ( xb - x ) / ( zKink - z ); + const auto xb = 0.5f * ( ( xUTFit + xSlopeUTFit * ( zKink - zMidUT ) ) + xMidField ); // the 0.5 is empirical + const auto xSlopeVeloFit = ( xb - x ) / ( zKink - z ); improvedParams = { xUTFit, xSlopeUTFit, y + ty * ( zMidUT - z ) + offsetY, chi2 }; // calculate q/p - const simd::float_v sinInX = xSlopeVeloFit / sqrt( 1.0f + xSlopeVeloFit * xSlopeVeloFit + ty * ty ); - const simd::float_v sinOutX = xSlopeUTFit / sqrt( 1.0f + xSlopeUTFit * xSlopeUTFit + ty * ty ); + const auto sinInX = xSlopeVeloFit / sqrt( 1.0f + xSlopeVeloFit * xSlopeVeloFit + ty2 ); + const auto sinOutX = xSlopeUTFit / sqrt( 1.0f + xSlopeUTFit * xSlopeUTFit + ty2 ); return ( sinInX - sinOutX ); } // -- Evaluate the linear discriminant // -- Coefficients derived with LD method for p, pT and chi2 with TMVA template <int nHits> - simd::float_v evaluateLinearDiscriminantSIMD( const std::array<simd::float_v, 3>& inputValues ) { + simd::float_v evaluateLinearDiscriminantSIMD( std::array<simd::float_v, 3> inputValues ) { constexpr auto coeffs = ( nHits == 3 ? std::array{ 0.162880166064f, -0.107081172665f, 0.134153123662f, -0.137764853657f } @@ -440,11 +255,6 @@ namespace LHCb::Pr { coeffs[2] * log<simd::float_v>( inputValues[1] ) + coeffs[3] * log<simd::float_v>( inputValues[2] ); } - /* - simd::float_v calcXTol( const simd::float_v minMom, const simd::float_v ty ) { - return ( 38000.0f / minMom + 0.25f ) * ( 1.0f + ty * ty * 0.8f ); - } - */ // -------------------------------------------------------------------- // -- Helper function to calculate the planeCode: 0 - 1 - 2 - 3 // -------------------------------------------------------------------- @@ -469,23 +279,18 @@ namespace LHCb::Pr { } // -------------------------------------------------------------------- - // -- These things are all hardcopied from the PrTableForFunction + // -- TODO: These things are all hardcopied from the PrTableForFunction // -- and PrUTMagnetTool // -- If the granularity or whatever changes, this will give wrong results simd::int_v masterIndexSIMD( const simd::int_v index1, const simd::int_v index2, const simd::int_v index3 ) { return ( index3 * 11 + index2 ) * 31 + index1; } - constexpr auto minValsBdl = std::array{ -0.3f, -250.0f, 0.0f }; - constexpr auto maxValsBdl = std::array{ 0.3f, 250.0f, 800.0f }; - constexpr auto deltaBdl = std::array{ 0.02f, 50.0f, 80.0f }; - // constexpr auto dxDyHelper = std::array{0.0f, 1.0f, -1.0f, 0.0f}; // =========================================================================================== // -- 2 helper functions for fit // -- Pseudo chi2 fit, templated for 3 or 4 hits // =========================================================================================== - inline __attribute__( ( always_inline ) ) void - addHit( span<float, 3> mat, span<float, 2> rhs, const LHCb::Pr::UT::Mut::Hits& hits, int index, float zMidUT ) { + void addHit( span<float, 3> mat, span<float, 2> rhs, const UT::Mut::Hits& hits, int index, float zMidUT ) { const auto& hit = hits.scalar()[index]; const float ui = hit.x().cast(); const float ci = hit.cos().cast(); @@ -497,28 +302,28 @@ namespace LHCb::Pr { rhs[0] += wi * ui; rhs[1] += wi * ui * dz; } - template <std::size_t N> - inline __attribute__( ( always_inline ) ) void - simpleFit( const std::array<int, N>& indices, const LHCb::Pr::UT::Mut::Hits& hits, ProtoTracks& pTracks, - const int trackIndex, float zMidUT, float zKink, float invSigmaVeloSlope ) { + + template <std::size_t N, typename Proxy> + [[gnu::always_inline]] inline void simpleFit( const std::array<int, N>& indices, const UT::Mut::Hits& hits, + Proxy pTrack, float zMidUT, float zKink, float invSigmaVeloSlope ) { static_assert( N == 3 || N == 4 ); // -- Scale the z-component, to not run into numerical problems // -- with floats - const float wb = pTracks.wb<scalar::float_v>( 0 ).cast(); - const float xMidField = pTracks.xMidField<scalar::float_v>( 0 ).cast(); - const float invKinkVeloDist = pTracks.invKinkVeloDist<scalar::float_v>( 0 ).cast(); - const float stateX = pTracks.pos<scalar::float_v>( trackIndex ).x.cast(); - const float stateTx = pTracks.dir<scalar::float_v>( trackIndex ).x.cast(); + const auto wb = pTrack.template field<ProtoTrackTag::InitialWeight>().get().cast(); + const auto xMidField = pTrack.template field<ProtoTrackTag::XMidField>().get().cast(); + const auto invKinkVeloDist = pTrack.template field<ProtoTrackTag::InvKinkVeloDist>().get().cast(); + const auto stateX = pTrack.template field<ProtoTrackTag::VeloState>().x().cast(); + const auto stateTx = pTrack.template field<ProtoTrackTag::VeloState>().tx().cast(); const float zDiff = 0.001f * ( zKink - zMidUT ); auto mat = std::array{ wb, wb * zDiff, wb * zDiff * zDiff }; auto rhs = std::array{ wb * xMidField, wb * xMidField * zDiff }; auto const muthit = hits.scalar(); - std::for_each( indices.begin(), indices.end(), - [&]( const auto index ) { addHit( mat, rhs, hits, index, zMidUT ); } ); + for ( auto index : indices ) { addHit( mat, rhs, hits, index, zMidUT ); } + // TODO: this is overkill for dimension 2, vanilla matrix inversion should be fine ROOT::Math::CholeskyDecomp<float, 2> decomp( mat.data() ); if ( !decomp ) return; @@ -541,26 +346,111 @@ namespace LHCb::Pr { } ) / ( N + 1 - 2 ); - if ( chi2TT < pTracks.chi2TT<scalar::float_v>( trackIndex ).cast() ) { + if ( chi2TT < pTrack.template field<ProtoTrackTag::Chi2>().get().cast() ) { // calculate q/p const float sinInX = xSlopeVeloFit * vdt::fast_isqrtf( 1.0f + xSlopeVeloFit * xSlopeVeloFit ); const float sinOutX = xSlopeTTFit * vdt::fast_isqrtf( 1.0f + xSlopeTTFit * xSlopeTTFit ); - const float qp = ( sinInX - sinOutX ); - - pTracks.storeSimpleFitInfo<scalar>( chi2TT, qp, xTTFit, xSlopeTTFit, trackIndex ); - for ( std::size_t i = 0; i < N; i++ ) { pTracks.store_hitIndex<scalar::int_v>( trackIndex, i, indices[i] ); } - if constexpr ( N == 3 ) { pTracks.store_hitIndex<scalar::int_v>( trackIndex, 3, -1 ); } + const float qp = sinInX - sinOutX; + pTrack.template field<ProtoTrackTag::Chi2>().set( chi2TT ); + pTrack.template field<ProtoTrackTag::QOverP>().set( qp ); + pTrack.template field<ProtoTrackTag::XOffset>().set( xTTFit ); + pTrack.template field<ProtoTrackTag::XSlope>().set( xSlopeTTFit ); + for ( std::size_t i = 0; i < N; i++ ) { pTrack.template field<ProtoTrackTag::HitIndex>( i ).set( indices[i] ); } + if constexpr ( N == 3 ) { pTrack.template field<ProtoTrackTag::HitIndex>( N ).set( -1 ); } } } } // namespace - //============================================================================= - // Main execution - //============================================================================= - Upstream::Tracks VeloUT::operator()( const EventContext& evtCtx, const Velo::Tracks& inputTracks, - const LHCb::Pr::UT::Hits& hh, const VeloUTGeomCache& velogeom, - const DeMagnet& magnet ) const { + class PrVeloUT + : public Algorithm::Transformer<Upstream::Tracks( const EventContext&, const Velo::Tracks&, const UT::Hits&, + const VeloUTGeomCache&, const DeMagnet& ), + DetDesc::usesConditions<VeloUTGeomCache, DeMagnet>> { + + public: + PrVeloUT( const std::string& name, ISvcLocator* pSvcLocator ) + : Transformer( name, pSvcLocator, + { KeyValue{ "InputTracksName", "Rec/Track/Velo" }, KeyValue{ "UTHits", UTInfo::HitLocation }, + KeyValue{ "GeometryInfo", "AlgorithmSpecific-" + name + "-UTGeometryInfo" }, + KeyValue{ "Magnet", Det::Magnet::det_path } }, + KeyValue{ "OutputTracksName", "Rec/Track/UT" } ) {} + + StatusCode initialize() override { + return Transformer::initialize().andThen( [&] { + addConditionDerivation( { DeUTDetLocation::location(), m_PrUTMagnetTool->cacheLocation() }, + inputLocation<VeloUTGeomCache>(), + []( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) { + return VeloUTGeomCache( det, magtoolcache ); + } ); + } ); + } + + Upstream::Tracks operator()( const EventContext&, const Velo::Tracks&, const UT::Hits&, const VeloUTGeomCache&, + const DeMagnet& ) const override final; + + private: + Gaudi::Property<float> m_minMomentum{ this, "MinMomentum", 1500.f * Gaudi::Units::MeV }; + Gaudi::Property<float> m_minPT{ this, "MinPT", 300.f * Gaudi::Units::MeV }; + Gaudi::Property<float> m_minMomentumFinal{ this, "MinMomentumFinal", 2500.f * Gaudi::Units::MeV }; + Gaudi::Property<float> m_minPTFinal{ this, "MinPTFinal", 425.f * Gaudi::Units::MeV }; + Gaudi::Property<float> m_maxPseudoChi2{ this, "MaxPseudoChi2", 1280. }; + Gaudi::Property<float> m_yTol{ this, "YTolerance", 0.5 * Gaudi::Units::mm }; // 0.8 + Gaudi::Property<float> m_yTolSlope{ this, "YTolSlope", 0.08 }; // 0.2 + Gaudi::Property<float> m_hitTol{ this, "HitTol", 0.8 * Gaudi::Units::mm }; // 0.8 + Gaudi::Property<float> m_deltaTx{ this, "DeltaTx", 0.018 }; // 0.02 + Gaudi::Property<float> m_maxXSlope{ this, "MaxXSlope", 0.350 }; + Gaudi::Property<float> m_maxYSlope{ this, "MaxYSlope", 0.300 }; + Gaudi::Property<float> m_centralHoleSize{ this, "CentralHoleSize", 33. * Gaudi::Units::mm }; + Gaudi::Property<float> m_intraLayerDist{ this, "IntraLayerDist", 15.0 * Gaudi::Units::mm }; + Gaudi::Property<float> m_overlapTol{ this, "OverlapTol", 0.5 * Gaudi::Units::mm }; + Gaudi::Property<float> m_passHoleSize{ this, "PassHoleSize", 40. * Gaudi::Units::mm }; + Gaudi::Property<float> m_LD3Hits{ this, "LD3HitsMin", -0.5 }; + Gaudi::Property<float> m_LD4Hits{ this, "LD4HitsMin", -0.5 }; + Gaudi::Property<int> m_minLayers{ this, "MinLayers", 3 }; + + Gaudi::Property<bool> m_printVariables{ this, "PrintVariables", false }; + Gaudi::Property<bool> m_passTracks{ this, "PassTracks", false }; + Gaudi::Property<bool> m_finalFit{ this, "FinalFit", true }; + Gaudi::Property<bool> m_fiducialCuts{ this, "FiducialCuts", true }; + Gaudi::Property<bool> m_filterMode{ this, "FilterMode", false }; + Gaudi::Property<bool> m_looseSectorSearch{ this, "LooseSectorSearch", false }; + + mutable Gaudi::Accumulators::SummingCounter<unsigned int> m_seedsCounter{ this, "#seeds" }; + mutable Gaudi::Accumulators::SummingCounter<unsigned int> m_tracksCounter{ this, "#tracks" }; + + LHCb::UT::TrackUtils::MiniStates getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, + float zMidUT ) const; + + template <typename Boundaries, typename BoundariesTypes> + bool getHitsScalar( VeloState, int, float, int, const UT::Hits&, span<const Boundaries, totalUTLayers>, + UT::Mut::Hits& ) const; + + inline void findHits( const UT::Hits& hh, simd::float_v yProto, simd::float_v ty, simd::float_v tx, + simd::float_v xOnTrackProto, simd::float_v tolProto, simd::float_v xTolNormFact, + UT::Mut::Hits& mutHits, simd::float_v yTol, int firstIndex, int lastIndex ) const; + + template <bool forward, typename Proxy> + bool formClusters( const UT::Mut::Hits& hitsInLayers, Proxy pTracks, float zMidUT ) const; + + template <typename BdlTable, typename ProtoTracks> + void prepareOutputTrackSIMD( const ProtoTracks& protoTracks, span<UT::Mut::Hits> hitsInLayers, + Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, + const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const; + + /// Multipurpose tool for Bdl and deflection + ToolHandle<UTMagnetTool> m_PrUTMagnetTool{ this, "PrUTMagnetTool", "PrUTMagnetTool" }; + + constexpr static float c_zKink{ 1780.0 }; + constexpr static float c_sigmaVeloSlope{ 0.10 * Gaudi::Units::mrad }; + constexpr static float c_invSigmaVeloSlope{ 10.0 / Gaudi::Units::mrad }; + }; + + // Declaration of the Algorithm Factory + DECLARE_COMPONENT_WITH_ID( PrVeloUT, "PrVeloUT" ) + + Upstream::Tracks PrVeloUT::operator()( const EventContext& evtCtx, const Velo::Tracks& inputTracks, + const UT::Hits& hh, const VeloUTGeomCache& velogeom, + const DeMagnet& magnet ) const { Upstream::Tracks outputTracks{ &inputTracks, Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) }; outputTracks.reserve( inputTracks.size() ); @@ -571,128 +461,131 @@ namespace LHCb::Pr { LHCb::UT::TrackUtils::MiniStates filteredStates = getStates( inputTracks, outputTracks, velogeom.zMidUT ); - simd::float_v invMinPt = 1.0f / m_minPT.value(); - simd::float_v invMinMomentum = 1.0f / m_minMomentum.value(); + const auto invMinPt = 1.0f / m_minPT; + const auto invMinMomentum = 1.0f / m_minMomentum; - // -- Used for the calculation of the size of the search windows - constexpr const std::array<float, totalUTLayers> normFact{ 0.95f, 1.0f, 1.36f, 1.41f }; - - auto extrapFunc = [&]( int layerIndex, simd::float_v x, simd::float_v y, simd::float_v z, simd::float_v tx, - simd::float_v ty, simd::float_v ) { + auto extrapolateToLayer = [this, invMinPt = invMinPt, invMinMomentum = invMinMomentum, &geometry, + &velogeom]( auto layerIndex, auto x, auto y, auto z, auto tx, auto ty, auto /*qop*/ ) { // -- this 0.002 seems a little odd... - const simd::float_v theta = max( 0.002f, sqrt( tx * tx + ty * ty ) ); - const simd::float_v invMinMom = min( invMinPt * theta, invMinMomentum ); + const auto theta = max( 0.002f, sqrt( tx * tx + ty * ty ) ); + const auto invMinMom = min( invMinPt * theta, invMinMomentum ); - const simd::float_v xTol = + const auto xTol = abs( velogeom.distToMomentum * invMinMom * normFact[layerIndex] ) - abs( tx ) * m_intraLayerDist.value(); - const simd::float_v yTol = m_yTol.value() + m_yTolSlope.value() * xTol; + const auto yTol = m_yTol.value() + m_yTolSlope.value() * xTol; - const simd::float_v zGeo{ geometry.layers[layerIndex].z }; - const simd::float_v dxDy{ geometry.layers[layerIndex].dxDy }; + const auto zGeo{ geometry.layers[layerIndex].z }; + const auto dxDy{ geometry.layers[layerIndex].dxDy }; - const simd::float_v yAtZ = y + ty * ( zGeo - z ); - const simd::float_v xLayer = x + tx * ( zGeo - z ); - const simd::float_v yLayer = yAtZ + yTol * dxDy; + const auto yAtZ = y + ty * ( zGeo - z ); + const auto xLayer = x + tx * ( zGeo - z ); + const auto yLayer = yAtZ + yTol * dxDy; return std::make_tuple( xLayer, yLayer, xTol, yTol ); }; - namespace TU = LHCb::UT::TrackUtils; - - std::variant<std::array<TU::BoundariesLoose, totalUTLayers>, std::array<TU::BoundariesNominal, totalUTLayers>> - compBoundsArray; - if ( m_looseSectorSearch ) { - compBoundsArray = LHCb::UTDAQ::findAllSectorsExtrap<TU::BoundariesLoose, TU::BoundariesLooseTag::types, - TU::maxNumRowsBoundariesLoose, TU::maxNumColsBoundariesLoose>( - filteredStates, geometry, extrapFunc, m_minLayers ); - } else { + std::variant<LooseBounds, NominalBounds> compBoundsArray; + if ( !m_looseSectorSearch ) { compBoundsArray = LHCb::UTDAQ::findAllSectorsExtrap<TU::BoundariesNominal, TU::BoundariesNominalTag::types, TU::maxNumRowsBoundariesNominal, TU::maxNumColsBoundariesNominal>( - filteredStates, geometry, extrapFunc, m_minLayers ); + filteredStates, geometry, extrapolateToLayer, m_minLayers ); + } else { + compBoundsArray = LHCb::UTDAQ::findAllSectorsExtrap<TU::BoundariesLoose, TU::BoundariesLooseTag::types, + TU::maxNumRowsBoundariesLoose, TU::maxNumColsBoundariesLoose>( + filteredStates, geometry, extrapolateToLayer, m_minLayers ); } - auto hitsInLayers = LHCb::make_object_array<LHCb::Pr::UT::Mut::Hits, batchSize>( Zipping::generateZipIdentifier(), - LHCb::getMemResource( evtCtx ) ); - for ( auto& hits : hitsInLayers ) { hits.reserve( 32 ); } - ProtoTracks pTracks; - - // -- We cannot put all found hits in an array, as otherwise the stack overflows - // -- so we just do the whole thing in batches - const std::size_t filteredStatesSize = filteredStates.size(); - for ( std::size_t t = 0; t < filteredStatesSize; t += batchSize ) { - - // -- This is scalar, as the hits are found in a scalar way - filteredStates.clear(); - for ( std::size_t t2 = 0; t2 < batchSize && t2 + t < filteredStatesSize; ++t2 ) { - const auto fSize = filteredStates.size(); - hitsInLayers[fSize].clear(); - hitsInLayers[fSize].layerIndices.fill( -1 ); - - if ( m_looseSectorSearch ) { - const bool foundHits = getHitsScalar<TU::BoundariesLoose, TU::BoundariesLooseTag::types>( - hh, filteredStates, std::get<std::array<TU::BoundariesLoose, 4>>( compBoundsArray ), hitsInLayers[fSize], - t + t2, velogeom.zMidUT, m_minLayers ); - filteredStates.copy_back<scalar>( filteredStates, t + t2, foundHits ); - } else { - const bool foundHits = getHitsScalar<TU::BoundariesNominal, TU::BoundariesNominalTag::types>( - hh, filteredStates, std::get<std::array<TU::BoundariesNominal, 4>>( compBoundsArray ), - hitsInLayers[fSize], t + t2, velogeom.zMidUT, m_minLayers ); - filteredStates.copy_back<scalar>( filteredStates, t + t2, foundHits ); - } - } - - pTracks.initTracks( -1, m_maxPseudoChi2.value() ); - for ( const auto& fState : filteredStates.scalar() ) { - const auto tEff = fState.offset(); - - Vec3<scalar::float_v> pos{ fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().x(), - fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().y(), - fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().z() }; - Vec3<scalar::float_v> dir{ fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().tx(), - fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().ty(), 1.f }; - - const int trackIndex = pTracks.size; - pTracks.fillHelperParams<scalar>( pos, dir, c_zKink, c_sigmaVeloSlope ); - pTracks.store_pos<scalar::float_v>( trackIndex, pos ); - pTracks.store_dir<scalar::float_v>( trackIndex, dir ); - - if ( !formClusters<true>( hitsInLayers[tEff], pTracks, trackIndex, velogeom.zMidUT ) ) { - formClusters<false>( hitsInLayers[tEff], pTracks, trackIndex, velogeom.zMidUT ); - } + // TODO: these hit collections can be fully integrated into the ProtoTracks collection because currently + // we have exactly one of them per ProtoTrack. This can be done by replacing the static floats_field<4> by + // its vector variant and then storing all hits there (reducing them later to only 3 or 4). A good reason to + // do it is performance because the allocations below are relatively expensive and wasteful memory-wise. + // See issue https://gitlab.cern.ch/lhcb/Rec/-/issues/567 + std::vector<UT::Mut::Hits> hitsInLayers{}; + hitsInLayers.reserve( filteredStates.size() ); + for ( std::size_t i = 0; i < filteredStates.size(); ++i ) { + hitsInLayers.emplace_back( Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) ).reserve( 32 ); + } - if ( !m_filterMode && pTracks.hitIndex<scalar::int_v>( trackIndex, 0 ).cast() == -1 ) continue; - - scalar::int_v ancestorIndex = fState.get<LHCb::UT::TrackUtils::MiniStateTag::index>(); - pTracks.store_ancestorIndex<scalar::int_v>( trackIndex, ancestorIndex ); - pTracks.store_hitContIndex<scalar::int_v>( trackIndex, tEff ); - // -- this runs over all 4 layers, even if no hit was found - // -- but it fills a weight of 0 - // -- Note: These are not "physical" layers, as the hits are ordered such that only - // -- the last one can be not filled. - auto hitsInL = hitsInLayers[tEff].scalar(); - pTracks.fillHitInfo<scalar>( hitsInL, trackIndex ); - pTracks.size++; + auto pTracks = ProtoTracks{ Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) }; + pTracks.reserve( inputTracks.size() ); + + for ( auto fState : filteredStates.scalar() ) { + const auto x = fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().x().cast(); + const auto y = fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().y().cast(); + const auto z = fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().z().cast(); + const auto tx = fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().tx().cast(); + const auto ty = fState.get<LHCb::UT::TrackUtils::MiniStateTag::State>().ty().cast(); + const auto hitContainerIdx = pTracks.size(); + hitsInLayers[hitContainerIdx].clear(); + hitsInLayers[hitContainerIdx].layerIndices.fill( -1 ); + if ( !m_looseSectorSearch ? getHitsScalar<TU::BoundariesNominal, TU::BoundariesNominalTag::types>( + { x, y, z, tx, ty }, fState.offset(), velogeom.zMidUT, m_minLayers, hh, + std::get<NominalBounds>( compBoundsArray ), hitsInLayers[hitContainerIdx] ) + : getHitsScalar<TU::BoundariesLoose, TU::BoundariesLooseTag::types>( + { x, y, z, tx, ty }, fState.offset(), velogeom.zMidUT, m_minLayers, hh, + std::get<LooseBounds>( compBoundsArray ), hitsInLayers[hitContainerIdx] ) ) { + const auto ancestorIndex = fState.get<LHCb::UT::TrackUtils::MiniStateTag::index>(); + const auto zDiff = c_zKink - z; + const auto a = c_sigmaVeloSlope * zDiff; + auto pTrack = pTracks.emplace_back<SIMDWrapper::InstructionSet::Scalar>(); + pTrack.field<ProtoTrackTag::XMidField>().set( x + tx * zDiff ); + pTrack.field<ProtoTrackTag::InitialWeight>().set( 1.f / ( a * a ) ); + pTrack.field<ProtoTrackTag::InvKinkVeloDist>().set( 1.f / zDiff ); + pTrack.field<ProtoTrackTag::VeloState>().setPosition( x, y, z ); + pTrack.field<ProtoTrackTag::VeloState>().setDirection( tx, ty ); + pTrack.field<ProtoTrackTag::Ancestor>().set( ancestorIndex ); + pTrack.field<ProtoTrackTag::Chi2>().set( m_maxPseudoChi2 ); + pTrack.field<ProtoTrackTag::HitIndexContainer>().set( hitContainerIdx ); + for ( auto i{ 0 }; i < totalUTLayers; ++i ) { pTrack.field<ProtoTrackTag::HitIndex>( i ).set( -1 ); } } + } - // padding to avoid FPEs - if ( ( pTracks.size + simd::size ) < batchSize ) { - pTracks.store_pos<simd::float_v>( pTracks.size, Vec3<simd::float_v>( 1.f, 1.f, 1.f ) ); - pTracks.store_dir<simd::float_v>( pTracks.size, Vec3<simd::float_v>( 1.f, 1.f, 1.f ) ); + for ( auto pTrack : pTracks.scalar() ) { + const auto hitsOffset = pTrack.offset(); + if ( !formClusters<true>( hitsInLayers[hitsOffset], pTrack, velogeom.zMidUT ) ) { + formClusters<false>( hitsInLayers[hitsOffset], pTrack, velogeom.zMidUT ); } - prepareOutputTrackSIMD( pTracks, hitsInLayers, outputTracks, inputTracks, bdlTable, magnet, velogeom.zMidUT ); + if ( !m_filterMode && pTrack.isInvalid() ) continue; + // -- this runs over all 4 layers, even if no hit was found + // -- but it fills a weight of 0 + // -- Note: These are not "physical" layers, as the hits are ordered such that only + // -- the last one can be not filled. + auto hitsInL = hitsInLayers[hitsOffset].scalar(); + auto nValid{ 0 }; + if ( hitsInL.size() != 0 ) { + for ( auto i = 0; i < totalUTLayers; ++i ) { + auto hitI = pTrack.field<ProtoTrackTag::HitIndex>( i ).get().cast(); + const auto valid = ( hitI != -1 ); + const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; + nValid += valid; + hitI = std::max( 0, hitI ); + const auto id = LHCbID{ LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() ) }; + pTrack.field<ProtoTrackTag::Weight>( i ).set( weight ); + pTrack.field<ProtoTrackTag::X>( i ).set( hitsInL[hitI].x() ); + pTrack.field<ProtoTrackTag::Z>( i ).set( hitsInL[hitI].z() ); + pTrack.field<ProtoTrackTag::Sin>( i ).set( hitsInL[hitI].sin() ); + pTrack.field<ProtoTrackTag::HitIndex>( i ).set( hitsInL[hitI].index() ); + pTrack.field<ProtoTrackTag::ID>( i ).set( id.lhcbID() ); + } + } + // this is useful in filterMode + if ( !nValid ) { pTrack.field<ProtoTrackTag::QOverP>().set( nanMomentum ); } } + prepareOutputTrackSIMD( pTracks, hitsInLayers, outputTracks, inputTracks, bdlTable, magnet, velogeom.zMidUT ); + // -- The algorithm should not store duplicated hits... assert( findDuplicates( outputTracks ) && "Hit duplicates found" ); m_tracksCounter += outputTracks.size(); return outputTracks; } + //============================================================================= // Get the state, do some cuts //============================================================================= - __attribute__( ( flatten ) ) LHCb::UT::TrackUtils::MiniStates - VeloUT::getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, float zMidUT ) const { + LHCb::UT::TrackUtils::MiniStates PrVeloUT::getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, + float zMidUT ) const { LHCb::UT::TrackUtils::MiniStates filteredStates{ Zipping::generateZipIdentifier(), { inputTracks.get_allocator().resource() } }; @@ -703,8 +596,8 @@ namespace LHCb::Pr { for ( auto const& velotrack : inputTracks.simd() ) { auto const loopMask = velotrack.loop_mask(); auto const trackVP = velotrack.indices(); - auto pos = velotrack.StatePos( Event::Enum::State::Location::EndVelo ); - auto dir = velotrack.StateDir( Event::Enum::State::Location::EndVelo ); + auto pos = velotrack.StatePos( EndVelo ); + auto dir = velotrack.StateDir( EndVelo ); simd::float_v xMidUT = pos.x() + dir.x() * ( zMidUT - pos.z() ); simd::float_v yMidUT = pos.y() + dir.y() * ( zMidUT - pos.z() ); @@ -747,25 +640,17 @@ namespace LHCb::Pr { // Find the hits //============================================================================= template <typename Boundaries, typename BoundariesTypes> - inline __attribute__( ( always_inline ) ) bool - VeloUT::getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, - const std::array<Boundaries, totalUTLayers>& compBoundsArray, - LHCb::Pr::UT::Mut::Hits& hitsInLayers, const std::size_t t, float zMidUT, - int minLayers ) const { - - // -- This is for some sanity checks later - [[maybe_unused]] const int maxNumSectors = m_looseSectorSearch - ? LHCb::UT::TrackUtils::maxNumSectorsBoundariesLoose - : LHCb::UT::TrackUtils::maxNumSectorsBoundariesNominal; + [[gnu::flatten]] bool + PrVeloUT::getHitsScalar( VeloState veloState, int stateOffset, float zMidUT, int minLayers, const UT::Hits& hh, + span<const Boundaries, totalUTLayers> compBoundsArray, UT::Mut::Hits& hitsInLayers ) const { const simd::float_v tolProto{ m_yTol.value() }; - const auto fState = filteredStates.scalar(); - const auto xState = fState[t].get<LHCb::UT::TrackUtils::MiniStateTag::State>().x().cast(); - const auto yState = fState[t].get<LHCb::UT::TrackUtils::MiniStateTag::State>().y().cast(); - const auto zState = fState[t].get<LHCb::UT::TrackUtils::MiniStateTag::State>().z().cast(); - const auto txState = fState[t].get<LHCb::UT::TrackUtils::MiniStateTag::State>().tx().cast(); - const auto tyState = fState[t].get<LHCb::UT::TrackUtils::MiniStateTag::State>().ty().cast(); + const auto xState = veloState.x; + const auto yState = veloState.y; + const auto zState = veloState.z; + const auto txState = veloState.tx; + const auto tyState = veloState.ty; // in filter mode tracks close to the hole in the centre of the UT may have no hits if ( m_filterMode ) { @@ -783,24 +668,26 @@ namespace LHCb::Pr { const simd::float_v xOnTrackProto{ xState - txState * zState }; const simd::float_v ty{ tyState }; const simd::float_v tx{ txState }; - // -- the second condition is to ensure at least 3 layers with hits for ( int layerIndex = 0; layerIndex < totalUTLayers && layerIndex - nLayers <= totalUTLayers - minLayers; ++layerIndex ) { const auto compBoundsArr = compBoundsArray[layerIndex].scalar(); - const auto xTolS = compBoundsArr[t].template get<typename BoundariesTypes::xTol>().cast(); - const auto nPos = compBoundsArr[t].template get<typename BoundariesTypes::nPos>().cast(); - const simd::float_v yTol = m_yTol.value() + m_yTolSlope.value() * xTolS; - const simd::float_v xTol = xTolS + abs( tx ) * m_intraLayerDist.value(); + const auto xTolS = compBoundsArr[stateOffset].template get<typename BoundariesTypes::xTol>().cast(); + const auto nPos = compBoundsArr[stateOffset].template get<typename BoundariesTypes::nPos>().cast(); + const simd::float_v yTol = m_yTol.value() + m_yTolSlope.value() * xTolS; + const simd::float_v xTol = xTolS + abs( tx ) * m_intraLayerDist.value(); - assert( nPos < maxNumSectors && "nPos out of bound" ); + assert( nPos < ( m_looseSectorSearch ? TU::maxNumSectorsBoundariesLoose : TU::maxNumSectorsBoundariesNominal ) && + "nPos out of bound" ); for ( int j = 0; j < nPos; j++ ) { - const int sectA = compBoundsArr[t].template get<typename BoundariesTypes::sects>( j ).cast(); + const int sectA = compBoundsArr[stateOffset].template get<typename BoundariesTypes::sects>( j ).cast(); const int sectB = - ( j == nPos - 1 ) ? sectA : compBoundsArr[t].template get<typename BoundariesTypes::sects>( j + 1 ).cast(); + ( j == nPos - 1 ) + ? sectA + : compBoundsArr[stateOffset].template get<typename BoundariesTypes::sects>( j + 1 ).cast(); assert( sectA != LHCb::UTDAQ::paddingSectorNumber && "sectA points to padding element" ); assert( sectB != LHCb::UTDAQ::paddingSectorNumber && "sectB points to padding element" ); @@ -814,9 +701,9 @@ namespace LHCb::Pr { // -- The idea is to merge adjacent ranges of indices, such that collecting hits is more efficient // -- let's try to make it branchless - const std::pair<int, int>& temp = hh.indices( sectA ); - const std::pair<int, int>& temp2 = hh.indices( sectB ); - const int firstIndex = temp.first; + const auto temp = hh.indices( sectA ); + const auto temp2 = hh.indices( sectB ); + const auto firstIndex = temp.first; // -- We put the lastIndex to the end of the next container if they join up // -- Note that this is _not_ fulfilled if the sector has elements and is a duplicate, // -- but this only happens if it is the padding element, in which case we are already at the last @@ -839,15 +726,13 @@ namespace LHCb::Pr { // ============================================================================== // -- Method that finds the hits in a given layer within a certain range // ============================================================================== - inline __attribute__( ( always_inline ) ) void - VeloUT::findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, - const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, - const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, const simd::float_v& yTol, - const int firstIndex, const int lastIndex ) const { - - const auto& myHits = hh; - const auto myHs = myHits.simd(); + [[gnu::always_inline]] inline void PrVeloUT::findHits( const UT::Hits& hh, simd::float_v yProto, simd::float_v ty, + simd::float_v tx, simd::float_v xOnTrackProto, + simd::float_v tolProto, simd::float_v xTolNormFact, + UT::Mut::Hits& mutHits, simd::float_v yTol, int firstIndex, + int lastIndex ) const { + const auto myHs = hh.simd(); for ( int i = firstIndex; i < lastIndex; i += simd::size ) { const auto mH = myHs[i]; @@ -886,10 +771,8 @@ namespace LHCb::Pr { //========================================================================= // Form clusters //========================================================================= - template <bool forward> - inline __attribute__( ( always_inline ) ) bool VeloUT::formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, - ProtoTracks& pTracks, const int trackIndex, - float zMidUT ) const { + template <bool forward, typename Proxy> + bool PrVeloUT::formClusters( const UT::Mut::Hits& hitsInLayers, Proxy pTrack, float zMidUT ) const { const int begin0 = forward ? hitsInLayers.layerIndices[0] : hitsInLayers.layerIndices[3]; const int end0 = forward ? hitsInLayers.layerIndices[1] : hitsInLayers.size(); @@ -904,8 +787,8 @@ namespace LHCb::Pr { const int end3 = forward ? hitsInLayers.size() : hitsInLayers.layerIndices[1]; bool fourLayerSolution = false; - const float stateTx = pTracks.dir<scalar::float_v>( trackIndex ).x.cast(); - const auto& hitsInL = hitsInLayers.scalar(); + const auto stateTx = pTrack.template get<ProtoTrackTag::VeloState>().tx().cast(); + const auto hitsInL = hitsInLayers.scalar(); // -- this is scalar for the moment for ( int i0 = begin0; i0 < end0; ++i0 ) { @@ -956,25 +839,21 @@ namespace LHCb::Pr { } // -- All hits found if ( bestHit1Index != -1 && bestHit3Index != -1 ) { - simpleFit( std::array{ i0, bestHit1Index, i2, bestHit3Index }, hitsInLayers, pTracks, trackIndex, zMidUT, - c_zKink, c_invSigmaVeloSlope ); + simpleFit( std::array{ i0, bestHit1Index, i2, bestHit3Index }, hitsInLayers, pTrack, zMidUT, c_zKink, + c_invSigmaVeloSlope ); - if ( !fourLayerSolution && pTracks.hitIndex<scalar::int_v>( trackIndex, 0 ).cast() != -1 ) { - fourLayerSolution = true; - } + if ( !fourLayerSolution && !pTrack.isInvalid() ) { fourLayerSolution = true; } continue; } // -- Nothing found in layer 3 if ( !fourLayerSolution && bestHit1Index != -1 ) { - simpleFit( std::array{ i0, bestHit1Index, i2 }, hitsInLayers, pTracks, trackIndex, zMidUT, c_zKink, - c_invSigmaVeloSlope ); + simpleFit( std::array{ i0, bestHit1Index, i2 }, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); continue; } // -- Noting found in layer 1 if ( !fourLayerSolution && bestHit3Index != -1 ) { - simpleFit( std::array{ i0, bestHit3Index, i2 }, hitsInLayers, pTracks, trackIndex, zMidUT, c_zKink, - c_invSigmaVeloSlope ); + simpleFit( std::array{ i0, bestHit3Index, i2 }, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); continue; } } @@ -984,98 +863,81 @@ namespace LHCb::Pr { //========================================================================= // Create the Velo-UT tracks //========================================================================= - template <typename BdlTable> - inline __attribute__( ( always_inline ) ) __attribute__( ( flatten ) ) void - VeloUT::prepareOutputTrackSIMD( const ProtoTracks& protoTracks, - const std::array<LHCb::Pr::UT::Mut::Hits, batchSize>& hitsInLayers, - Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, - const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const { - - auto const velozipped = inputTracks.simd(); - const auto pSize = protoTracks.size; - for ( std::size_t t = 0; t < pSize; t += simd::size ) { - //== Handle states. copy Velo one, add TT. - const simd::float_v zOrigin = - select( protoTracks.dir<simd::float_v>( t ).y > 0.001f, - protoTracks.pos<simd::float_v>( t ).z - - protoTracks.pos<simd::float_v>( t ).y / protoTracks.dir<simd::float_v>( t ).y, - protoTracks.pos<simd::float_v>( t ).z - - protoTracks.pos<simd::float_v>( t ).x / protoTracks.dir<simd::float_v>( t ).x ); - - // -- this is to filter tracks where the fit had a too large chi2 - simd::mask_v fourHitTrack = protoTracks.weight<simd::float_v>( t, 3 ) > 0.0001f; + template <typename BdlTable, typename ProtoTracks> + void PrVeloUT::prepareOutputTrackSIMD( const ProtoTracks& protoTracks, span<UT::Mut::Hits> hitsInLayers, + Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, + const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const { - // const float bdl1 = m_PrUTMagnetTool->bdlIntegral(helper.state.ty,zOrigin,helper.state.z); + const auto velozipped = inputTracks.simd(); + for ( auto [ipTrack, pTrack] : range::enumerate( protoTracks.simd() ) ) { + //== Handle states. copy Velo one, add TT. + const auto ty = pTrack.template field<ProtoTrackTag::VeloState>().ty(); + const auto tx = pTrack.template field<ProtoTrackTag::VeloState>().tx(); + const auto z = pTrack.template field<ProtoTrackTag::VeloState>().z(); + const auto y = pTrack.template field<ProtoTrackTag::VeloState>().y(); + const auto x = pTrack.template field<ProtoTrackTag::VeloState>().x(); + const auto zOrigin = select( ty > 0.001f, z - y / ty, z - x / tx ); // -- These are calculations, copied and simplified from PrTableForFunction // -- FIXME: these rely on the internal details of PrTableForFunction!!! // and should at least be put back in there, and used from here // to make sure everything _stays_ consistent... - auto var = std::array{ protoTracks.dir<simd::float_v>( t ).y, zOrigin, protoTracks.pos<simd::float_v>( t ).z }; - - simd::int_v index1 = min( max( simd::int_v{ ( var[0] + 0.3f ) / 0.6f * 30 }, 0 ), 30 ); - simd::int_v index2 = min( max( simd::int_v{ ( var[1] + 250 ) / 500 * 10 }, 0 ), 10 ); - simd::int_v index3 = min( max( simd::int_v{ var[2] / 800 * 10 }, 0 ), 10 ); - - simd::float_v bdl = gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 ) ); + auto var = std::array{ ty, zOrigin, z }; + auto index1 = min( max( simd::int_v{ ( var[0] + 0.3f ) / 0.6f * 30 }, 0 ), 30 ); + auto index2 = min( max( simd::int_v{ ( var[1] + 250 ) / 500 * 10 }, 0 ), 10 ); + auto index3 = min( max( simd::int_v{ var[2] / 800 * 10 }, 0 ), 10 ); + auto bdl = gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 ) ); // -- TODO: check if we can go outside this table... - const std::array<simd::float_v, 3> bdls = - std::array{ gather( bdlTable.table().data(), masterIndexSIMD( index1 + 1, index2, index3 ) ), - gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), - gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) ) }; + const auto bdls = std::array{ gather( bdlTable.table().data(), masterIndexSIMD( index1 + 1, index2, index3 ) ), + gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), + gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) ) }; - const std::array<simd::float_v, 3> boundaries = { -0.3f + simd::float_v{ index1 } * deltaBdl[0], - -250.0f + simd::float_v{ index2 } * deltaBdl[1], - 0.0f + simd::float_v{ index3 } * deltaBdl[2] }; + constexpr auto minValsBdl = std::array{ -0.3f, -250.0f, 0.0f }; + constexpr auto maxValsBdl = std::array{ 0.3f, 250.0f, 800.0f }; + constexpr auto deltaBdl = std::array{ 0.02f, 50.0f, 80.0f }; + const auto boundaries = + std::array{ -0.3f + simd::float_v{ index1 } * deltaBdl[0], -250.0f + simd::float_v{ index2 } * deltaBdl[1], + 0.0f + simd::float_v{ index3 } * deltaBdl[2] }; // -- This is an interpolation, to get a bit more precision simd::float_v addBdlVal{ 0.0f }; for ( int i = 0; i < 3; ++i ) { - // -- this should make sure that values outside the range add nothing to the sum - var[i] = select( minValsBdl[i] > var[i], boundaries[i], var[i] ); - var[i] = select( maxValsBdl[i] < var[i], boundaries[i], var[i] ); - - const simd::float_v dTab_dVar = ( bdls[i] - bdl ) / deltaBdl[i]; - const simd::float_v dVar = ( var[i] - boundaries[i] ); + var[i] = select( minValsBdl[i] > var[i], boundaries[i], var[i] ); + var[i] = select( maxValsBdl[i] < var[i], boundaries[i], var[i] ); + const auto dTab_dVar = ( bdls[i] - bdl ) / deltaBdl[i]; + const auto dVar = ( var[i] - boundaries[i] ); addBdlVal += dTab_dVar * dVar; } bdl += addBdlVal; // ---- // -- order is: x, tx, y, chi2 - std::array<simd::float_v, 4> finalParams = { - protoTracks.xTT<simd::float_v>( t ), protoTracks.xSlopeTT<simd::float_v>( t ), - protoTracks.pos<simd::float_v>( t ).y + - protoTracks.dir<simd::float_v>( t ).y * ( zMidUT - protoTracks.pos<simd::float_v>( t ).z ), - protoTracks.chi2TT<simd::float_v>( t ) }; - - const simd::float_v qpxz2p = -1.0f / bdl * 3.3356f / Gaudi::Units::GeV; - simd::mask_v fitMask = simd::mask_true(); - simd::float_v qp = m_finalFit ? fastfitterSIMD( finalParams, protoTracks, zMidUT, qpxz2p, t, fitMask ) - : protoTracks.qp<simd::float_v>( t ) / - sqrt( 1.0f + protoTracks.dir<simd::float_v>( t ).y * - protoTracks.dir<simd::float_v>( t ).y ); // is this correct? - - qp = select( fitMask, qp, protoTracks.qp<simd::float_v>( t ) ); - const simd::float_v qop = select( abs( bdl ) < 1.e-8f, simd::float_v{ 1000.0f }, qp * qpxz2p ); + auto finalParams = std::array{ pTrack.template field<ProtoTrackTag::XOffset>().get(), + pTrack.template field<ProtoTrackTag::XSlope>().get(), y + ty * ( zMidUT - z ), + pTrack.template field<ProtoTrackTag::Chi2>().get() }; + + const auto qpxz2p = -1.0f / bdl * 3.3356f / Gaudi::Units::GeV; + auto fitMask = simd::mask_true(); + auto qp = m_finalFit + ? fastfitterSIMD( finalParams, pTrack, zMidUT, qpxz2p, fitMask ) + : pTrack.template field<ProtoTrackTag::QOverP>().get() / sqrt( 1.0f + ty * ty ); // is this correct? + + qp = select( fitMask, qp, pTrack.template field<ProtoTrackTag::QOverP>().get() ); + const auto qop = select( abs( bdl ) < 1.e-8f, simd::float_v{ 1000.0f }, qp * qpxz2p ); // -- Don't make tracks that have grossly too low momentum // -- Beware of the momentum resolution! - const simd::float_v p = abs( 1.0f / qop ); - const simd::float_v pt = - p * sqrt( protoTracks.dir<simd::float_v>( t ).x * protoTracks.dir<simd::float_v>( t ).x + - protoTracks.dir<simd::float_v>( t ).y * protoTracks.dir<simd::float_v>( t ).y ); - - const simd::float_v xUT = finalParams[0]; - const simd::float_v txUT = finalParams[1]; - const simd::float_v yUT = finalParams[2]; - const auto tyUT = protoTracks.dir<simd::float_v>( t ).y; + const auto p = abs( 1.0f / qop ); + const auto pt = p * sqrt( tx * tx + ty * ty ); + const auto xUT = finalParams[0]; + const auto txUT = finalParams[1]; + const auto yUT = finalParams[2]; // -- apply some fiducial cuts // -- they are optimised for high pT tracks (> 500 MeV) - simd::mask_v fiducialMask = simd::mask_false(); + auto fiducialMask = simd::mask_false(); if ( m_fiducialCuts ) { const float magSign = magnet.signedRelativeCurrent(); @@ -1086,48 +948,43 @@ namespace LHCb::Pr { fiducialMask = fiducialMask || ( magSign * qop < 0.0f && txUT > 0.09f + 0.0003f * pt ); fiducialMask = fiducialMask || ( magSign * qop > 0.0f && txUT < -0.09f - 0.0003f * pt ); } - // -- evaluate the linear discriminant and reject ghosts // -- the values only make sense if the final fit is performed - simd::mask_v mvaMask = simd::mask_true(); - - if ( m_finalFit ) { - - const simd::float_v fourHitDisc = evaluateLinearDiscriminantSIMD<4>( { p, pt, finalParams[3] } ); - const simd::float_v threeHitDisc = evaluateLinearDiscriminantSIMD<3>( { p, pt, finalParams[3] } ); - - simd::mask_v fourHitMask = fourHitDisc > m_LD4Hits.value(); - simd::mask_v threeHitMask = threeHitDisc > m_LD3Hits.value(); - + auto mvaMask = simd::mask_true(); + if ( const auto fourHitTrack = pTrack.template field<ProtoTrackTag::Weight>( 3 ).get() > 0.0001f; m_finalFit ) { + const auto fourHitDisc = evaluateLinearDiscriminantSIMD<4>( { p, pt, finalParams[3] } ); + const auto threeHitDisc = evaluateLinearDiscriminantSIMD<3>( { p, pt, finalParams[3] } ); + const auto fourHitMask = fourHitDisc > m_LD4Hits.value(); + const auto threeHitMask = threeHitDisc > m_LD3Hits.value(); // -- only have 3 or 4 hit tracks mvaMask = ( fourHitTrack && fourHitMask ) || ( !fourHitTrack && threeHitMask ); } const auto pPTMask = p > m_minMomentumFinal.value() && pt > m_minPTFinal.value(); - const auto loopMask = simd::loop_mask( t, pSize ); - const auto validTrackMask = pPTMask && !fiducialMask && mvaMask && loopMask; + const auto loopMask = pTrack.loop_mask(); + const auto validTrackMask = pPTMask && !fiducialMask && mvaMask && loopMask && !pTrack.isInvalid(); const auto finalMask = m_filterMode ? loopMask : validTrackMask; const auto finalQoP = select( validTrackMask, qop, nanMomentum ); - const auto ancestor = protoTracks.ancestorIndex<simd::int_v>( t ); + const auto ancestor = pTrack.template field<ProtoTrackTag::Ancestor>().get(); const auto velo_ancestor = velozipped.gather( ancestor, finalMask ); const auto currentsize = outputTracks.size(); - const auto oTrack = outputTracks.compress_back( finalMask ); - - oTrack.field<TracksTag::trackVP>().set( ancestor ); - oTrack.field<TracksTag::State>().setQOverP( finalQoP ); - oTrack.field<TracksTag::State>().setPosition( xUT, yUT, zMidUT ); - oTrack.field<TracksTag::State>().setDirection( txUT, tyUT ); - oTrack.field<TracksTag::VPHits>().resize( velo_ancestor.nHits() ); - + auto oTrack = outputTracks.compress_back( finalMask ); + oTrack.template field<TracksTag::trackVP>().set( ancestor ); + oTrack.template field<TracksTag::State>().setQOverP( finalQoP ); + oTrack.template field<TracksTag::State>().setPosition( xUT, yUT, zMidUT ); + oTrack.template field<TracksTag::State>().setDirection( txUT, ty ); + oTrack.template field<TracksTag::VPHits>().resize( velo_ancestor.nHits() ); for ( int idx = 0; idx < velo_ancestor.nHits().hmax( finalMask ); ++idx ) { - oTrack.field<TracksTag::VPHits>()[idx].field<TracksTag::Index>().set( velo_ancestor.vp_index( idx ) ); - oTrack.field<TracksTag::VPHits>()[idx].field<TracksTag::LHCbID>().set( velo_ancestor.vp_lhcbID( idx ) ); + oTrack.template field<TracksTag::VPHits>()[idx].template field<TracksTag::Index>().set( + velo_ancestor.vp_index( idx ) ); + oTrack.template field<TracksTag::VPHits>()[idx].template field<TracksTag::LHCbID>().set( + velo_ancestor.vp_lhcbID( idx ) ); } if ( m_filterMode ) { - oTrack.field<TracksTag::UTHits>().resize( 0 ); + oTrack.template field<TracksTag::UTHits>().resize( 0 ); continue; } @@ -1141,17 +998,17 @@ namespace LHCb::Pr { // -- from here on, go over each track individually to find and add the overlap hits // -- this is not particularly elegant... // -- As before, these are "pseudo layers", i.e. it is not guaranteed that if i > j, z[i] > z[j] - + const auto t = ipTrack * simd::size; + const auto pScalar = protoTracks.scalar(); for ( int iLayer = 0; iLayer < totalUTLayers; ++iLayer ) { int trackIndex2 = 0; for ( unsigned int t2 = 0; t2 < simd::size; ++t2 ) { if ( !testbit( finalMask, t2 ) ) continue; const auto tscalar = t + t2; - - const bool goodHit = ( protoTracks.weight<scalar::float_v>( tscalar, iLayer ).cast() > 0.0001f ); - const auto hitIdx = protoTracks.hitIndex<scalar::int_v>( tscalar, iLayer ); - const auto id = protoTracks.id<scalar::int_v>( tscalar, iLayer ); - + const bool goodHit = + ( pScalar[tscalar].template field<ProtoTrackTag::Weight>( iLayer ).get().cast() > 0.0001f ); + const auto hitIdx = pScalar[tscalar].template field<ProtoTrackTag::HitIndex>( iLayer ).get(); + const auto id = pScalar[tscalar].template field<ProtoTrackTag::ID>( iLayer ).get(); // -- Only add the hit, if it is not in an empty layer (that sounds like a tautology, // -- but given that one always has 4 hits, even if only 3 make sense, it is needed) // -- Only the last pseudo-layer can be an empty layer @@ -1169,9 +1026,10 @@ namespace LHCb::Pr { // -- In layers where no hit was found initially, we use the better parametrization of the final // -- track fit to pick up hits that were lost in the initial search // ----------------------------------------------------------------------------------- - const float zhit = goodHit ? protoTracks.z<scalar::float_v>( tscalar, iLayer ).cast() : zMidUT; - const float xhit = goodHit ? protoTracks.x<scalar::float_v>( tscalar, iLayer ).cast() : xArray[t2]; - const int hitContIndex = protoTracks.hitContIndex<scalar::int_v>( tscalar ).cast(); + const auto zhit = goodHit ? pScalar[tscalar].template field<ProtoTrackTag::Z>( iLayer ).get().cast() : zMidUT; + const auto xhit = + goodHit ? pScalar[tscalar].template field<ProtoTrackTag::X>( iLayer ).get().cast() : xArray[t2]; + const auto hitContIndex = pScalar[tscalar].template field<ProtoTrackTag::HitIndexContainer>().get().cast(); // -- The total sum of all plane codes is: 0 + 1 + 2 + 3 = 6 // -- We can therefore get the plane code of the last pseudo-layer @@ -1186,20 +1044,22 @@ namespace LHCb::Pr { const int begin = hitsInLayers[hitContIndex].layerIndices[pC]; const int end = ( pC == 3 ) ? hitsInLayers[hitContIndex].size() : hitsInLayers[hitContIndex].layerIndices[pC + 1]; - const auto& hitsInL = hitsInLayers[hitContIndex].scalar(); + const auto hitsInL = hitsInLayers[hitContIndex].scalar(); + for ( int index2 = begin; index2 < end; ++index2 ) { const float zohit = hitsInL[index2].z().cast(); if ( essentiallyEqual( zohit, zhit ) ) continue; const float xohit = hitsInL[index2].x().cast(); const float xextrap = xhit + txUTS * ( zohit - zhit ); + if ( xohit - xextrap < -m_overlapTol ) continue; if ( xohit - xextrap > m_overlapTol ) break; - if ( nUTHits[t2] >= int( LHCb::Pr::Upstream::Tracks::MaxUTHits ) ) continue; - const scalar::int_v utidx = hitsInL[index2].index(); - LHCb::LHCbID oid( LHCb::Detector::UT::ChannelID( hitsInL[index2].channelID().cast() ) ); - const scalar::int_v lhcbid = bit_cast<int>( oid.lhcbID() ); + if ( nUTHits[t2] >= static_cast<int>( Upstream::Tracks::MaxUTHits ) ) continue; + const auto utidx = hitsInL[index2].index(); + const auto oid = LHCbID{ LHCb::Detector::UT::ChannelID( hitsInL[index2].channelID().cast() ) }; + const auto lhcbid = bit_cast<int>( oid.lhcbID() ); auto hits = outputTracks.scalar()[currentsize + trackIndex2].field<TracksTag::UTHits>(); hits.resize( nUTHits[t2] + 1 ); @@ -1215,4 +1075,4 @@ namespace LHCb::Pr { } } // loop on t } -} // namespace LHCb::Pr +} // namespace LHCb::Pr::VeloUT