diff --git a/HLT/Trigger/TrigControl/TrigServices/TrigServices/HltEventLoopMgr.h b/HLT/Trigger/TrigControl/TrigServices/TrigServices/HltEventLoopMgr.h
index cadd6d2e04792c871113d15ac1fc594b83f7b8dc..ec5388bb03afc84bef305e4bc93f458a4f3bc834 100644
--- a/HLT/Trigger/TrigControl/TrigServices/TrigServices/HltEventLoopMgr.h
+++ b/HLT/Trigger/TrigControl/TrigServices/TrigServices/HltEventLoopMgr.h
@@ -23,6 +23,14 @@
 // TDAQ includes
 #include "eformat/write/FullEventFragment.h"
 
+// System includes
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <thread>
+
 // Forward declarations
 namespace coral {
   class AttributeList;
@@ -101,6 +109,7 @@ public:
                          hltinterface::HLTResult& hlt_result,
                          const hltinterface::EventId& evId);
 
+  [[deprecated]]
   virtual StatusCode timeOutReached(const boost::property_tree::ptree& pt);
 
 private:
@@ -142,6 +151,9 @@ private:
   /// Send an HLT result FullEventFragment to the DataCollector
   void eventDone(eformat::write::FullEventFragment* hltrFragment) const;
 
+  /// The method executed by the event timeout monitoring thread
+  void runEventTimer();
+
   // ------------------------- Reimplemented AthenaHiveEventLoopMgr helpers ----
   /// Create event context
   StatusCode createEventContext(EventContext*& eventContext) const;
@@ -220,6 +232,19 @@ private:
 
   /// Vector of top level algorithms
   std::vector<SmartIF<IAlgorithm> > m_topAlgList;
+
+  /// Vector of event start-processing time stamps in each slot (default-constructed = slot not started yet)
+  std::vector<std::chrono::steady_clock::time_point> m_eventTimerStartPoint;
+  /// Mutex protecting m_eventTimerStartPoint, used together with m_timeoutCond
+  std::mutex m_timeoutMutex;
+  /// Condition variable used to wake up the timeout thread
+  std::condition_variable m_timeoutCond;
+  /// Timeout thread, executes runEventTimer()
+  std::unique_ptr<std::thread> m_timeoutThread;
+  /// Soft timeout value in milliseconds
+  int m_softTimeoutValue;
+  /// Flag set to false if timer thread should be stopped
+  std::atomic<bool> m_runEventTimer;
 };
 
 //==============================================================================
diff --git a/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx b/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx
index a6b9ed7aae12050bdec73f75a0695b61be108d94..a5b8c7f571eb3967a6f22e80e88f219f49e41b7b 100644
--- a/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx
+++ b/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx
@@ -58,7 +58,9 @@ HltEventLoopMgr::HltEventLoopMgr(const std::string& name, ISvcLocator* svcLoc)
   m_detector_mask(0xffffffff, 0xffffffff, 0, 0),
   m_nevt(0),
   m_threadPoolSize(-1),
-  m_evtSelContext(nullptr)
+  m_evtSelContext(nullptr),
+  m_softTimeoutValue(10000),
+  m_runEventTimer(true)
 {
   ATH_MSG_VERBOSE("start of " << __FUNCTION__);
 
@@ -493,6 +495,17 @@ StatusCode HltEventLoopMgr::hltUpdateAfterFork(const ptree& /*pt*/)
     ATH_MSG_WARNING("Could not retrieve CoreDumpSvc");
   }
 
+  // Start the timeout thread
+  m_timeoutThread = std::make_unique<std::thread>(&HltEventLoopMgr::runEventTimer, this);
+
+  // Initialise vector of time points for event timeout monitoring
+  {
+    std::unique_lock<std::mutex> lock(m_timeoutMutex);
+    m_eventTimerStartPoint.clear();
+    m_eventTimerStartPoint.resize(m_whiteboard->getNumberOfStores(), std::chrono::steady_clock::time_point());
+    m_timeoutCond.notify_all();
+  }
+
   ATH_MSG_VERBOSE("end of " << __FUNCTION__);
   return StatusCode::SUCCESS;
 }
@@ -512,6 +525,7 @@ StatusCode HltEventLoopMgr::processRoIs(
 
 // =============================================================================
 // Implementation of ITrigEventLoopMgr::timeOutReached
+// DUE TO BE REMOVED
 // =============================================================================
 StatusCode HltEventLoopMgr::timeOutReached(const boost::property_tree::ptree& /*pt*/)
 {
@@ -532,6 +546,21 @@ StatusCode HltEventLoopMgr::executeRun(int maxevt)
   }
 
   // do some cleanup here
+
+  // stop the timer thread - this should be in the stop() transition, but it is not called at the moment
+  ATH_MSG_DEBUG("Stopping the timeout thread");
+  {
+    // request the stop under the mutex so the timer thread cannot miss the wake-up
+    std::unique_lock<std::mutex> lock(m_timeoutMutex);
+    m_runEventTimer = false;
+    m_timeoutCond.notify_all();
+  }
+  // join only after releasing the mutex: the timer thread has to re-acquire it to return from wait_for
+  if (m_timeoutThread && m_timeoutThread->joinable()) {
+    m_timeoutThread->join();
+  }
+  ATH_MSG_DEBUG("The timeout thread finished");
+
   ATH_MSG_VERBOSE("end of " << __FUNCTION__);
   return sc;
 }
@@ -608,6 +637,14 @@ StatusCode HltEventLoopMgr::nextEvent(int /*maxevt*/)
       continue;
     }
 
+    // set event processing start time for timeout monitoring and reset timeout flag
+    {
+      std::unique_lock<std::mutex> lock(m_timeoutMutex);
+      m_eventTimerStartPoint[evtContext->slot()] = std::chrono::steady_clock::now();
+      resetTimeout(Athena::Timeout::instance(*evtContext));
+      m_timeoutCond.notify_all();
+    }
+
     IOpaqueAddress* addr = nullptr;
     if (m_evtSelector->createAddress(*m_evtSelContext, addr).isFailure()) {
       ATH_MSG_ERROR("Could not create an IOpaqueAddress");
@@ -1030,6 +1067,39 @@ void HltEventLoopMgr::eventDone(eformat::write::FullEventFragment* hltrFragment)
   ATH_MSG_VERBOSE("end of " << __FUNCTION__);
 }
 
+// ==============================================================================
+void HltEventLoopMgr::runEventTimer()
+{
+  ATH_MSG_VERBOSE("start of " << __FUNCTION__);
+  const auto softDuration = std::chrono::milliseconds(m_softTimeoutValue);
+  // a default-constructed time point marks a slot which has not started processing an event yet
+  const std::chrono::steady_clock::time_point notStarted{};
+  std::unique_lock<std::mutex> lock(m_timeoutMutex);
+  while (m_runEventTimer) {
+    // the mutex is released while waiting and held again while the slots are checked
+    m_timeoutCond.wait_for(lock,std::chrono::seconds(1));
+    const auto now=std::chrono::steady_clock::now();
+    for (size_t i=0; i<m_eventTimerStartPoint.size(); ++i) {
+      // iterate over all slots and check for timeout
+      const auto startPoint = m_eventTimerStartPoint[i];
+      // skip never-used slots: their epoch time stamp would be reported as a timeout
+      if (startPoint == notStarted) continue;
+      if (now > startPoint + softDuration) {
+        EventContext ctx(i,0); // we only need the slot number for Athena::Timeout instance
+        // don't duplicate the actions if the timeout was already reached
+        if (!Athena::Timeout::instance(ctx).reached()) {
+          auto procTime = now - startPoint;
+          auto procTimeMillisec = std::chrono::duration_cast<std::chrono::milliseconds>(procTime);
+          ATH_MSG_WARNING("Soft timeout in slot " << i << ". Processing time = "
+                          << procTimeMillisec.count() << " ms");
+          setTimeout(Athena::Timeout::instance(ctx));
+        }
+      }
+    }
+  }
+  ATH_MSG_VERBOSE("end of " << __FUNCTION__);
+}
+
 // ==============================================================================
 StatusCode HltEventLoopMgr::createEventContext(EventContext*& evtContext) const
 {