diff --git a/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoAlg.cxx b/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoAlg.cxx index 8d9466b1ea96a47993c5cfbfcf58f55bbbdb9ca9..6ad94e7227bb909eb818fe044e00534ee75ef908 100644 --- a/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoAlg.cxx +++ b/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoAlg.cxx @@ -1,9 +1,10 @@ /* - Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration */ #include "MTCalibPebHypoAlg.h" #include "DecisionHandling/HLTIdentifier.h" +#include "AthenaKernel/AthStatusCode.h" // TrigCompositeUtils types used here using TrigCompositeUtils::Decision; @@ -64,7 +65,12 @@ StatusCode MTCalibPebHypoAlg::execute(const EventContext& eventContext) const { // Call the hypo tools for (const auto& tool: m_hypoTools) { ATH_MSG_DEBUG("Calling " << tool); - ATH_CHECK(tool->decide(toolInput)); + StatusCode sc = tool->decide(toolInput); + if (sc == Athena::Status::TIMEOUT) { + ATH_MSG_ERROR("Timeout reached in hypo tool " << tool->name()); + return sc; + } + else ATH_CHECK(sc); } ATH_MSG_DEBUG( "Exiting with "<< outputHandle->size() <<" decisions"); diff --git a/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.cxx b/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.cxx index fb805b06bf1f74c54a2b1ad53c19897ea2da6504..3786018d926326782991b2141c119921b7e676f6 100644 --- a/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.cxx +++ b/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.cxx @@ -1,5 +1,5 @@ /* - Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration */ // Trigger includes @@ -8,6 +8,7 @@ // Athena includes #include "AthenaKernel/Timeout.h" +#include "AthenaKernel/AthStatusCode.h" // System includes #include <random> @@ -148,12 +149,8 @@ StatusCode MTCalibPebHypoTool::decide(const MTCalibPebHypoTool::Input& input) co // --------------------------------------------------------------------------- for (unsigned int iCycle = 0; iCycle < m_numBurnCycles; ++iCycle) { if (Athena::Timeout::instance(input.eventContext).reached()) { - if (m_failOnTimeout) { - ATH_MSG_ERROR("Timeout reached in CPU time burning cycle # " << iCycle+1 << " and FailOnTimeout is true"); - return StatusCode::FAILURE; - } - ATH_MSG_INFO("Timeout reached in CPU time burning cycle # " << iCycle+1); - break; + ATH_MSG_ERROR("Timeout reached in CPU time burning cycle # " << iCycle+1); + return Athena::Status::TIMEOUT; } unsigned int burnTime = m_burnTimeRandomly ? randomInteger<unsigned int>(0, m_burnTimePerCycleMillisec) @@ -168,8 +165,8 @@ StatusCode MTCalibPebHypoTool::decide(const MTCalibPebHypoTool::Input& input) co for (const auto& p : m_robAccessDict) { // Check for timeout if (Athena::Timeout::instance(input.eventContext).reached()) { - ATH_MSG_INFO("Timeout reached in ROB retrieval loop"); - break; + ATH_MSG_ERROR("Timeout reached in ROB retrieval loop"); + return Athena::Status::TIMEOUT; } const std::string& instruction = p.first; const std::vector<uint32_t>& robs = p.second; diff --git a/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.h b/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.h index 9010986303fb09d82790ed43bdee7bc40e657f57..667d44cd73772847948bcf4ff2fdeecd43e1f186 100644 --- a/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.h +++ b/HLT/Trigger/TrigControl/TrigExamples/TrigExPartialEB/src/MTCalibPebHypoTool.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration */ #ifndef TRIGEXPARTIALEB_MTCALIBPEBHYPOTOOL_H @@ -60,10 +60,6 @@ private: this, "BurnTimeRandomly", true, "If true, burn time per cycle is a random value from uniform distribution between 0 and the given value" }; - Gaudi::Property<bool> m_failOnTimeout { - this, "FailOnTimeout", true, - "If true, the execution will return StatusCode::FAILURE when Athena timeout is detected" - }; Gaudi::Property<std::map<std::string,std::vector<uint32_t> > > m_robAccessDict { this, "ROBAccessDict", {}, "List of prefetch/retrieve operations with given ROB IDs." diff --git a/HLT/Trigger/TrigControl/TrigKernel/TrigKernel/HltPscErrorCode.h b/HLT/Trigger/TrigControl/TrigKernel/TrigKernel/HltPscErrorCode.h index de33953d961900e37c478b84949f8565aba58cd1..d8f145bd780591ef76da845e85a0125df4730c56 100644 --- a/HLT/Trigger/TrigControl/TrigKernel/TrigKernel/HltPscErrorCode.h +++ b/HLT/Trigger/TrigControl/TrigKernel/TrigKernel/HltPscErrorCode.h @@ -22,7 +22,8 @@ namespace hltonl { OUTPUT_BUILD_FAILURE = 8, OUTPUT_SEND_FAILURE = 9, AFTER_RESULT_SENT = 10, - COOL_UPDATE = 11 + COOL_UPDATE = 11, + TIMEOUT = 12 }; /// helper class to map HLT PSC error code on a string name or a int index @@ -50,6 +51,7 @@ inline hltonl::MapPscErrorCode::MapPscErrorCode() { add(hltonl::PSCErrorCode::OUTPUT_SEND_FAILURE, "PSCErrorCode::OUTPUT_SEND_FAILURE"); add(hltonl::PSCErrorCode::AFTER_RESULT_SENT, "PSCErrorCode::AFTER_RESULT_SENT"); add(hltonl::PSCErrorCode::COOL_UPDATE, "PSCErrorCode::COOL_UPDATE"); + add(hltonl::PSCErrorCode::TIMEOUT, "PSCErrorCode::TIMEOUT"); // return values in case of invalid code invalidCode("UNDEFINED",-1); } diff --git a/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx b/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx index c49c1fc21767e4165e679f5f6bf109532a046639..9db5388955eabee5d39fea329a53210d9923f715 100644 --- a/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx +++ b/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.cxx @@ -10,6 +10,7 @@ #include "TrigSteeringEvent/HLTResultMT.h" // Athena includes +#include "AthenaKernel/AthStatusCode.h" #include "ByteStreamCnvSvcBase/IROBDataProviderSvc.h" #include "ByteStreamData/ByteStreamMetadata.h" #include "StoreGate/StoreGateSvc.h" @@ -102,7 +103,10 @@ StatusCode HltEventLoopMgr::initialize() // Setup properties //---------------------------------------------------------------------------- - // read DataFlow configuration properties + // Set the timeout value (cast float to int) + m_softTimeoutValue = static_cast<int>(m_hardTimeout.value() * m_softTimeoutFraction.value()); + + // Read DataFlow configuration properties updateDFProps(); // JobOptions type @@ -124,9 +128,11 @@ StatusCode HltEventLoopMgr::initialize() ATH_MSG_INFO(" ---> ApplicationName = " << m_applicationName); ATH_MSG_INFO(" ---> HardTimeout = " << m_hardTimeout.value()); ATH_MSG_INFO(" ---> SoftTimeoutFraction = " << m_softTimeoutFraction.value()); + ATH_MSG_INFO(" ---> SoftTimeoutValue = " << m_softTimeoutValue); ATH_MSG_INFO(" ---> MaxFrameworkErrors = " << m_maxFrameworkErrors.value()); ATH_MSG_INFO(" ---> FwkErrorDebugStreamName = " << m_fwkErrorDebugStreamName.value()); ATH_MSG_INFO(" ---> AlgErrorDebugStreamName = " << m_algErrorDebugStreamName.value()); + ATH_MSG_INFO(" ---> TimeoutDebugStreamName = " << m_timeoutDebugStreamName.value()); ATH_MSG_INFO(" ---> EventContextWHKey = " << m_eventContextWHKey.key()); ATH_MSG_INFO(" ---> EventInfoRHKey = " << m_eventInfoRHKey.key()); @@ -943,7 +949,8 @@ void HltEventLoopMgr::printSORAttrList(const coral::AttributeList& atr) const // ============================================================================= StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const EventContext& eventContext) { - ATH_MSG_VERBOSE("start of " << __FUNCTION__); + ATH_MSG_VERBOSE("start of " << __FUNCTION__ << " with errorCode = " << hltonl::PrintPscErrorCode(errorCode) + << ", context = " << eventContext << " eventID = " << eventContext.eventID()); // Used by MsgSvc (and possibly others but not relevant here) Gaudi::Hive::setCurrentContext(eventContext); @@ -1014,15 +1021,8 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev } //---------------------------------------------------------------------------- - // Try to retrieve event info for printouts + // Handle SCHEDULING_FAILURE //---------------------------------------------------------------------------- - auto eventInfo = SG::makeHandle(m_eventInfoRHKey,eventContext); - std::ostringstream ss; - if (eventInfo.isValid()) { - ss << *eventInfo->event_ID(); - } - std::string eventInfoString = ss.str(); - if (errorCode==hltonl::PSCErrorCode::SCHEDULING_FAILURE) { // Here we cannot be certain if the scheduler started processing the event or not, so we can only try to drain // the scheduler and continue. Trying to create a debug stream result for this event and clear the event slot may @@ -1030,7 +1030,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev ATH_REPORT_MESSAGE(MSG::ERROR) << "Failure occurred with PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) << ". Cannot determine if the" << " event processing started or not. Current local event number is " << eventContext.evt() - << ", slot " << eventContext.slot() << ". " << eventInfoString + << ", slot " << eventContext.slot() << ", eventID = " << eventContext.eventID() << " All slots of this HltEventLoopMgr instance will be drained before proceeding."; return drainAllAndProceed(); } @@ -1057,7 +1057,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev ATH_REPORT_MESSAGE(MSG::ERROR) << "Failed to record the HLT Result in event store while handling a failed event. Likely an issue with the store." << " PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) << ", local event number " << eventContext.evt() - << ", slot " << eventContext.slot() << ". " << eventInfoString + << ", slot " << eventContext.slot() << ", eventID = " << eventContext.eventID() << " All slots of this HltEventLoopMgr instance will be drained before proceeding."; return drainAllAndProceed(); } @@ -1066,9 +1066,17 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev // Set error code and make sure the debug stream tag is added //---------------------------------------------------------------------------- hltResultWH->addErrorCode( static_cast<uint32_t>(errorCode) ); - std::string debugStreamName = (errorCode==hltonl::PSCErrorCode::PROCESSING_FAILURE) ? - m_algErrorDebugStreamName.value() : m_fwkErrorDebugStreamName.value(); - hltResultWH->addStreamTag({debugStreamName,eformat::DEBUG_TAG,true}); + switch (errorCode) { + case hltonl::PSCErrorCode::PROCESSING_FAILURE: + hltResultWH->addStreamTag({m_algErrorDebugStreamName.value(), eformat::DEBUG_TAG, true}); + break; + case hltonl::PSCErrorCode::TIMEOUT: + hltResultWH->addStreamTag({m_timeoutDebugStreamName.value(), eformat::DEBUG_TAG, true}); + break; + default: + hltResultWH->addStreamTag({m_fwkErrorDebugStreamName.value(), eformat::DEBUG_TAG, true}); + break; + } //---------------------------------------------------------------------------- // Try to build and send the output @@ -1077,7 +1085,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev ATH_REPORT_MESSAGE(MSG::ERROR) << "The output conversion service failed in connectOutput() while handling a failed event. No HLT result can be" << " recorded for this event. PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) << ", local event number " - << eventContext.evt() << ", slot " << eventContext.slot() << ". " << eventInfoString + << eventContext.evt() << ", slot " << eventContext.slot() << ", eventID = " << eventContext.eventID() << " All slots of this HltEventLoopMgr instance will be drained before proceeding."; return drainAllAndProceed(); } @@ -1087,7 +1095,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev ATH_REPORT_MESSAGE(MSG::ERROR) << "Failed to retrieve DataObject for the HLT result object while handling a failed event. No HLT result" << " can be recorded for this event. PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) - << ", local event number " << eventContext.evt() << ", slot " << eventContext.slot() << ". " << eventInfoString + << ", local event number " << eventContext.evt() << ", slot " << eventContext.slot() << ", eventID = " << eventContext.eventID() << " All slots of this HltEventLoopMgr instance will be drained before proceeding."; return drainAllAndProceed(); } @@ -1097,7 +1105,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev ATH_REPORT_MESSAGE(MSG::ERROR) << "Conversion of HLT result object to the output format failed while handling a failed event. No HLT result" << " can be recorded for this event. PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) - << ", local event number " << eventContext.evt() << ", slot " << eventContext.slot() << ". " << eventInfoString + << ", local event number " << eventContext.evt() << ", slot " << eventContext.slot() << ", eventID = " << eventContext.eventID() << " All slots of this HltEventLoopMgr instance will be drained before proceeding."; return drainAllAndProceed(); } @@ -1106,7 +1114,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev ATH_REPORT_MESSAGE(MSG::ERROR) << "The output conversion service failed in commitOutput() while handling a failed event. No HLT result can be" << " recorded for this event. PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) << ", local event number " - << eventContext.evt() << ", slot " << eventContext.slot() << ". " << eventInfoString + << eventContext.evt() << ", slot " << eventContext.slot() << ", eventID = " << eventContext.eventID() << " All slots of this HltEventLoopMgr instance will be drained before proceeding."; return drainAllAndProceed(); } @@ -1123,14 +1131,14 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev // Finish handling the failed event //---------------------------------------------------------------------------- - // Unless this is a processing (i.e. algorithm) failure, increment the number of framework failures - if (errorCode != hltonl::PSCErrorCode::PROCESSING_FAILURE) { + // Unless this is a timeout or processing (i.e. algorithm) failure, increment the number of framework failures + if (errorCode != hltonl::PSCErrorCode::TIMEOUT && errorCode != hltonl::PSCErrorCode::PROCESSING_FAILURE) { if ( (++m_nFrameworkErrors)>m_maxFrameworkErrors.value() ) { ATH_REPORT_MESSAGE(MSG::ERROR) << "Failure with PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) << " was successfully handled, but the" << " number of tolerable framework errors for this HltEventLoopMgr instance, which is " << m_maxFrameworkErrors.value() << ", was exceeded. Current local event number is " << eventContextCopy.evt() - << ", slot " << eventContextCopy.slot() << ". " << eventInfoString + << ", slot " << eventContextCopy.slot() << ", eventID = " << eventContextCopy.eventID() << " All slots of this HltEventLoopMgr instance will be drained and the loop will exit."; ATH_CHECK(drainAllSlots()); return StatusCode::FAILURE; @@ -1140,7 +1148,7 @@ StatusCode HltEventLoopMgr::failedEvent(hltonl::PSCErrorCode errorCode, const Ev // Even if handling the failed event succeeded, print an error message with failed event details ATH_REPORT_MESSAGE(MSG::ERROR) << "Failed event with PSCErrorCode=" << hltonl::PrintPscErrorCode(errorCode) << " Current local event number is " - << eventContextCopy.evt() << ", slot " << eventContextCopy.slot() << ". " << eventInfoString; + << eventContextCopy.evt() << ", slot " << eventContextCopy.slot() << ", eventID = " << eventContextCopy.eventID(); ATH_MSG_VERBOSE("end of " << __FUNCTION__); return StatusCode::SUCCESS; // continue the event loop @@ -1174,6 +1182,17 @@ void HltEventLoopMgr::runEventTimer() ATH_MSG_VERBOSE("end of " << __FUNCTION__); } +// ============================================================================= +bool HltEventLoopMgr::isTimedOut(const EventContext& eventContext) const { + for (const auto& [key, state] : m_aess->algExecStates(eventContext)) { + if (state.execStatus() == Athena::Status::TIMEOUT) { + ATH_MSG_DEBUG("Algorithm " << key << " returned Athena::Status::TIMEOUT in event " << eventContext.eventID()); + return true; + } + } + return false; +} + // ============================================================================= /** * @brief Retrieves finished events from the scheduler, processes their output and cleans up the slots @@ -1229,9 +1248,11 @@ HltEventLoopMgr::DrainSchedulerStatusCode HltEventLoopMgr::drainScheduler() // Check the event processing status if (m_aess->eventStatus(*thisFinishedEvtContext) != EventStatus::Success) markFailed(); + hltonl::PSCErrorCode errCode = isTimedOut(*thisFinishedEvtContext) ? + hltonl::PSCErrorCode::TIMEOUT : hltonl::PSCErrorCode::PROCESSING_FAILURE; HLT_DRAINSCHED_CHECK(sc, "Processing event with context " << *thisFinishedEvtContext << " failed with status " << m_aess->eventStatus(*thisFinishedEvtContext), - hltonl::PSCErrorCode::PROCESSING_FAILURE, *thisFinishedEvtContext); + errCode, *thisFinishedEvtContext); // Select the whiteboard slot sc = m_whiteboard->selectStore(thisFinishedEvtContext->slot()); diff --git a/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.h b/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.h index 182d6a8f7a210d729cd079d99e065784a4e21831..e3553adc11adb4d7afa46823d86485fcbc0418b8 100644 --- a/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.h +++ b/HLT/Trigger/TrigControl/TrigServices/src/HltEventLoopMgr.h @@ -153,6 +153,9 @@ private: /// The method executed by the event timeout monitoring thread void runEventTimer(); + /// Uses AlgExecStateSvc to determine if any algorithm in the event returned Athena::Status::TIMEOUT + bool isTimedOut(const EventContext& eventContext) const; + /// Drain the scheduler from all actions that may be queued DrainSchedulerStatusCode drainScheduler(); @@ -212,9 +215,13 @@ private: "Debug stream name for events with HLT framework errors"}; Gaudi::Property<std::string> m_algErrorDebugStreamName{ - this, "AlgErrorDebugStreamName", "HLTError", + this, "AlgErrorDebugStreamName", "HltError", "Debug stream name for events with HLT algorithm errors"}; + Gaudi::Property<std::string> m_timeoutDebugStreamName{ + this, "TimeoutDebugStreamName", "HltTimeout", + "Debug stream name for events with HLT timeout"}; + Gaudi::Property<std::string> m_sorPath{ this, "SORPath", "/TDAQ/RunCtrl/SOR_Params", "Path to StartOfRun parameters in detector store"}; @@ -243,7 +250,7 @@ private: /// Event counter used for local bookkeeping; incremental per instance of HltEventLoopMgr, unrelated to global_id size_t m_localEventNumber{0}; /// Event selector context - IEvtSelector::Context* m_evtSelContext; + IEvtSelector::Context* m_evtSelContext{nullptr}; /// Vector of top level algorithms std::vector<SmartIF<IAlgorithm> > m_topAlgList; /// Vector of event start-processing time stamps in each slot @@ -256,10 +263,10 @@ private: std::condition_variable m_timeoutCond; /// Timeout thread std::unique_ptr<std::thread> m_timeoutThread; - /// Soft timeout value - int m_softTimeoutValue; + /// Soft timeout value set to HardTimeout*SoftTimeoutFraction at initialisation + int m_softTimeoutValue{0}; /// Flag set to false if timer thread should be stopped - std::atomic<bool> m_runEventTimer; + std::atomic<bool> m_runEventTimer{true}; /// Counter of framework errors int m_nFrameworkErrors{0}; /// Application name diff --git a/Trigger/TrigEvent/TrigSteeringEvent/src/HLTResultMT.cxx b/Trigger/TrigEvent/TrigSteeringEvent/src/HLTResultMT.cxx index f1ad2d38c24c4c52777699cf8fd6568e39f52e7a..873d3a2db6df17c28e35127a1790bcc9f87d0310 100644 --- a/Trigger/TrigEvent/TrigSteeringEvent/src/HLTResultMT.cxx +++ b/Trigger/TrigEvent/TrigSteeringEvent/src/HLTResultMT.cxx @@ -235,6 +235,7 @@ std::ostream& operator<<(std::ostream& str, const HLT::HLTResultMT& hltResult) { for (const auto& detid : st.dets) printWord(detid,2); str << "]}" << std::endl; } + if (hltResult.getStreamTags().empty()) str << std::endl; // HLT bits std::vector<uint32_t> hltBitWords; diff --git a/Trigger/TrigSteer/TrigHLTResultByteStream/src/HLTResultMTByteStreamCnv.cxx b/Trigger/TrigSteer/TrigHLTResultByteStream/src/HLTResultMTByteStreamCnv.cxx index 1bca9613f6b1e53d62e7129faf851f56a51aaf6b..7d7a38d94545b5e67c83ed85d7bebbd8b5c1e09b 100644 --- a/Trigger/TrigSteer/TrigHLTResultByteStream/src/HLTResultMTByteStreamCnv.cxx +++ b/Trigger/TrigSteer/TrigHLTResultByteStream/src/HLTResultMTByteStreamCnv.cxx @@ -104,6 +104,41 @@ StatusCode HLT::HLTResultMTByteStreamCnv::createRep(DataObject* pObj, IOpaqueAdd // its lifetime has to be at least as long as the lifetime of RawEventWrite which points to the StreamTag data delete m_streamTagData.release(); + // Read the stream tags to check for debug stream tag and decide which HLT ROBFragments to write out + std::set<eformat::helper::SourceIdentifier> resultIdsToWrite; + bool debugEvent=false; + auto isDebugStreamTag = [](const eformat::helper::StreamTag& st){ + return eformat::helper::string_to_tagtype(st.type) == eformat::TagType::DEBUG_TAG; + }; + for (const eformat::helper::StreamTag& st : hltResult->getStreamTags()) { + // Flag debug stream events + if (isDebugStreamTag(st)) debugEvent=true; + // In case of full event building, add the full result ID + if (st.robs.empty() && st.dets.empty()) { + eformat::helper::SourceIdentifier sid(eformat::SubDetector::TDAQ_HLT, fullResultModuleId); + resultIdsToWrite.insert(sid); + } + // In case of partial event building, add the results explicitly requested in the stream tag + for (const uint32_t robid : st.robs) { + eformat::helper::SourceIdentifier sid(robid); + if (sid.subdetector_id() == eformat::SubDetector::TDAQ_HLT) + resultIdsToWrite.insert(sid); + } + } + + // Remove all non-debug stream tags if the event goes to the debug stream. + // Write all HLT results (if available) to the debug stream. + if (debugEvent) { + std::vector<eformat::helper::StreamTag>& writableStreamTags = hltResult->getStreamTagsNonConst(); + writableStreamTags.erase( + std::remove_if(writableStreamTags.begin(),writableStreamTags.end(),std::not_fn(isDebugStreamTag)), + writableStreamTags.end() + ); + for (eformat::helper::StreamTag& st : writableStreamTags) + for (const eformat::helper::SourceIdentifier& sid : resultIdsToWrite) + st.robs.insert(sid.code()); + } + // Fill the stream tags uint32_t nStreamTagWords = eformat::helper::size_word(hltResult->getStreamTags()); m_streamTagData = std::make_unique<uint32_t[]>(nStreamTagWords); @@ -126,22 +161,6 @@ StatusCode HLT::HLTResultMTByteStreamCnv::createRep(DataObject* pObj, IOpaqueAdd const std::vector<uint32_t>& hltBits = hltResult->getHltBitsAsWords(); re->hlt_info(hltBits.size(), hltBits.data()); - // Read the stream tags to decide which HLT ROBFragments to write out - std::set<eformat::helper::SourceIdentifier> resultIdsToWrite; - for (const eformat::helper::StreamTag& st : hltResult->getStreamTags()) { - // In case of full event building, add the full result ID - if (st.robs.empty() && st.dets.empty()) { - eformat::helper::SourceIdentifier sid(eformat::SubDetector::TDAQ_HLT, fullResultModuleId); - resultIdsToWrite.insert(sid); - } - // In case of partial event building, add the results explicitly requested in the stream tag - for (const uint32_t robid : st.robs) { - eformat::helper::SourceIdentifier sid(robid); - if (sid.subdetector_id() == eformat::SubDetector::TDAQ_HLT) - resultIdsToWrite.insert(sid); - } - } - // Clear the FEA stack m_fullEventAssembler.clear(); @@ -152,6 +171,11 @@ StatusCode HLT::HLTResultMTByteStreamCnv::createRep(DataObject* pObj, IOpaqueAdd // Find the serialised data for this module ID const auto it = serialisedData.find(resultId.module_id()); if (it==serialisedData.end()) { + if (debugEvent) { + ATH_MSG_DEBUG("HLT result with ID 0x" << MSG::hex << resultId.code() << MSG::dec + << " requested by a debug stream tag, but missing in the serialised data - skipping this result"); + continue; + } ATH_MSG_ERROR("HLT result with ID 0x" << MSG::hex << resultId.code() << MSG::dec << " requested by a stream tag, but missing in the serialised data"); return StatusCode::FAILURE; diff --git a/Trigger/TrigSteer/TrigOutputHandling/src/HLTResultMTMaker.cxx b/Trigger/TrigSteer/TrigOutputHandling/src/HLTResultMTMaker.cxx index 48618245884dcdc7e3fb95ce1fa367ed24eadff2..ab709e020ff277e5497597af3bb97fa1389a0926 100644 --- a/Trigger/TrigSteer/TrigOutputHandling/src/HLTResultMTMaker.cxx +++ b/Trigger/TrigSteer/TrigOutputHandling/src/HLTResultMTMaker.cxx @@ -134,8 +134,12 @@ StatusCode HLTResultMTMaker::makeResult(const EventContext& eventContext) const // Fill the object using the result maker tools auto time = Monitored::Timer("TIME_build" ); + StatusCode finalStatus = StatusCode::SUCCESS; for (auto& maker: m_makerTools) { - ATH_CHECK(maker->fill(*hltResult)); + if (StatusCode sc = maker->fill(*hltResult); sc.isFailure()) { + ATH_MSG_ERROR(maker->name() << " failed"); + finalStatus = sc; + } } time.stop(); @@ -154,7 +158,7 @@ StatusCode HLTResultMTMaker::makeResult(const EventContext& eventContext) const Monitored::Group(m_monTool, time, nstreams, nfrags, sizeMain, bitWords); - return StatusCode::SUCCESS; + return finalStatus; } // ============================================================================= diff --git a/Trigger/TrigSteer/TrigOutputHandling/src/StreamTagMakerTool.cxx b/Trigger/TrigSteer/TrigOutputHandling/src/StreamTagMakerTool.cxx index d57eb54ae630416e72dc05b6803fd53269ebac54..e38acebc8b04b92d441dedc3f9cbb3c6730a88c7 100644 --- a/Trigger/TrigSteer/TrigOutputHandling/src/StreamTagMakerTool.cxx +++ b/Trigger/TrigSteer/TrigOutputHandling/src/StreamTagMakerTool.cxx @@ -52,6 +52,10 @@ StatusCode StreamTagMakerTool::fill( HLT::HLTResultMT& resultToFill ) const { // obtain chain decisions, using namespace TrigCompositeUtils; auto chainsHandle = SG::makeHandle( m_finalChainDecisions ); + if (!chainsHandle.isValid()) { + ATH_MSG_ERROR("Unable to read in the HLTSummary from the DecisionSummaryMakerAlg"); + return StatusCode::FAILURE; + } const Decision* passRawChains = nullptr; const Decision* rerunChains = nullptr; diff --git a/Trigger/TrigSteer/TrigOutputHandling/src/TriggerBitsMakerTool.cxx b/Trigger/TrigSteer/TrigOutputHandling/src/TriggerBitsMakerTool.cxx index 4daf544ebe4ad49043be32e289b2eb2b778cc694..a4ec63cd3c599972ad8a1bfa8c466709d72eb815 100644 --- a/Trigger/TrigSteer/TrigOutputHandling/src/TriggerBitsMakerTool.cxx +++ b/Trigger/TrigSteer/TrigOutputHandling/src/TriggerBitsMakerTool.cxx @@ -1,5 +1,5 @@ /* - Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration */ #include "DecisionHandling/HLTIdentifier.h" #include "TrigOutputHandling/TriggerBitsMakerTool.h" @@ -25,6 +25,10 @@ StatusCode TriggerBitsMakerTool::initialize() { StatusCode TriggerBitsMakerTool::fill( HLT::HLTResultMT& resultToFill ) const { auto chainsHandle = SG::makeHandle( m_finalChainDecisions ); + if (!chainsHandle.isValid()) { + ATH_MSG_ERROR("Unable to read in the HLTSummary from the DecisionSummaryMakerAlg"); + return StatusCode::FAILURE; + } const TrigCompositeUtils::Decision* passRawChains = nullptr; for (const TrigCompositeUtils::Decision* d : *chainsHandle) { diff --git a/Trigger/TrigValidation/TrigP1Test/share/testHLT_timeout.py b/Trigger/TrigValidation/TrigP1Test/share/testHLT_timeout.py new file mode 100644 index 0000000000000000000000000000000000000000..36ec57e5ef219f667c2ae48c45a2dc44fdaa7d23 --- /dev/null +++ b/Trigger/TrigValidation/TrigP1Test/share/testHLT_timeout.py @@ -0,0 +1,143 @@ +# +# Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +# + +# This file is based on MTCalibPeb.py + +################################################################################ +# General set up +################################################################################ + +# The top algo sequence +from AthenaCommon.AlgSequence import AlgSequence +topSequence = AlgSequence() + +# SGInputLoader takes care of unmet input dependencies (e.g. triggering conversion from BS) +from AthenaCommon.AlgScheduler import AlgScheduler +AlgScheduler.setDataLoaderAlg ('SGInputLoader') + +# EventInfoCnvAlg produces xAOD::EventInfo from the plain non-xAOD EventInfo +from xAODEventInfoCnv.xAODEventInfoCreator import xAODMaker__EventInfoCnvAlg +topSequence += xAODMaker__EventInfoCnvAlg() + +################################################################################ +# L1 result decoding +################################################################################ + +# Configure BS converter and address provider for RoIBResult decoding +from AthenaCommon.AppMgr import ServiceMgr as svcMgr +svcMgr.ByteStreamCnvSvc.GetDetectorMask = True +svcMgr.ByteStreamCnvSvc.InitCnvs += [ "ROIB::RoIBResult" ] +svcMgr.ByteStreamAddressProviderSvc.TypeNames += [ "ROIB::RoIBResult/RoIBResult" ] + +# Ensure LVL1ConfigSvc is initialised before L1Decoder handles BeginRun incident +# This should be done by the L1Decoder configuration in new-style job options (with component accumulator) +from TrigConfigSvc.TrigConfigSvcConfig import LVL1ConfigSvc, findFileInXMLPATH +svcMgr += LVL1ConfigSvc() + +# Set the LVL1 menu (needed for initialising LVL1ConfigSvc) +from TriggerJobOpts.TriggerFlags import TriggerFlags +svcMgr.LVL1ConfigSvc.XMLMenuFile = findFileInXMLPATH(TriggerFlags.inputLVL1configFile()) + +# Initialise L1 decoding tools +from L1Decoder.L1DecoderConf import CTPUnpackingTool +ctpUnpacker = CTPUnpackingTool(ForceEnableAllChains = True) +# Can add other tools here if needed + +# Define the "menu" - L1 items do not matter if we set ForceEnableAllChains = True, +# but they have to be defined in the L1 menu xml +chainCTPMap = {"HLT_MTCalibPeb1": "L1_RD0_FILLED", + "HLT_MTCalibPeb2": "L1_RD0_FILLED"} + +# Schedule the L1Decoder algo with the above tools +from L1Decoder.L1DecoderConf import L1Decoder +l1decoder = L1Decoder() +l1decoder.ctpUnpacker = ctpUnpacker +l1decoder.ChainToCTPMapping = chainCTPMap +topSequence += l1decoder + +################################################################################ +# HLT configuration +################################################################################ + +from TrigExPartialEB.TrigExPartialEBConf import MTCalibPebHypoAlg,MTCalibPebHypoTool +hypo = MTCalibPebHypoAlg() +hypo.HypoInputDecisions = "HLTChains" +hypo.HypoOutputDecisions = "MTCalibPebDecisions" + +# Chain 1 - high accept rate, sleeps for up to 2 seconds +hypoTool1 = MTCalibPebHypoTool("HLT_MTCalibPeb1") +hypoTool1.RandomAcceptRate = 0.75 +hypoTool1.BurnTimePerCycleMillisec = 200 +hypoTool1.NumBurnCycles = 10 + +# Chain 2 - lower accept rate, sleeps for up to 3 seconds +hypoTool2 = MTCalibPebHypoTool("HLT_MTCalibPeb2") +hypoTool2.RandomAcceptRate = 0.25 +hypoTool2.BurnTimePerCycleMillisec = 600 +hypoTool2.NumBurnCycles = 5 + +# Add the hypo tools to the algorithm +hypo.HypoTools = [hypoTool1, hypoTool2] + +################################################################################ +# HLT result maker configuration +################################################################################ +from TrigOutputHandling.TrigOutputHandlingConf import HLTResultMTMakerAlg, StreamTagMakerTool, TriggerBitsMakerTool +from TrigOutputHandling.TrigOutputHandlingConfig import TriggerEDMSerialiserToolCfg, HLTResultMTMakerCfg + +# Tool serialising EDM objects to fill the HLT result +serialiser = TriggerEDMSerialiserToolCfg("Serialiser") +serialiser.addCollectionListToMainResult([ + "xAOD::TrigCompositeContainer_v1#"+hypo.HypoOutputDecisions, + "xAOD::TrigCompositeAuxContainer_v2#"+hypo.HypoOutputDecisions+"Aux.", +]) + +# StreamTag definitions +streamPhysicsMain = ['Main', 'physics', "True", "True"] + +# Tool adding stream tags to HLT result +stmaker = StreamTagMakerTool() +stmaker.ChainDecisions = "HLTSummary" +stmaker.PEBDecisionKeys = [hypo.HypoOutputDecisions] +stmaker.ChainToStream = {} +stmaker.ChainToStream["HLT_MTCalibPeb1"] = streamPhysicsMain +stmaker.ChainToStream["HLT_MTCalibPeb2"] = streamPhysicsMain + +# Tool adding HLT bits to HLT result +bitsmaker = TriggerBitsMakerTool() +bitsmaker.ChainDecisions = "HLTSummary" +bitsmaker.ChainToBit = {} +bitsmaker.ChainToBit["HLT_MTCalibPeb1"] = 4 +bitsmaker.ChainToBit["HLT_MTCalibPeb2"] = 32 + +# Configure the HLT result maker to use the above tools +hltResultMaker = svcMgr.HltEventLoopMgr.ResultMaker +hltResultMaker.MakerTools = [ stmaker, bitsmaker, serialiser ] + +################################################################################ +# Control Flow construction +################################################################################ + +from DecisionHandling.DecisionHandlingConf import TriggerSummaryAlg +summary = TriggerSummaryAlg( "TriggerSummaryAlg" ) +summary.InputDecision = "HLTChains" +summary.FinalDecisions = [ hypo.HypoOutputDecisions ] + +from TrigOutputHandling.TrigOutputHandlingConf import DecisionSummaryMakerAlg +summMaker = DecisionSummaryMakerAlg() +summMaker.FinalDecisionKeys = [ hypo.HypoOutputDecisions ] +summMaker.FinalStepDecisions = dict( [ ( tool.getName(), hypo.HypoOutputDecisions ) for tool in hypo.HypoTools ] ) +print summMaker + +# Create a top-level algorithm as a sequence +from AthenaCommon.CFElements import seqOR +hltTop = seqOR( "hltTop", [ hypo, summary, summMaker ] ) +topSequence += hltTop + +# Print configuration for debugging +print("Dump of topSequence") +from AthenaCommon.AlgSequence import dumpSequence +dumpSequence(topSequence) +print("Dump of serviceMgr") +dumpSequence(ServiceMgr) diff --git a/Trigger/TrigValidation/TrigP1Test/test/test_athenaHLT_timeout.sh b/Trigger/TrigValidation/TrigP1Test/test/test_athenaHLT_timeout.sh new file mode 100755 index 0000000000000000000000000000000000000000..1ab6c02a472bc7a3da97c99f3e6d1d3e8f64280b --- /dev/null +++ b/Trigger/TrigValidation/TrigP1Test/test/test_athenaHLT_timeout.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# art-description: athenaHLT test of timeout handling +# art-type: build +# art-include: master/Athena +# art-output: *.log +# art-output: *.out +# art-output: *.err +# art-output: *.data +# art-output: *.root + +outputBaseName="output.test_athenaHLT_timeout.data" +nEvents=20 + +(set -x; \ +athenaHLT \ +-n ${nEvents} \ +--timeout 2000 \ +--nprocs 2 \ +--threads 2 \ +--concurrent-events 2 \ +-o ${outputBaseName} \ +-f /cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/TrigP1Test/data17_13TeV.00327265.physics_EnhancedBias.merge.RAW._lb0100._SFO-1._0001.1 \ +TrigP1Test/testHLT_timeout.py \ +2>&1 > athena.log) + +status=$? +echo "art-result: ${status} athenaHLT-mother" + +outputFile=`ls ${outputBaseName}* | tail -n 1` + +# Check if there is at least one event with the debug_HltTimeout stream tag +nDebugEvents=`trigbs_dumpHLTContentInBS.py --stag output.test_athenaHLT_timeout.data_Child-001._0001.data | grep "\[('HltTimeout', 'debug')\]" | wc -l` +if [ $nDebugEvents -gt 0 ]; then + echo "Found ${nDebugEvents} events with stream tag debug_HltTimeout in the output file ${outputFile}" + echo "art-result: 0 StreamTags" +else + echo "ERROR no event with stream tag debug_HltTimeout was found in the output file ${outputFile}" + echo "art-result: 0 StreamTags" +fi + +# Check if the number of events in output is the same as number of processed events +nOutputEvents=`trigbs_dumpHLTContentInBS.py --stats output.test_athenaHLT_timeout.data_Child-001._0001.data | grep Global_ID | wc -l` +if [ $nOutputEvents -eq $nEvents ]; then + echo "All ${nEvents} processed events were found in the output file ${outputFile}" + echo "art-result: 0 NOutputEvents" +else + echo "Processed ${nEvents} events, but found ${nOutputEvents} in the output file ${outputFile}" + echo "art-result: 1 NOutputEvents" +fi +