Skip to content
Snippets Groups Projects
Commit 58b223cc authored by Edward Moyse's avatar Edward Moyse
Browse files

Merge branch 'trigCostToNeJO_3' into 'master'

More (x3) Trig Cost updates following T11

See merge request atlas/athena!34692
parents 338044e9 bf7dd814
No related branches found
No related tags found
No related merge requests found
......@@ -19,7 +19,9 @@ m_eventMonitored(),
m_slotMutex(),
m_algStartInfo(),
m_algStopTime(),
m_threadToAlgMap()
m_threadToAlgMap(),
m_threadToCounterMap(),
m_threadCounter(0)
{
ATH_MSG_DEBUG("TrigCostMTSvc regular constructor");
}
......@@ -191,6 +193,32 @@ StatusCode TrigCostMTSvc::endEvent(const EventContext& context, SG::WriteHandle<
// we can now perform whole-map inspection of this event's TrigCostDataStores without the danger that it will be changed further
// Let's start by getting the global STOP time we just wrote
uint64_t eventStopTime = 0;
{
const AlgorithmIdentifier myAi = AlgorithmIdentifierMaker::make(context, m_decisionSummaryMakerAlgName, msg());
ATH_CHECK( myAi.isValid() );
tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
if (m_algStopTime.retrieve(myAi, stopTimeAcessor).isFailure()) {
ATH_MSG_ERROR("No end time for '" << myAi.m_caller << "', '" << myAi.m_store << "'"); // Error as we JUST entered this info!
} else { // retrieve was a success
eventStopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
}
}
// And the global START time for the event
uint64_t eventStartTime = 0;
{
const AlgorithmIdentifier l1DecoderAi = AlgorithmIdentifierMaker::make(context, m_l1DecoderName, msg());
ATH_CHECK( l1DecoderAi.isValid() );
tbb::concurrent_hash_map<AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_accessor startAcessor;
if (m_algStartInfo.retrieve(l1DecoderAi, startAcessor).isFailure()) {
ATH_MSG_ERROR("No alg info for '" << l1DecoderAi.m_caller << "', '" << l1DecoderAi.m_store << "'"); // Error as we know this info must be present
} else { // retrieve was a success
eventStartTime = startAcessor->second.m_algStartTime.microsecondsSinceEpoch();
}
}
// Read payloads. Write to persistent format
tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator beginIt;
tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator endIt;
......@@ -202,9 +230,10 @@ StatusCode TrigCostMTSvc::endEvent(const EventContext& context, SG::WriteHandle<
for (it = beginIt; it != endIt; ++it) {
const AlgorithmIdentifier& ai = it->first;
const AlgorithmPayload& ap = it->second;
uint64_t startTime = ap.m_algStartTime.microsecondsSinceEpoch();
// Can we find the end time for this alg?
uint64_t stopTime = 0;
// Can we find the end time for this alg? If not, it is probably still running. Hence we use "now" as the default time.
uint64_t stopTime = eventStopTime;
{
tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
if (m_algStopTime.retrieve(ai, stopTimeAcessor).isFailure()) {
......@@ -215,19 +244,62 @@ StatusCode TrigCostMTSvc::endEvent(const EventContext& context, SG::WriteHandle<
// stopTimeAcessor goes out of scope - lock released
}
// It is possible (when in the master-slot) to catch just the END of an Alg's exec from another slot, and then the START of the same
// alg executing in the next event in that same other-slot.
// This gives us an end time which is before the start time. Disregard these entries.
if (startTime > stopTime) {
ATH_MSG_VERBOSE("Disregard start-time:" << startTime << " > stop-time:" << stopTime
<< " for " << TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " in slot " << ap.m_slot << ", this is slot " << context.slot());
continue;
}
// Lock the start and stop times to be no later than eventStopTime.
// E.g. it's possible for an alg in another slot to start or stop running after 'processAlg(context, m_decisionSummaryMakerAlgName, AuditType::After))'
// but before 'lockUnique( m_slotMutex[ context.slot() ] )', creating a timestamp after the nominal end point for this event.
// If the alg starts afterwards, we disregard it in lieu of setting to have zero walltime.
// If the alg stops afterwards, we truncate its stop time to be no later than eventStopTime
if (startTime > eventStopTime) {
ATH_MSG_VERBOSE("Disregard " << TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " as it started after endEvent() was finished being called" );
continue;
}
if (stopTime > eventStopTime) {
ATH_MSG_VERBOSE(TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " stopped after endEvent() was called, but before the cost container was locked,"
<< " truncating its ending time stamp from " << stopTime << " to " << eventStopTime);
stopTime = eventStopTime;
}
// Do the same, locking the start and stop times to be no earlier than eventStartTime
// If the alg stops before eventStartTime, we disregard it in lieu of setting it to have zero walltime
// If the alg starts before eventStartTime, we truncate its start time to be no later than eventStopTime
if (stopTime < eventStartTime) {
ATH_MSG_VERBOSE("Disregard " << TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " as it stopped before startEvent() was finished being called" );
continue;
}
if (startTime < eventStartTime) {
ATH_MSG_VERBOSE(TrigConf::HLTUtils::hash2string( ai.callerHash(), "ALG") << " started just after the cost container was unlocked, but before the L1Decoder record was written."
<< " truncating its starting time stamp from " << startTime << " to " << eventStartTime);
startTime = eventStartTime;
}
// Make a new TrigComposite to persist monitoring payload for this alg
xAOD::TrigComposite* tc = new xAOD::TrigComposite();
outputHandle->push_back( tc );
// tc is now owned by storegate and, and has an aux store provided by the TrigCompositeCollection
// Reminder: we are under a unique mutex here
const uint32_t threadID = static_cast<uint32_t>( std::hash< std::thread::id >()(ap.m_algThreadID) );
if (m_threadToCounterMap.count(threadID) == 0) {
m_threadToCounterMap[threadID] = m_threadCounter++;
}
bool result = true;
result &= tc->setDetail("alg", ai.callerHash());
result &= tc->setDetail("store", ai.storeHash());
result &= tc->setDetail("view", ai.m_viewID);
result &= tc->setDetail("thread", static_cast<uint32_t>( std::hash< std::thread::id >()(ap.m_algThreadID) ));
result &= tc->setDetail("thread", m_threadToCounterMap[threadID]);
result &= tc->setDetail("slot", ap.m_slot);
result &= tc->setDetail("roi", ap.m_algROIID);
result &= tc->setDetail("start", ap.m_algStartTime.microsecondsSinceEpoch());
result &= tc->setDetail("start", startTime);
result &= tc->setDetail("stop", stopTime);
if (!result) ATH_MSG_WARNING("Failed to append one or more details to trigger cost TC");
}
......
......@@ -129,6 +129,9 @@ class TrigCostMTSvc : public extends <AthService, ITrigCostMTSvc> {
tbb::concurrent_hash_map<std::thread::id, AlgorithmIdentifier, ThreadHashCompare> m_threadToAlgMap; //!< Keeps track of what is running right now in each thread.
std::unordered_map<uint32_t, uint32_t> m_threadToCounterMap; //!< Map thread's hash ID to a counting numeral
size_t m_threadCounter; //!< Count how many unique thread ID we have seen
Gaudi::Property<bool> m_monitorAllEvents{this, "MonitorAllEvents", false, "Monitor every HLT event, e.g. for offline validation."};
Gaudi::Property<bool> m_enableMultiSlot{this, "EnableMultiSlot", true, "Monitored events in the MasterSlot collect data from events running in other slots."};
Gaudi::Property<bool> m_saveHashes{this, "SaveHashes", false, "Store a copy of the hash dictionary for easier debugging"};
......@@ -136,6 +139,8 @@ class TrigCostMTSvc : public extends <AthService, ITrigCostMTSvc> {
Gaudi::Property<std::string> m_l1DecoderName{this, "L1DecoderName", "L1Decoder", "The name of the Gaudi Configurable of type L1Decoder"};
Gaudi::Property<std::string> m_decisionSummaryMakerAlgName{this, "DecisionSummaryMakerAlgName", "DecisionSummaryMakerAlg", "The name of the Gaudi Configurable of type DecisionSummaryMakerAlg"};
};
#endif // TRIGCOSTMONITORMT_TRIGCOSTMTSVC_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment