Skip to content
Snippets Groups Projects
Commit e57da672 authored by Rosen Matev's avatar Rosen Matev :sunny:
Browse files

Fix condition for stopping after N failures

parent 62049825
No related branches found
No related tags found
1 merge request: !4248 "Scheduler tests and fixes"
......@@ -208,8 +208,7 @@ private:
// the tbb "threadpool"
std::unique_ptr<tbb::task_arena> m_taskArena;
/// atomic count of the number of finished events
mutable std::atomic<uint32_t> m_finishedEvt{0};
mutable Gaudi::Accumulators::Counter<> m_finishedEvt{this, "Processed events"};
/// condition variable to wake up main thread when we need to create a new event
mutable std::condition_variable m_createEventCond;
/// mutex associated with m_createEventCond condition variable
......@@ -866,7 +865,7 @@ StatusCode HLTControlFlowMgr::nextEvent( int maxevt ) {
if ( !endTimeAndTicks ) {
endTimeAndTicks.emplace( Clock::now(), getTotalWorkerTicks() );
m_stopTimeAtEvt = m_finishedEvt;
m_stopTimeAtEvt = m_finishedEvt.value();
m_alloc_tracker->endTracking();
}
......@@ -876,7 +875,7 @@ StatusCode HLTControlFlowMgr::nextEvent( int maxevt ) {
releaseEvtSelContext().ignore();
if ( !startTimeAndTicks ) {
info() << "---> Loop over " << m_finishedEvt << " Events Finished - "
info() << "---> Loop over " << m_finishedEvt.value() << " Events Finished - "
<< " WSS " << System::mappedMemory( System::MemoryUnit::kByte ) * 1. / 1024. << ", timing failed.."
<< endmsg;
} else {
......@@ -885,7 +884,7 @@ StatusCode HLTControlFlowMgr::nextEvent( int maxevt ) {
.count();
const auto timeDiff = m_stopTimeAtEvt.value() - m_startTimeAtEvt.value();
const auto evtsPerSec = ( totalTime > 0 ? timeDiff * 1. / totalTime * 1e3 : 0 );
info() << "---> Loop over " << m_finishedEvt << " Events Finished - "
info() << "---> Loop over " << m_finishedEvt.value() << " Events Finished - "
<< " WSS " << System::mappedMemory( System::MemoryUnit::kByte ) * 1. / 1024. << ", timed " << timeDiff
<< " Events: " << totalTime << " ms"
<< ", Evts/s = " << evtsPerSec << endmsg;
......@@ -926,7 +925,7 @@ StatusCode HLTControlFlowMgr::eventFailed( EventContext const& eventContext ) co
std::ostringstream ost;
m_algExecStateSvc->dump( ost, eventContext );
info() << "Dumping Alg Exec State for slot " << eventContext.slot() << ":\n" << ost.str() << endmsg;
if ( ++m_failed_evts_detected > m_stopAfterNFailures ) { m_shutdown_now = true; }
if ( ++m_failed_evts_detected >= m_stopAfterNFailures ) { m_shutdown_now = true; }
return StatusCode::FAILURE;
}
......
......@@ -18,13 +18,11 @@
<argument name="exit_code"><integer>3</integer></argument>
<argument name="validator"><text>
def test(stdout):
expected_4 = "LAZY_AND: moore #=4 Sum=4 Eff=|( 100.0000 +- 0.00000 )%|"
expected_5 = "LAZY_AND: moore #=5 Sum=5 Eff=|( 100.0000 +- 0.00000 )%|"
expected_6 = "LAZY_AND: moore #=6 Sum=6 Eff=|( 100.0000 +- 0.00000 )%|"
if all(e not in stdout for e in [expected_4, expected_5, expected_6]):
causes.append("early abortion didn't work correctly")
test(stdout)
from GaudiConf.QMTest.LHCbTest import extract_counters
counters = extract_counters(stdout)
n_processed = int(counters["HLTControlFlowMgr"]["Processed events"][0])
if n_processed > 6:
causes.append("early abort didn't work correctly")
</text></argument>
</extension>
......
......@@ -124,3 +124,6 @@ LAZY_AND: top #=77 Sum=77 Eff=|( 100.0000 +- 0.000
HLTControlFlowMgr INFO Histograms converted successfully according to request.
ApplicationMgr INFO Application Manager Finalized successfully
ApplicationMgr INFO Application Manager Terminated successfully
HLTControlFlowMgr INFO Number of counters : 1
| Counter | # | sum | mean/eff^* | rms/err^* | min | max |
| "Processed events" | 77 |
......@@ -88,3 +88,6 @@ TFile** write.legacy_histos.root GAUDI Histograms
KEY: TH2D 2;1 2D fix binning
KEY: TH3D 3;1 3D fix binning
KEY: TDirectoryFile Prof;1 Prof
HLTControlFlowMgr INFO Number of counters : 1
| Counter | # | sum | mean/eff^* | rms/err^* | min | max |
| "Processed events" | 100 |
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment