Commit 0f268e1f authored by Marco Clemencic's avatar Marco Clemencic

Revert "Merge branch 'SubSlotExceptions' into 'master'"

This reverts merge request !979
parent 13d5d487
Pipeline #1683771 passed with stages
in 34 minutes and 8 seconds
......@@ -109,19 +109,6 @@ const AlgExecState& AlgExecStateSvc::algExecState( const Gaudi::StringKey& algNa
if ( UNLIKELY( itr == algState.end() ) ) {
throw GaudiException{"cannot find Alg " + algName.str() + " in AlgStateMap", name(), StatusCode::FAILURE};
}
// Assuming the alg is known, look for its state in the sub-slot
if ( ctx.usesSubSlot() ) {
auto& subSlots = m_algSubSlotStates[ctx.slot()];
auto& thisSubSlot = subSlots[ctx.subSlot()];
auto subitr = thisSubSlot.find( algName );
if ( UNLIKELY( subitr == thisSubSlot.end() ) ) {
throw GaudiException{"cannot find Alg " + algName.str() + " in AlgStateMap", name(), StatusCode::FAILURE};
} else {
return subitr->second;
}
}
return itr->second;
}
......@@ -132,34 +119,12 @@ AlgExecState& AlgExecStateSvc::algExecState( IAlgorithm* iAlg, const EventContex
auto& algState = m_algStates.at( ctx.slot() );
auto itr = algState.find( iAlg->nameKey() );
if ( UNLIKELY( itr == algState.end() ) ) {
throw GaudiException{std::string{"cannot find Alg "} + iAlg->name() + " in AlgStateMap", name(),
StatusCode::FAILURE};
}
// Sub-slots are dynamic
// Assuming the alg is known, look for a subslot
// Return the existing state, or create a new one
if ( ctx.usesSubSlot() ) {
// Check that there is any sub-slot information for this slot
if ( ctx.slot() >= m_algSubSlotStates.size() ) m_algSubSlotStates.resize( ctx.slot() + 1 );
// Check that there is information for this sub-slot
auto& subSlots = m_algSubSlotStates[ctx.slot()];
if ( ctx.subSlot() >= subSlots.size() ) subSlots.resize( ctx.subSlot() + 1 );
// Find (or create) the state of the algorithm in this sub-slot
auto& thisSubSlot = subSlots[ctx.subSlot()];
auto subitr = thisSubSlot.find( iAlg->name() );
if ( subitr == thisSubSlot.end() ) {
thisSubSlot[iAlg->name()] = AlgExecState();
return thisSubSlot[iAlg->name()];
} else {
return subitr->second;
}
}
return itr->second;
}
......@@ -204,13 +169,6 @@ void AlgExecStateSvc::reset( const EventContext& ctx ) {
std::call_once( m_initFlag, &AlgExecStateSvc::init, this );
for ( auto& e : m_algStates.at( ctx.slot() ) ) e.second.reset();
// Also clear sub slots
if ( ctx.slot() < m_algSubSlotStates.size() ) {
for ( auto& subSlot : m_algSubSlotStates[ctx.slot()] ) {
for ( auto& e : subSlot ) e.second.reset();
}
}
m_eventStatus.at( ctx.slot() ) = EventStatus::Invalid;
}
......
......@@ -53,8 +53,7 @@ public:
private:
// one vector entry per event slot
std::vector<AlgStateMap_t> m_algStates;
std::vector<std::vector<AlgStateMap_t>> m_algSubSlotStates;
std::vector<AlgStateMap_t> m_algStates;
std::vector<EventStatus::Status> m_eventStatus;
std::vector<Gaudi::StringKey> m_preInitAlgs;
......
#!/usr/bin/env gaudirun.py
#####################################################################################
# (c) Copyright 1998-2020 CERN for the benefit of the LHCb and ATLAS collaborations #
# #
# This software is distributed under the terms of the Apache version 2 licence, #
# copied verbatim in the file "LICENSE". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
#####################################################################################
'''
A test for an incorrect handling of exceptions from algorithms running in sub-slots
Throwing an exception causes the event to be marked as failed.
It also means that the part of the code that updates the algorithm state is bypassed.
Since the AlgExecStateSvc does not (currently) understand sub-slots,
if the exception is thrown by an alg in sub-slot 2, the state for that same alg
in sub-slot 1 is retrieved.
So, it is possible to have a failed event, without any algorithms in ERROR state.
The scheduler does not have handling for this, and hangs.
'''
from Gaudi.Configuration import *
from Configurables import (HiveWhiteBoard, HiveSlimEventLoopMgr,
AvalancheSchedulerSvc, AlgResourcePool, CPUCruncher,
GaudiSequencer, Test__ViewTester,
GaudiTesting__StopLoopAlg)
# metaconfig -------------------------------------------------------------------
# It's convenient to collect the relevant parameters at the top of the options file
evtslots = 1  # number of event slots in the whiteboard
evtMax = 10  # maximum number of events passed to the ApplicationMgr
cardinality = 1  # Cardinality assigned to every algorithm below
threads = 1  # size of the scheduler thread pool
viewsPerEvt = 2  # number of views A1 creates per event
# -------------------------------------------------------------------------------

# The configuration of the whiteboard ------------------------------------------
# It is useful to call it EventDataSvc to replace the usual data service with
# the whiteboard transparently.
whiteboard = HiveWhiteBoard("EventDataSvc", EventSlots=evtslots)
# -------------------------------------------------------------------------------

# Event Loop Manager -----------------------------------------------------------
# It's called slim since it has fewer functionalities overall than the good-old
# event loop manager. Here we select the scheduler by name and set its
# output level to INFO.
slimeventloopmgr = HiveSlimEventLoopMgr(
    SchedulerName="AvalancheSchedulerSvc", OutputLevel=INFO)
# -------------------------------------------------------------------------------

# AvalancheSchedulerSvc --------------------------------------------------------
# We decide how many threads we want in the pool. The default value is -1,
# which for TBB is equivalent to taking over the whole machine.
# VerboseSubSlots is switched on for this sub-slot test.
scheduler = AvalancheSchedulerSvc(
    ThreadPoolSize=threads, OutputLevel=INFO, VerboseSubSlots=True)
# -------------------------------------------------------------------------------

# Algo Resource Pool -----------------------------------------------------------
# Nothing special here, we just set the output level.
AlgResourcePool(OutputLevel=INFO)
# -------------------------------------------------------------------------------

# Set up of the algorithms -----------------------------------------------------
# A1 creates `viewsPerEvt` views named after baseViewName and attaches them to
# the node called 'viewNode'.
a1 = Test__ViewTester("A1")
a1.baseViewName = 'view'
a1.viewNumber = viewsPerEvt
a1.viewNodeName = 'viewNode'

# A2 has an empty viewNodeName: it does not attach views to any node.
a2 = Test__ViewTester("A2")
a2.viewNodeName = ''

# EventCount is tracked by a private member of the algorithm, so increments whenever it is run
# EventCount = 2 corresponds to the 1st view of the 2nd event, giving a correctly-handled exception
# EventCount = 3 corresponds to the 2nd view of the 2nd event, causing the hang
a3 = GaudiTesting__StopLoopAlg("A3", EventCount=3, Mode="exception")

# A4 has an empty viewNodeName, like A2.
a4 = Test__ViewTester("A4")
a4.viewNodeName = ''

# Apply the common settings to every algorithm. Both assignments belong to the
# loop body, so they must be indented.
for algo in [a1, a2, a3, a4]:
    algo.Cardinality = cardinality
    algo.OutputLevel = INFO

# Sequencer whose members (A2 and the exception-throwing A3) are the ones that
# run inside the views attached by A1.
viewNode = GaudiSequencer(
    "viewNode",
    Members=[a2, a3],
    Sequential=False,
    ShortCircuit=False,
    OutputLevel=INFO)

# Top-level sequence: A1, then the view node, then A4, in this order.
createViewSeq = GaudiSequencer(
    "createViewSeq",
    Members=[a1, viewNode, a4],
    Sequential=True,
    OutputLevel=INFO)
# -------------------------------------------------------------------------------

# Application Manager ----------------------------------------------------------
# We put everything together and change the type of message service
ApplicationMgr(
    EvtMax=evtMax,
    EvtSel='NONE',
    ExtSvc=[whiteboard],
    EventLoop=slimeventloopmgr,
    TopAlg=[createViewSeq],
    MessageSvcType="InertMessageSvc")
# -------------------------------------------------------------------------------
<?xml version="1.0" ?><!DOCTYPE extension PUBLIC '-//QM/2.3/Extension//EN' 'http://www.codesourcery.com/qm/dtds/2.3/-//qm/2.3/extension//en.dtd'>
<!--
(c) Copyright 1998-2020 CERN for the benefit of the LHCb and ATLAS collaborations
This software is distributed under the terms of the Apache version 2 licence,
copied verbatim in the file "LICENSE".
In applying this licence, CERN does not waive the privileges and immunities
granted to it by virtue of its status as an Intergovernmental Organization
or submit itself to any jurisdiction.
-->
<!--
Test definition: runs "gaudirun.py -v ../../options/SubSlotException.py" in a
temporary directory, compares the output against refs/ViewExceptionTest.ref
and expects the job to terminate with exit code 6. The timeout value is 120
(presumably seconds; confirm against the test framework).
-->
<extension class="GaudiTest.GaudiExeTest" kind="test">
<argument name="program"><text>gaudirun.py</text></argument>
<argument name="args"><set>
<text>-v</text>
<text>../../options/SubSlotException.py</text>
</set></argument>
<argument name="use_temp_dir"><enumeral>true</enumeral></argument>
<argument name="timeout"><integer>120</integer></argument>
<argument name="reference"><text>refs/ViewExceptionTest.ref</text></argument>
<argument name="exit_code"><integer>6</integer></argument>
</extension>
# setting LC_ALL to "C"
# --> Including file '/workdir/gaudi/GaudiHive/options/SubSlotException.py'
# <-- End of file '/workdir/gaudi/GaudiHive/options/SubSlotException.py'
# Dumping all configurables and properties (different from default)
{'A1': {'Cardinality': 1,
'OutputLevel': 3,
'baseViewName': 'view',
'viewNodeName': 'viewNode',
'viewNumber': 2},
'A2': {'Cardinality': 1, 'OutputLevel': 3, 'viewNodeName': ''},
'A3': {'Cardinality': 1,
'EventCount': 3,
'Mode': 'exception',
'OutputLevel': 3},
'A4': {'Cardinality': 1, 'OutputLevel': 3, 'viewNodeName': ''},
'AlgResourcePool': {'OutputLevel': 3},
'ApplicationMgr': {'EventLoop': 'HiveSlimEventLoopMgr/HiveSlimEventLoopMgr',
'EvtMax': 10,
'EvtSel': 'NONE',
'ExtSvc': ['HiveWhiteBoard/EventDataSvc'],
'MessageSvcType': 'InertMessageSvc',
'TopAlg': ['GaudiSequencer/createViewSeq']},
'AvalancheSchedulerSvc': {'OutputLevel': 3,
'ThreadPoolSize': 1,
'VerboseSubSlots': True},
'EventDataSvc': {'EventSlots': 1},
'HiveSlimEventLoopMgr': {'OutputLevel': 3,
'SchedulerName': 'AvalancheSchedulerSvc'},
'createViewSeq': {'Members': ['Test::ViewTester/A1',
'GaudiSequencer/viewNode',
'Test::ViewTester/A4'],
'OutputLevel': 3,
'Sequential': True},
'viewNode': {'Members': ['Test::ViewTester/A2',
'GaudiTesting::StopLoopAlg/A3'],
'OutputLevel': 3,
'Sequential': False,
'ShortCircuit': False}}
MessageSvc INFO Activating in a separate thread
ApplicationMgr SUCCESS
====================================================================================================================================
Welcome to ApplicationMgr (GaudiCoreSvc v33r1)
running on cbab1f0b7ae0 on Wed May 20 18:19:08 2020
====================================================================================================================================
ApplicationMgr INFO Application Manager Configured successfully
ThreadPoolSvc INFO no thread init tools attached
AvalancheSchedu... INFO Activating scheduler in a separate thread
AlgResourcePool INFO TopAlg list empty. Recovering the one of Application Manager
createViewSeq INFO Member list: Test::ViewTester/A1, GaudiSequencer/viewNode, Test::ViewTester/A4
RndmGenSvc.Engine INFO Generator engine type:CLHEP::RanluxEngine
RndmGenSvc.Engine INFO Current Seed:1234567 Luxury:3
RndmGenSvc INFO Using Random engine:HepRndm::Engine<CLHEP::RanluxEngine>
ToolSvc.Sequenc... INFO This machine has a speed about 3.45 times the speed of a 2.8 GHz Xeon.
viewNode INFO Member list: Test::ViewTester/A2, GaudiTesting::StopLoopAlg/A3
AvalancheSchedu... INFO Found 4 algorithms
AvalancheSchedu... INFO Data Dependencies for Algorithms:
A1
none
A2
none
A3
none
A4
none
PrecedenceSvc INFO Assembling CF and DF task precedence rules
PrecedenceSvc INFO PrecedenceSvc initialized successfully
AvalancheSchedu... INFO Concurrency level information:
AvalancheSchedu... INFO o Number of events in flight: 1
AvalancheSchedu... INFO o TBB thread pool size: 'ThreadPoolSize':1
AvalancheSchedu... INFO Task scheduling settings:
AvalancheSchedu... INFO o Avalanche generation mode: disabled
AvalancheSchedu... INFO o Preemptive scheduling of CPU-blocking tasks: disabled
AvalancheSchedu... INFO o Scheduling of condition tasks: disabled
HiveSlimEventLo...WARNING Unable to locate service "EventSelector"
HiveSlimEventLo...WARNING No events will be processed from external input.
HiveSlimEventLo... INFO Found 0 events in black list
ApplicationMgr INFO Application Manager Initialized successfully
ApplicationMgr INFO Application Manager Started successfully
HiveSlimEventLo... INFO Starting loop on events
A1 INFO Running in whole event context
A1 INFO Attached view view0 to node viewNode for s: 0 e: 0
A1 INFO Attached view view1 to node viewNode for s: 0 e: 0
A2 INFO Running in view view0
A3 INFO 3 events to go
A2 INFO Running in view view1
A3 INFO 2 events to go
A4 INFO Running in whole event context
A1 INFO Running in whole event context
A1 INFO Attached view view0 to node viewNode for s: 0 e: 1
A1 INFO Attached view view1 to node viewNode for s: 0 e: 1
A2 INFO Running in view view0
A3 INFO 1 events to go
A2 INFO Running in view view1
A3 INFO Stopping loop with exception
A3 FATAL GaudiTesting::StopLoopAlg:: Exception throw: Stopping loop StatusCode=FAILURE
A3 FATAL Exception with tag= is caught
A3 ERROR A3:: Stopping loop StatusCode=FAILURE
AlgTask FATAL .executeEvent(): Exception with tag= thrown by A3
AlgTask ERROR A3:: Stopping loop StatusCode=FAILURE
AvalancheSchedu... ERROR *** Stall detected in slot 0! ***
AvalancheSchedu... ERROR Event 1 on slot 0 failed
AvalancheSchedu... INFO Dumping scheduler state
=========================================================================================
++++++++++++++++++++++++++++++++++++ SCHEDULER STATE ++++++++++++++++++++++++++++++++++++
=========================================================================================
------------------ Last schedule: Task/Event/Slot/Thread/State Mapping ------------------
WARNING Enable TimelineSvc in record mode (RecordTimeline = True) to trace the mapping
---------------------------- Task/CF/FSM Mapping [target slot] --------------------------
[ slot: 0 event: 1 ]:
RootDecisionHub (0), w/ decision: UNDEFINED(-1)
createViewSeq (1), w/ decision: UNDEFINED(-1)
A1 (2), w/ decision: TRUE(1), in state: EVTACCEPTED
viewNode (3), w/ decision: UNDEFINED(-1)
A2 (4), w/ decision: UNDEFINED(-1), in state: INITIAL
A3 (5), w/ decision: UNDEFINED(-1), in state: INITIAL
A4 (6), w/ decision: UNDEFINED(-1), in state: INITIAL
Number of sub-slots: 2
[ slot: 0, sub-slot: 0, entry: viewNode, event: 1 ]:
viewNode (3), w/ decision: UNDEFINED(-1)
A2 (4), w/ decision: TRUE(1), in state: EVTACCEPTED
A3 (5), w/ decision: TRUE(1), in state: EVTACCEPTED
[ slot: 0, sub-slot: 1, entry: viewNode, event: 1 ]:
viewNode (3), w/ decision: UNDEFINED(-1)
A2 (4), w/ decision: TRUE(1), in state: EVTACCEPTED
A3 (5), w/ decision: UNDEFINED(-1), in state: ERROR
------------------------------ Algorithm Execution States -----------------------------
[slot: 0, incident: AlgStall]:
+ A4 e: n
+ A3 e: n
+ A2 e: n
+ viewNode e: n
+ createViewSeq e: n
+ A1 e: d f: 1 sc: SUCCESS
=========================================================================================
++++++++++++++++++++++++++++++++++++++ END OF DUMP ++++++++++++++++++++++++++++++++++++++
=========================================================================================
PrecedenceSvc WARNING To trace temporal and topological aspects of execution flow, set DumpPrecedenceRules property to True
HiveSlimEventLo... FATAL Failed event detected on s: 0 e: 1
HiveSlimEventLo... INFO ---> Loop Finished (skipping 1st evt) - WSS 88.9414 total time 81999053
HiveSlimEventLo... INFO 0 events were SKIPed
ApplicationMgr INFO Application Manager Stopped successfully
A3 SUCCESS Exceptions/Errors/Warnings/Infos Statistics : 1/0/0/0
A3 SUCCESS #EXCEPTIONS = 1 Message = 'Stopping loop'
HiveSlimEventLo... INFO Histograms converted successfully according to request.
AvalancheSchedu... INFO Joining Scheduler thread
ToolSvc INFO Removing all tools created by ToolSvc
ApplicationMgr INFO Application Manager Finalized successfully
ApplicationMgr ERROR Application Manager Terminated with error code 6
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment