Skip to content
Snippets Groups Projects
Commit 28a238ba authored by Frank Winklmeier
Browse files

THistSvc: abort job in case file size is too large

Instead of gracefully shutting down the job when the file size becomes
too large, abort the job with an exception. This ensures that such jobs
are correctly detected by the production system as failed.

Closes ATEAM-1030.
parent c220faef
No related branches found
No related tags found
No related merge requests found
Pipeline #8655535 failed
......@@ -11,6 +11,7 @@
// system headers
#include <cassert>
#include <cstdio>
#include <format>
#include <sstream>
#include <streambuf>
......@@ -172,8 +173,7 @@ StatusCode THistSvc::initialize() {
}
}
if ( !all_good ) {
error() << "problem while registering input/output files with "
<< "the I/O component manager !" << endmsg;
error() << "problem while registering input/output files with " << "the I/O component manager !" << endmsg;
st = StatusCode::FAILURE;
}
}
......@@ -773,8 +773,8 @@ StatusCode THistSvc::getTHists( const std::string& dir, TList& tl, bool rcs ) co
r2 += rem;
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "getTHists: \"" << dir << "\" looks like a stream name."
<< " associated TFile: \"" << itr->second.first->GetName() << "\"" << endmsg;
debug() << "getTHists: \"" << dir << "\" looks like a stream name." << " associated TFile: \""
<< itr->second.first->GetName() << "\"" << endmsg;
}
if ( gDirectory->cd( r2.c_str() ) ) {
......@@ -879,8 +879,8 @@ StatusCode THistSvc::getTHists( const std::string& dir, TList& tl, bool rcs, boo
r2 += rem;
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "getTHists: \"" << dir << "\" looks like a stream name."
<< " associated TFile: \"" << itr->second.first->GetName() << "\"" << endmsg;
debug() << "getTHists: \"" << dir << "\" looks like a stream name." << " associated TFile: \""
<< itr->second.first->GetName() << "\"" << endmsg;
}
if ( gDirectory->cd( r2.c_str() ) ) {
......@@ -899,8 +899,7 @@ StatusCode THistSvc::getTHists( const std::string& dir, TList& tl, bool rcs, boo
sc = StatusCode::FAILURE;
} else {
if ( reg ) {
warning() << "Unable to register histograms automatically "
<< "without a valid stream name" << endmsg;
warning() << "Unable to register histograms automatically " << "without a valid stream name" << endmsg;
reg = false;
}
sc = getTHists( gDirectory, tl, rcs, reg );
......@@ -971,8 +970,8 @@ StatusCode THistSvc::getTTrees( const std::string& dir, TList& tl, bool rcs ) co
r2 += rem;
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "getTTrees: \"" << dir << "\" looks like a stream name."
<< " associated TFile: \"" << itr->second.first->GetName() << "\"" << endmsg;
debug() << "getTTrees: \"" << dir << "\" looks like a stream name." << " associated TFile: \""
<< itr->second.first->GetName() << "\"" << endmsg;
}
if ( gDirectory->cd( r2.c_str() ) ) { return getTTrees( gDirectory, tl, rcs ); }
......@@ -1069,8 +1068,8 @@ StatusCode THistSvc::getTTrees( const std::string& dir, TList& tl, bool rcs, boo
r2 += rem;
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "getTTrees: \"" << dir << "\" looks like a stream name."
<< " associated TFile: \"" << itr->second.first->GetName() << "\"" << endmsg;
debug() << "getTTrees: \"" << dir << "\" looks like a stream name." << " associated TFile: \""
<< itr->second.first->GetName() << "\"" << endmsg;
}
if ( gDirectory->cd( r2.c_str() ) ) {
......@@ -1154,8 +1153,8 @@ StatusCode THistSvc::getTEfficiencies( const std::string& dir, TList& tl, bool r
r2 += rem;
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "getTEfficiencies: \"" << dir << "\" looks like a stream name."
<< " associated TFile: \"" << itr->second.first->GetName() << "\"" << endmsg;
debug() << "getTEfficiencies: \"" << dir << "\" looks like a stream name." << " associated TFile: \""
<< itr->second.first->GetName() << "\"" << endmsg;
}
if ( gDirectory->cd( r2.c_str() ) ) {
......@@ -1261,8 +1260,8 @@ StatusCode THistSvc::getTEfficiencies( const std::string& dir, TList& tl, bool r
r2 += rem;
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "getTEfficiencies: \"" << dir << "\" looks like a stream name."
<< " associated TFile: \"" << itr->second.first->GetName() << "\"" << endmsg;
debug() << "getTEfficiencies: \"" << dir << "\" looks like a stream name." << " associated TFile: \""
<< itr->second.first->GetName() << "\"" << endmsg;
}
if ( gDirectory->cd( r2.c_str() ) ) {
......@@ -1281,8 +1280,7 @@ StatusCode THistSvc::getTEfficiencies( const std::string& dir, TList& tl, bool r
sc = StatusCode::FAILURE;
} else {
if ( reg ) {
warning() << "Unable to register histograms automatically "
<< "without a valid stream name" << endmsg;
warning() << "Unable to register histograms automatically " << "without a valid stream name" << endmsg;
reg = false;
}
sc = getTEfficiencies( gDirectory, tl, rcs, reg );
......@@ -1294,7 +1292,6 @@ StatusCode THistSvc::getTEfficiencies( const std::string& dir, TList& tl, bool r
//*************************************************************************//
void THistSvc::handle( const Incident& /* inc */ ) {
if ( m_signaledStop ) return;
if ( m_maxFileSize.value() == -1 ) return;
......@@ -1307,26 +1304,19 @@ void THistSvc::handle( const Incident& /* inc */ ) {
for ( const auto& f : m_files ) {
TFile* tf = f.second.first;
#ifndef NDEBUG
if ( msgLevel( MSG::DEBUG ) ) {
debug() << "stream: " << f.first << " name: " << tf->GetName() << " size: " << tf->GetSize() << endmsg;
}
#endif
// Signal job to terminate if output file is too large
// Terminate job if output file is too large
if ( tf->GetSize() > mfs ) {
m_signaledStop = true;
fatal() << "file \"" << tf->GetName() << "\" associated with stream \"" << f.first
<< "\" has exceeded the max file size of " << m_maxFileSize.value() << "MB. Terminating Job." << endmsg;
if ( writeObjectsToFile().isFailure() ) { error() << "problems writing histograms" << endmsg; }
throw GaudiException( fmt::format( "file \"{}\" associated with stream \"{}\" has exceeded the max "
"file size of {} MB. Terminating Job.",
tf->GetName(), f.first, m_maxFileSize.value() ),
name(), StatusCode::FAILURE );
auto evt = service<IEventProcessor>( "ApplicationMgr", true );
if ( evt ) {
evt->stopRun().ignore( /* AUTOMATICALLY ADDED FOR gaudi/Gaudi!763 */ );
} else {
abort();
}
} else if ( tf->GetSize() > mfs_warn ) {
warning() << "file \"" << tf->GetName() << "\" associated with stream \"" << f.first
<< "\" is at 95% of its maximum allowable file size of " << m_maxFileSize.value() << "MB" << endmsg;
......@@ -1419,11 +1409,9 @@ StatusCode THistSvc::io_reinit() {
olddir->Remove( hid.obj );
newdir->Append( hid.obj );
} else {
error() << "id: \"" << hid.id << "\" is not a inheriting from a class "
<< "we know how to handle (received [" << cl->GetName() << "], "
<< "expected [TTree, TH1, TGraph or TEfficiency]) !" << endmsg << "attaching to current dir ["
<< newdir->GetPath() << "] "
<< "nonetheless..." << endmsg;
error() << "id: \"" << hid.id << "\" is not a inheriting from a class " << "we know how to handle (received ["
<< cl->GetName() << "], " << "expected [TTree, TH1, TGraph or TEfficiency]) !" << endmsg
<< "attaching to current dir [" << newdir->GetPath() << "] " << "nonetheless..." << endmsg;
olddir->Remove( hid.obj );
newdir->Append( hid.obj );
}
......@@ -1872,8 +1860,7 @@ void THistSvc::parseString( const std::string& id, std::string& root, std::strin
void THistSvc::setupInputFile() {
if ( FSMState() < Gaudi::StateMachine::CONFIGURED || !m_okToConnect ) {
debug() << "Delaying connection of Input Files until Initialize"
<< ". now in " << FSMState() << endmsg;
debug() << "Delaying connection of Input Files until Initialize" << ". now in " << FSMState() << endmsg;
m_delayConnect = true;
} else {
......@@ -1896,8 +1883,7 @@ void THistSvc::setupInputFile() {
void THistSvc::setupOutputFile() {
if ( FSMState() < Gaudi::StateMachine::CONFIGURED || !m_okToConnect ) {
debug() << "Delaying connection of Output Files until Initialize"
<< ". now in " << FSMState() << endmsg;
debug() << "Delaying connection of Output Files until Initialize" << ". now in " << FSMState() << endmsg;
m_delayConnect = true;
} else {
StatusCode sc = StatusCode::SUCCESS;
......
......@@ -389,7 +389,6 @@ private:
ServiceHandle<IIncidentSvc> p_incSvc;
ServiceHandle<IFileMgr> p_fileMgr;
bool m_signaledStop = false;
bool m_delayConnect = false;
bool m_okToConnect = false;
bool m_hasTTrees = false;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment