Skip to content
Snippets Groups Projects
Commit 42cc6891 authored by Hasan Ozturk's avatar Hasan Ozturk
Browse files

Improvements in monitoring of initialization and finalization steps

parent ef3ccde3
11 merge requests!58791DataQualityConfigurations: Modify L1Calo config for web display,!46784MuonCondInterface: Enable thread-safety checking.,!46776Updated LArMonitoring config file for WD to match new files produced using MT,!45405updated ART test cron job,!42417Draft: DIRE and VINCIA Base Fragments for Pythia 8.3,!29703PerfMonMT: Reload Component Level Monitoring inside the Event Loop and Improve Reporting Format,!29682PerfMonMT: Reload Component Level Monitoring inside the Event Loop and Improve Reporting Format,!28044PerfMonMT: Update Event Loop Monitoring and add various improvements,!25790Add Memory Monitoring feature for serial steps to PerfMonMT as part of GSoC work,!25770Add Memory Monitoring feature for serial steps to PerfMonMT as part of GSoC work,!25237Adding Component Level Monitoring feature to PerfMonMT as part of GSoC work
......@@ -23,6 +23,9 @@ class PerfMonMTSvc : virtual public IPerfMonMTSvc,
/// Standard Gaudi Service constructor
PerfMonMTSvc( const std::string& name, ISvcLocator* pSvcLocator );
// Destructor
~PerfMonMTSvc();
/// Function declaring the interface(s) implemented by the service
virtual StatusCode queryInterface( const InterfaceID& riid,
void** ppvInterface ) override;
......@@ -41,6 +44,12 @@ class PerfMonMTSvc : virtual public IPerfMonMTSvc,
virtual void stopAud ( const std::string& stepName,
const std::string& compName ) override;
// Report the results
void report();
// Get the IncidentSvc
//virtual void handle ( const Incident& incident ) override;
private:
/// Measurement to capture the CPU time
......
......@@ -10,6 +10,10 @@
#include <ctime>
#include <chrono>
#include "GaudiKernel/MsgStream.h"
#include "GaudiKernel/IMessageSvc.h"
#include "AthenaBaseComps/AthMessaging.h"
/*
* Necessary tools
*/
......@@ -19,6 +23,9 @@ namespace PMonMT {
double get_process_cpu_time();
double get_wall_time();
//IMessageSvc *msgSvc;
//MsgStream msg( msgSvc, "PerfMonMTUtils" );
// Basic Measurement
struct Measurement {
double cpu_time;
......@@ -26,6 +33,12 @@ namespace PMonMT {
void capture() {
cpu_time = get_process_cpu_time();
wall_time = get_wall_time();
IMessageSvc *msgSvc;
MsgStream msg( msgSvc, "PerfMonMTUtils" );
msg << MSG::INFO << "Capture: CPU: " << cpu_time << ", Wall: " << wall_time << endmsg;
//ATH_MSG_INFO("Capture: CPU: " << cpu_time << ", Wall: " << wall_time);
}
Measurement() : cpu_time{0.}, wall_time{0.} { }
};
......@@ -34,13 +47,24 @@ namespace PMonMT {
struct MeasurementData {
double m_tmp_cpu, m_delta_cpu;
double m_tmp_wall, m_delta_wall;
void addPointStart(const Measurement& meas) {
m_tmp_cpu = meas.cpu_time;
m_tmp_wall = meas.wall_time;
IMessageSvc *msgSvc;
MsgStream msg( msgSvc, "PerfMonMTUtils" );
msg << MSG::INFO << "addPointStart: CPU: " << m_tmp_cpu << ", Wall: " << m_tmp_wall << endmsg;
}
void addPointStop(const Measurement& meas) {
m_delta_cpu = meas.cpu_time - m_tmp_cpu;
m_delta_wall = meas.wall_time - m_tmp_wall;
IMessageSvc *msgSvc;
MsgStream msg( msgSvc, "PerfMonMTUtils" );
msg << MSG::INFO << "addPointStop: delta_CPU: " << meas.cpu_time << " - " << m_tmp_cpu << " = " << m_delta_cpu << endmsg;
msg << MSG::INFO << "addPointStop: delta_Wall: " << meas.wall_time << " - " << m_tmp_wall << " = " << m_delta_wall << endmsg;
}
MeasurementData() : m_tmp_cpu{0.}, m_delta_cpu{0.}, m_tmp_wall{0.}, m_delta_wall{0.} { }
};
......
# Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
# Job options configuration file for PerfMonMTSvc
from PerfMonComps.PerfMonCompsConf import PerfMonMTSvc as _PerfMonMTSvc
class PerfMonMTSvc ( _PerfMonMTSvc ):
def __init__(self, name = "PerfMonMTSvc", **kwargs):
kwargs['name'] = name
super( PerfMonMTSvc, self ).__init__( **kwargs )
return
def setDefaults(cls, handle):
from AthenaCommon import CfgMgr
from AthenaCommon.AppMgr import theApp
theApp.AuditAlgorithms = True
theApp.AuditTools = True
theApp.AuditServices = True
from AthenaCommon.AppMgr import ServiceMgr as svcMgr
if hasattr(handle, "getFullJobOptName") :
handleName = handle.getFullJobOptName()
if not handleName in theApp.CreateSvc:
# Be the very first service to be initialized
theApp.CreateSvc = [ handleName ] + theApp.CreateSvc
if not hasattr(svcMgr, 'AuditorSvc'):
from GaudiSvc.GaudiSvcConf import AuditorSvc
svcMgr += AuditorSvc()
pass
......@@ -5,6 +5,16 @@ from AthenaCommon.Logging import logging
log = logging.getLogger("PerfMonMTSvc_jobOptions.py")
log.info("Setting up PerfMonMT...")
from AthenaCommon.AppMgr import ServiceMgr as svcMgr
if not hasattr(svcMgr, 'PerfMonMTSvc'):
from PerfMonComps.MTJobOptCfg import PerfMonMTSvc
svcMgr += PerfMonMTSvc( "PerfMonMTSvc" )
pass
'''
###############################
# Configure AuditorSvc if needed
###############################
......@@ -37,5 +47,7 @@ if not hasattr(topSequence, "PerfMonMTSvcAlg"):
###############################
from AthenaCommon.AppMgr import theApp
theApp.AuditAlgorithms = True
theApp.AuditTools = False
theApp.AuditServices = False
theApp.AuditTools = True
theApp.AuditServices = True
'''
......@@ -10,6 +10,7 @@
// PerfMonComps includes
#include "PerfMonComps/PerfMonMTAuditor.h"
#include "GaudiKernel/MsgStream.h"
/*
* Constructor
......@@ -39,6 +40,8 @@ StatusCode PerfMonMTAuditor::initialize()
* Implementation of base class methods
*/
void PerfMonMTAuditor::before( StandardEventType etype, INamedInterface* component ) {
MsgStream msg( msgSvc(), name() );
//msg << MSG::INFO << "before Standard: " << etype << " - " << component->name() << endmsg;
return m_perfMonMTSvc->startAud( toStr(etype) , component->name() );
}
......@@ -51,12 +54,17 @@ void PerfMonMTAuditor::before( CustomEventTypeRef, INamedInterface* ) {
return;
}
void PerfMonMTAuditor::before( CustomEventTypeRef, const std::string& ) {
void PerfMonMTAuditor::before( CustomEventTypeRef etype, const std::string& component ) {
MsgStream msg( msgSvc(), name() );
//msg << MSG::INFO << "before Custom: " << etype << " - " << component << endmsg;
return m_perfMonMTSvc->startAud( etype , component );
// don't poll for custom event type for now...
return;
//return;
}
void PerfMonMTAuditor::after( StandardEventType etype, INamedInterface* component, const StatusCode& ) {
MsgStream msg( msgSvc(), name() );
//msg << MSG::INFO << "after Standard : " << etype << " - " << component->name() << endmsg;
return m_perfMonMTSvc->stopAud( toStr(etype), component->name() );
}
......@@ -69,7 +77,11 @@ void PerfMonMTAuditor::after( CustomEventTypeRef, INamedInterface*, const Status
return;
}
void PerfMonMTAuditor::after( CustomEventTypeRef, const std::string&, const StatusCode& ) {
void PerfMonMTAuditor::after( CustomEventTypeRef etype, const std::string& component , const StatusCode& sc ) {
MsgStream msg( msgSvc(), name() );
//msg << MSG::INFO << "after Custom: " << etype << " - " << component << endmsg;
return m_perfMonMTSvc->stopAud( etype, component );
// don't poll for custom event type for now...
return;
//return;
}
......@@ -15,6 +15,17 @@
PerfMonMTSvc::PerfMonMTSvc( const std::string& name,
ISvcLocator* pSvcLocator )
: AthService( name, pSvcLocator ) {
ATH_MSG_INFO(" Pre initialization is captured! ");
m_measurement.capture();
m_data[0].addPointStart(m_measurement);
}
PerfMonMTSvc::~PerfMonMTSvc(){
ATH_MSG_INFO("Destructor!!!");
}
/*
......@@ -45,40 +56,31 @@ StatusCode PerfMonMTSvc::initialize() {
ATH_MSG_ERROR("Could not register auditor [PerfMonMTAuditor]!");
return StatusCode::FAILURE;
}
/*
// Get the IncidentSvc to capture the the moment when all components halt
ServiceHandle < IIncidentSvc > incSvc( "IncidentSvc", this->name() );
if( !incSvc.retrieve().isSuccess() ){
ATH_MSG_ERROR("Could not retrieve the IncidentSvc!");
return StatusCode::FAILURE;
}
incSvc->addListener( this, IncidentType::SvcPostFinalize );
*/
return StatusCode::SUCCESS;
}
/*
* Finalize the Service
*/
StatusCode PerfMonMTSvc::finalize() {
StatusCode PerfMonMTSvc::finalize(){
ATH_MSG_INFO("Finalize");
ATH_MSG_INFO("Post Finalization is captured!");
m_measurement.capture();
m_data[2].addPointStop(m_measurement);
ATH_MSG_INFO("=========================================================");
ATH_MSG_INFO(" PerfMonMT Results Summary ");
ATH_MSG_INFO("=========================================================");
ATH_MSG_INFO("Total Wall time in the Initialization is " << m_data[0].m_delta_wall << " ms ");
ATH_MSG_INFO("Total CPU time in the Initialization is " << m_data[0].m_delta_cpu << " ms ");
ATH_MSG_INFO("Average CPU utilization in the Initialization is " <<
m_data[0].m_delta_cpu/m_data[0].m_delta_wall );
ATH_MSG_INFO("");
ATH_MSG_INFO("Total Wall time in the event loop is " << m_data[1].m_delta_wall << " ms ");
ATH_MSG_INFO("Total CPU time in the event loop is " << m_data[1].m_delta_cpu << " ms ");
ATH_MSG_INFO("Average CPU utilization in the event loop is " <<
m_data[1].m_delta_cpu/m_data[1].m_delta_wall );
ATH_MSG_INFO("");
ATH_MSG_INFO("Total Wall time in the Finalize is " << m_data[2].m_delta_wall << " ms ");
ATH_MSG_INFO("Total CPU time in the Finalize is " << m_data[2].m_delta_cpu << " ms ");
ATH_MSG_INFO("Average CPU utilization in the Finalize is " <<
m_data[2].m_delta_cpu/m_data[2].m_delta_wall );
ATH_MSG_INFO("");
ATH_MSG_INFO("=========================================================");
report();
return StatusCode::SUCCESS;
}
......@@ -88,30 +90,30 @@ StatusCode PerfMonMTSvc::finalize() {
void PerfMonMTSvc::startAud( const std::string& stepName,
const std::string& compName ) {
ATH_MSG_INFO("PerfMonMTSvc::startAud: stepName: " << stepName << ", compName: " << compName);
//Last thing to be called before initialize step begins ( NOT HEALTHY )
if ( compName == "SGInputLoader" && stepName == "Initialize"){
ATH_MSG_INFO("PerfMonMTSvc::capturing - ini begin");
m_measurement.capture();
m_data[0].addPointStart(m_measurement);
}
ATH_MSG_INFO("PerfMonMTSvc::startAud: stepName: " << stepName << ", compName: " << compName);
m_measurement.capture(); // just to debug
// Last thing to be called before the event loop begins
if( compName == "AthRegSeq" && stepName == "Start") {
ATH_MSG_INFO("PerfMonMTSvc::capturing - evt begin");
ATH_MSG_INFO("Pre Event Loop is captured!");
m_measurement.capture();
m_data[1].addPointStart(m_measurement);
}
// Last thing to be called before finalize step begins
if ( compName == "AthMasterSeq" && stepName == "Finalize"){
ATH_MSG_INFO("PerfMonMTSvc::capturing - fin begin");
ATH_MSG_INFO("Pre Finalization is captured!");
m_measurement.capture();
m_data[2].addPointStart(m_measurement);
}
// Just to debug: Understand how much time we miss (AuditorSvc finalizes last)
if ( compName == "AuditorSvc" && stepName == "Finalize"){
ATH_MSG_INFO("AUDITOR SERVICE IS CAPTURED!");
m_measurement.capture();
}
}
/*
......@@ -120,10 +122,11 @@ void PerfMonMTSvc::startAud( const std::string& stepName,
void PerfMonMTSvc::stopAud( const std::string& stepName,
const std::string& compName ) {
ATH_MSG_INFO("PerfMonMTSvc::stopAud: stepName: " << stepName << ", compName: " << compName);
m_measurement.capture(); // just to debug
// First thing to be called after the initialize step ends
if ( compName == "AthMasterSeq" && stepName == "Initialize"){
ATH_MSG_INFO("PerfMonMTSvc::capturing - ini end");
ATH_MSG_INFO("Post Initialization is captured!");
m_measurement.capture();
m_data[0].addPointStop(m_measurement);
}
......@@ -131,16 +134,56 @@ void PerfMonMTSvc::stopAud( const std::string& stepName,
// First thing to be called after the event loop ends
if( compName == "AthMasterSeq" && stepName == "Stop") {
ATH_MSG_INFO("PerfMonMTSvc::capturing - evt end");
ATH_MSG_INFO("Post Event Loop is captured!");
m_measurement.capture();
m_data[1].addPointStop(m_measurement);
}
// First thing to be called after the finalize step ends
if ( compName == "AthMasterSeq" && stepName == "Finalize"){
ATH_MSG_INFO("PerfMonMTSvc::capturing - fin end");
// Just to debug: How much time passes in PerfMonMTSvc configuration
if ( compName == "PerfMonMTSvc" && stepName == "Initialize"){
ATH_MSG_INFO("PerfMonMTSvc IS CAPTURED!");
m_measurement.capture();
}
}
/*
void PerfMonMTSvc::handle( const Incident& incident ){
// When all finalize steps of all components end, create report
if(incident.type() == IncidentType::SvcPostFinalize ){
ATH_MSG_INFO("POST FINALIZATION IS CAPTURED!");
m_measurement.capture();
m_data[2].addPointStop(m_measurement);
report();
}
}
*/
// Report the results
void PerfMonMTSvc::report(){
ATH_MSG_INFO("=========================================================");
ATH_MSG_INFO(" PerfMonMT Results Summary ");
ATH_MSG_INFO("=========================================================");
ATH_MSG_INFO("Total Wall time in the Initialization is " << m_data[0].m_delta_wall << " ms ");
ATH_MSG_INFO("Total CPU time in the Initialization is " << m_data[0].m_delta_cpu << " ms ");
ATH_MSG_INFO("Average CPU utilization in the Initialization is " <<
m_data[0].m_delta_cpu/m_data[0].m_delta_wall );
ATH_MSG_INFO("");
ATH_MSG_INFO("Total Wall time in the event loop is " << m_data[1].m_delta_wall << " ms ");
ATH_MSG_INFO("Total CPU time in the event loop is " << m_data[1].m_delta_cpu << " ms ");
ATH_MSG_INFO("Average CPU utilization in the event loop is " <<
m_data[1].m_delta_cpu/m_data[1].m_delta_wall );
ATH_MSG_INFO("");
ATH_MSG_INFO("Total Wall time in the Finalize is " << m_data[2].m_delta_wall << " ms ");
ATH_MSG_INFO("Total CPU time in the Finalize is " << m_data[2].m_delta_cpu << " ms ");
ATH_MSG_INFO("Average CPU utilization in the Finalize is " <<
m_data[2].m_delta_cpu/m_data[2].m_delta_wall );
ATH_MSG_INFO("");
ATH_MSG_INFO("=========================================================");
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment