diff --git a/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt b/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt index 1e271d4231385e54011b24e1f4beeb810e97b330..5fa7952eea22b63b6173d41eedc75c73aba6d6f3 100644 --- a/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt +++ b/Control/PerformanceMonitoring/PerfMonComps/CMakeLists.txt @@ -25,6 +25,7 @@ find_package( Boost ) find_package( PythonLibs ) find_package( ROOT COMPONENTS Core PyROOT ) find_package( pytools ) +find_package( nlohmann_json ) # Component(s) in the package: atlas_add_component( PerfMonComps @@ -34,7 +35,7 @@ atlas_add_component( PerfMonComps LINK_LIBRARIES ${Boost_LIBRARIES} ${ROOT_LIBRARIES} ${PYTHON_LIBRARIES} ${CMAKE_DL_LIBS} AthenaBaseComps AthenaKernel RootUtils CxxUtils PerfMonEvent PerfMonKernel SGTools StoreGateLib GaudiKernel - AthDSoCallBacks ) + AthDSoCallBacks nlohmann_json::nlohmann_json) # Install files from the package: atlas_install_python_modules( python/*.py ) diff --git a/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTSvc.h b/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTSvc.h index 3ef1b02951587ecf1045a985f84df3d99242b46b..dfb62de659567b831b55e0a6279f5b2d64020005 100644 --- a/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTSvc.h +++ b/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTSvc.h @@ -44,9 +44,27 @@ class PerfMonMTSvc : virtual public IPerfMonMTSvc, virtual void stopAud ( const std::string& stepName, const std::string& compName ) override; + + /// Snapshot Auditing: Take snapshots at the beginning and at the end of each step + void startSnapshotAud ( const std::string& stepName, + const std::string& compName ); + + void stopSnapshotAud ( const std::string& stepName, + const std::string& compName ); + + /// Component Level Auditing ( just for serial parts for now) + void startCompLevelAud ( const std::string& stepName, + const std::string& compName ); + + void stopCompLevelAud ( const std::string& stepName, + const std::string& compName ); + // Report the results void report(); + void report2Stdout(); + void report2JsonFile(); + // Get the IncidentSvc //virtual void handle ( const Incident& incident ) override; @@ -56,6 +74,11 @@ class PerfMonMTSvc : virtual public IPerfMonMTSvc, PMonMT::Measurement m_measurement; /// Data to hold the measurement + /// We use pointer to the MeasurementData, because we use new keyword while creating them. Clear! + //std::unordered_map < PMonMT::StepCompPair , PMonMT::MeasurementData* > m_compLevelDataMap; + std::map < PMonMT::StepCompPair , PMonMT::MeasurementData* > m_compLevelDataMap; + + // Clear! PMonMT::MeasurementData m_data[3]; }; // class PerfMonMTSvc diff --git a/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTUtils.h b/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTUtils.h index 1bdbbd779e6106520d98d141e5f0db1a9d61108b..8a71ace2b09e8b653a5439b3505971cfb873e7b5 100644 --- a/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTUtils.h +++ b/Control/PerformanceMonitoring/PerfMonComps/PerfMonComps/PerfMonMTUtils.h @@ -14,6 +14,7 @@ #include "GaudiKernel/IMessageSvc.h" #include "AthenaBaseComps/AthMessaging.h" + /* * Necessary tools */ @@ -26,6 +27,7 @@ namespace PMonMT { //IMessageSvc *msgSvc; //MsgStream msg( msgSvc, "PerfMonMTUtils" ); + // Basic Measurement struct Measurement { double cpu_time; @@ -37,7 +39,7 @@ namespace PMonMT { IMessageSvc *msgSvc; MsgStream msg( msgSvc, "PerfMonMTUtils" ); - msg << MSG::INFO << "Capture: CPU: " << cpu_time << ", Wall: " << wall_time << endmsg; + //msg << MSG::INFO << std::fixed << "Capture: CPU: " << cpu_time << ", Wall: " << wall_time << endmsg; //ATH_MSG_INFO("Capture: CPU: " << cpu_time << ", Wall: " << wall_time); } Measurement() : cpu_time{0.}, wall_time{0.} { } @@ -55,7 +57,7 @@ namespace PMonMT { IMessageSvc *msgSvc; MsgStream msg( msgSvc, "PerfMonMTUtils" ); - msg << MSG::INFO << "addPointStart: CPU: " << m_tmp_cpu << ", Wall: " << m_tmp_wall << endmsg; + //msg << MSG::INFO << "addPointStart: CPU: " << m_tmp_cpu << ", Wall: " << m_tmp_wall << endmsg; } void addPointStop(const Measurement& meas) { m_delta_cpu = meas.cpu_time - m_tmp_cpu; @@ -63,13 +65,64 @@ namespace PMonMT { IMessageSvc *msgSvc; MsgStream msg( msgSvc, "PerfMonMTUtils" ); - msg << MSG::INFO << "addPointStop: delta_CPU: " << meas.cpu_time << " - " << m_tmp_cpu << " = " << m_delta_cpu << endmsg; - msg << MSG::INFO << "addPointStop: delta_Wall: " << meas.wall_time << " - " << m_tmp_wall << " = " << m_delta_wall << endmsg; + //msg << MSG::INFO << "addPointStop: delta_CPU: " << meas.cpu_time << " - " << m_tmp_cpu << " = " << m_delta_cpu << endmsg; + //msg << MSG::INFO << "addPointStop: delta_Wall: " << meas.wall_time << " - " << m_tmp_wall << " = " << m_delta_wall << endmsg; } MeasurementData() : m_tmp_cpu{0.}, m_delta_cpu{0.}, m_tmp_wall{0.}, m_delta_wall{0.} { } }; + + // Step name and Component name pairs. Ex: Initialize - StoreGateSvc + struct StepCompPair { + + std::string stepName; + std::string compName; + + // Overload std::less + bool operator<(const StepCompPair& sc) const { + //return (this->compName < t.compName); + //return std::make_pair( this->compName, this->stepName ) < std::make_pair( sc.compName, sc.stepName ); + //return std::make_pair( sc.compName, sc.stepName ) < std::make_pair( this->compName, this->stepName ) ; + return std::make_pair( this->stepName, this->compName ) < std::make_pair( sc.stepName, sc.compName ); + } + + /* Overload < operator for std::map, because we are using custom struct as a key + bool operator < (const StepCompPair& A) const { + return stepName < A.stepName; + }*/ + + }; + } +/* +namespace std { + + auto comp = [](const PMonMT::StepCompPair& c1, const PMonMT::StepCompPair& c2){ + return c1.stepName < c2.stepName || (c1.stepName == c2.stepName && c1.compName < c2.compName); + }; + +};*/ + + + +/* For usage of undordered_map +namespace std { + template <> struct hash<PMonMT::StepCompPair> { + size_t operator()(const PMonMT::StepCompPair & x) const { + std::hash<std::string> h; + return h(x.stepName) ^ h(x.compName); + } + }; + + bool operator == (const PMonMT::StepCompPair &s1, const PMonMT::StepCompPair &s2) + { + if( s1.stepName == s2.stepName && s1.compName == s2.compName) + return true; + else + return false; + } +} +*/ /////////////////////////////////////////////////////////////////// // Inline methods: /////////////////////////////////////////////////////////////////// diff --git a/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx b/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx index 199ac56e7c6891928dd5e9af86625a2ca6cdf02d..f07d128f937cc3dd6e01ffab2da6a1d4c1c59f64 100644 --- a/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx +++ b/Control/PerformanceMonitoring/PerfMonComps/src/PerfMonMTSvc.cxx @@ -9,6 +9,12 @@ #include "PerfMonComps/PerfMonMTSvc.h" #include "PerfMonUtils.h" // borrow from existing code +#include <fstream> + +#include <nlohmann/json.hpp> + +using json = nlohmann::json; // for convenience + /* * Constructor */ @@ -51,6 +57,9 @@ StatusCode PerfMonMTSvc::queryInterface( const InterfaceID& riid, StatusCode PerfMonMTSvc::initialize() { ATH_MSG_INFO("Initialize"); + // Trying to start our service as soon as possible + startAud ( "Initialize", "PerfMonMTSlice" ); + /// Configure the auditor if( !PerfMon::makeAuditor("PerfMonMTAuditor", auditorSvc(), msg()).isSuccess()) { ATH_MSG_ERROR("Could not register auditor [PerfMonMTAuditor]!"); @@ -89,44 +98,61 @@ StatusCode PerfMonMTSvc::finalize(){ */ void PerfMonMTSvc::startAud( const std::string& stepName, const std::string& compName ) { - + if( compName != "PerfMonMTSvc" ){ + startSnapshotAud(stepName, compName); + startCompLevelAud(stepName, compName); + } +} + +/* + * Stop Auditing + */ +void PerfMonMTSvc::stopAud( const std::string& stepName, + const std::string& compName ) { + if( compName != "PerfMonMTSvc" ){ + stopCompLevelAud(stepName, compName); + stopSnapshotAud(stepName, compName); + } +} + +void PerfMonMTSvc::startSnapshotAud( const std::string& stepName, + const std::string& compName ) { + ATH_MSG_INFO("PerfMonMTSvc::startAud: stepName: " << stepName << ", compName: " << compName); m_measurement.capture(); // just to debug - + // Last thing to be called before the event loop begins if( compName == "AthRegSeq" && stepName == "Start") { - ATH_MSG_INFO("Pre Event Loop is captured!"); + //ATH_MSG_INFO("Pre Event Loop is captured!"); m_measurement.capture(); m_data[1].addPointStart(m_measurement); } // Last thing to be called before finalize step begins if ( compName == "AthMasterSeq" && stepName == "Finalize"){ - ATH_MSG_INFO("Pre Finalization is captured!"); + //ATH_MSG_INFO("Pre Finalization is captured!"); m_measurement.capture(); m_data[2].addPointStart(m_measurement); } // Just to debug: Understand how much time we miss (AuditorSvc finalizes last) if ( compName == "AuditorSvc" && stepName == "Finalize"){ - ATH_MSG_INFO("AUDITOR SERVICE IS CAPTURED!"); + //ATH_MSG_INFO("AUDITOR SERVICE IS CAPTURED!"); m_measurement.capture(); - } - + } + } -/* - * Stop Auditing - */ -void PerfMonMTSvc::stopAud( const std::string& stepName, - const std::string& compName ) { +void PerfMonMTSvc::stopSnapshotAud( const std::string& stepName, + const std::string& compName ) { + ATH_MSG_INFO("PerfMonMTSvc::stopAud: stepName: " << stepName << ", compName: " << compName); m_measurement.capture(); // just to debug // First thing to be called after the initialize step ends if ( compName == "AthMasterSeq" && stepName == "Initialize"){ - ATH_MSG_INFO("Post Initialization is captured!"); + //ATH_MSG_INFO("Post Initialization is captured!"); m_measurement.capture(); m_data[0].addPointStop(m_measurement); } @@ -134,18 +160,54 @@ void PerfMonMTSvc::stopAud( const std::string& stepName, // First thing to be called after the event loop ends if( compName == "AthMasterSeq" && stepName == "Stop") { - ATH_MSG_INFO("Post Event Loop is captured!"); + //ATH_MSG_INFO("Post Event Loop is captured!"); m_measurement.capture(); m_data[1].addPointStop(m_measurement); } // Just to debug: How much time passes in PerfMonMTSvc configuration if ( compName == "PerfMonMTSvc" && stepName == "Initialize"){ - ATH_MSG_INFO("PerfMonMTSvc IS CAPTURED!"); + //ATH_MSG_INFO("PerfMonMTSvc IS CAPTURED!"); m_measurement.capture(); } } + + +void PerfMonMTSvc::startCompLevelAud( const std::string& stepName, + const std::string& compName) { + + PMonMT::StepCompPair currentState; + currentState.stepName = stepName; + currentState.compName = compName; + + m_measurement.capture(); + + /* + * Dynamically create a MeasurementData instance for the current step-component pair + * Do not forget to delete the dynamically allocated space! + */ + m_compLevelDataMap[currentState] = new PMonMT::MeasurementData; + m_compLevelDataMap[currentState]->addPointStart(m_measurement); + +} + +void PerfMonMTSvc::stopCompLevelAud( const std::string& stepName, + const std::string& compName) { + + m_measurement.capture(); + + PMonMT::StepCompPair currentState; + currentState.stepName = stepName; + currentState.compName = compName; + + // We do not need this, because we have just created it in startCompLevelAud + //m_compLevelDataMap[currentState] = new PMonMT::MeasurementData; + m_compLevelDataMap[currentState]->addPointStop(m_measurement); + +} + + /* void PerfMonMTSvc::handle( const Incident& incident ){ @@ -162,6 +224,71 @@ void PerfMonMTSvc::handle( const Incident& incident ){ // Report the results void PerfMonMTSvc::report(){ + report2Stdout(); + report2JsonFile(); + + /* + ATH_MSG_INFO("========================================================="); + ATH_MSG_INFO(" PerfMonMT Results Summary "); + ATH_MSG_INFO("========================================================="); + + ATH_MSG_INFO("Total Wall time in the Initialization is " << m_data[0].m_delta_wall << " ms "); + ATH_MSG_INFO("Total CPU time in the Initialization is " << m_data[0].m_delta_cpu << " ms "); + ATH_MSG_INFO("Average CPU utilization in the Initialization is " << + m_data[0].m_delta_cpu/m_data[0].m_delta_wall ); + ATH_MSG_INFO(""); + + ATH_MSG_INFO("Total Wall time in the event loop is " << m_data[1].m_delta_wall << " ms "); + ATH_MSG_INFO("Total CPU time in the event loop is " << m_data[1].m_delta_cpu << " ms "); + ATH_MSG_INFO("Average CPU utilization in the event loop is " << + m_data[1].m_delta_cpu/m_data[1].m_delta_wall ); + ATH_MSG_INFO(""); + + ATH_MSG_INFO("Total Wall time in the Finalize is " << m_data[2].m_delta_wall << " ms "); + ATH_MSG_INFO("Total CPU time in the Finalize is " << m_data[2].m_delta_cpu << " ms "); + ATH_MSG_INFO("Average CPU utilization in the Finalize is " << + m_data[2].m_delta_cpu/m_data[2].m_delta_wall ); + ATH_MSG_INFO(""); + + ATH_MSG_INFO("========================================================="); + + ATH_MSG_INFO("========================================================="); + ATH_MSG_INFO(" Component Level Monitoring "); + ATH_MSG_INFO("========================================================="); + + // Clear! -> + ATH_MSG_INFO( "Step CPU Wall Component" ); + for(auto& it : m_compLevelDataMap) + ATH_MSG_INFO( it.first.stepName << ": " << it.second->m_delta_cpu << " - " << it.second->m_delta_wall << " " << it.first.compName ); + + for(auto& it : m_compLevelDataMap) + delete it.second; + */ +} + +void PerfMonMTSvc::report2JsonFile(){ + + json j; + + for(auto& it : m_compLevelDataMap){ + + std::string stepName = it.first.stepName; + std::string compName = it.first.compName; + + double wall_time = it.second->m_delta_wall; + double cpu_time = it.second->m_delta_cpu; + + j[stepName][compName] = { {"cpu_time", cpu_time}, {"wall_time", wall_time} }; + + } + + std::ofstream o("pretty.json"); + o << std::setw(4) << j << std::endl; + +} + +void PerfMonMTSvc::report2Stdout(){ + ATH_MSG_INFO("========================================================="); ATH_MSG_INFO(" PerfMonMT Results Summary "); ATH_MSG_INFO("========================================================="); @@ -186,4 +313,17 @@ void PerfMonMTSvc::report(){ ATH_MSG_INFO("========================================================="); + ATH_MSG_INFO("========================================================="); + ATH_MSG_INFO(" Component Level Monitoring "); + ATH_MSG_INFO("========================================================="); + + // Clear! -> + ATH_MSG_INFO( "Step CPU Wall Component" ); + for(auto& it : m_compLevelDataMap) + ATH_MSG_INFO( it.first.stepName << ": " << it.second->m_delta_cpu << " - " << it.second->m_delta_wall << " " << it.first.compName ); + + for(auto& it : m_compLevelDataMap) + delete it.second; + } +