From 26890325c35a5a57b07f25be99aaddf211a75255 Mon Sep 17 00:00:00 2001 From: Chris Jones <jonesc@hep.phy.cam.ac.uk> Date: Tue, 17 Jan 2023 13:12:44 +0000 Subject: [PATCH] ProcStats: Attempt to reconnect to process proc stat when information appears corrupted --- GaudiAud/src/ProcStats.cpp | 29 +++++++++++++++++------------ GaudiAud/src/ProcStats.h | 13 ++++++++----- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/GaudiAud/src/ProcStats.cpp b/GaudiAud/src/ProcStats.cpp index a96beee71e..386115a512 100644 --- a/GaudiAud/src/ProcStats.cpp +++ b/GaudiAud/src/ProcStats.cpp @@ -30,10 +30,6 @@ # endif // __linux__ # include <cstdio> -using std::cerr; -using std::cout; -using std::endl; - /* Format of the Linux proc/stat (man 5 proc, kernel 2.6.35): pid %d The process ID. @@ -245,35 +241,39 @@ ProcStats* ProcStats::instance() { return &inst; } -ProcStats::ProcStats() { +void ProcStats::open_ufd() { + m_valid = false; #if defined( __linux__ ) or defined( __APPLE__ ) + m_ufd.close(); m_pg_size = sysconf( _SC_PAGESIZE ); // getpagesize(); const auto fname = "/proc/" + std::to_string( getpid() ) + "/stat"; m_ufd.open( fname.c_str(), O_RDONLY ); if ( !m_ufd ) { - cerr << "Failed to open " << fname << endl; - return; + std::cerr << "ProcStats : Failed to open " << fname << std::endl; + } else { + m_valid = true; } #endif // __linux__ or __APPLE__ - m_valid = true; } +ProcStats::ProcStats() { open_ufd(); } + bool ProcStats::fetch( procInfo& f ) { if ( !m_valid ) { return false; } - std::scoped_lock lock{ m_mutex }; + std::scoped_lock lock{m_mutex}; #if defined( __linux__ ) or defined( __APPLE__ ) - double pr_size{ 0 }, pr_rssize{ 0 }; + double pr_size{0}, pr_rssize{0}; linux_proc pinfo; - int cnt{ 0 }; + int cnt{0}; char buf[500]; m_ufd.lseek( 0, SEEK_SET ); if ( ( cnt = m_ufd.read( buf, sizeof( buf ) ) ) < 0 ) { - cout << "LINUX Read of Proc file failed:" << endl; + std::cerr << "ProcStats : LINUX Read of Proc file failed:" << std::endl; return false; } @@ 
-310,5 +310,10 @@ bool ProcStats::fetch( procInfo& f ) { m_curr.rss = f.rss; m_curr.vsize = f.vsize; + if ( 0 == m_curr.vsize ) { + std::cerr << "ProcStats : 0==vsize -> Will try reopening process proc stats" << std::endl; + open_ufd(); + } + return rc; } diff --git a/GaudiAud/src/ProcStats.h b/GaudiAud/src/ProcStats.h index 0452127134..27c22ea08f 100644 --- a/GaudiAud/src/ProcStats.h +++ b/GaudiAud/src/ProcStats.h @@ -46,8 +46,8 @@ struct procInfo { } // see proc(4) man pages for units and a description - double vsize{ 0 }; // in MB (used to be in pages?) - double rss{ 0 }; // in MB (used to be in pages?) + double vsize{0}; // in MB (used to be in pages?) + double rss{0}; // in MB (used to be in pages?) }; class ProcStats { @@ -62,7 +62,7 @@ private: class unique_fd { private: - int m_fd{ -1 }; + int m_fd{-1}; unique_fd( const unique_fd& ) = delete; unique_fd& operator=( const unique_fd& ) = delete; @@ -107,10 +107,13 @@ private: #undef unique_fd_forward }; +private: + void open_ufd(); + private: unique_fd m_ufd; - double m_pg_size{ 0 }; + double m_pg_size{0}; procInfo m_curr; - bool m_valid{ false }; + bool m_valid{false}; std::mutex m_mutex; }; -- GitLab