From 77b6ba5fb8fd1b71ff7c405393253ebddaef2d6d Mon Sep 17 00:00:00 2001 From: Marcin Nowak <Marcin.Nowak@cern.ch> Date: Mon, 31 May 2021 18:59:00 +0200 Subject: [PATCH] Add configurable DHForm cache size to DataHeaderCnv New property "AthenaPoolCnvSvc.maxDHFormCacheSize" controls the max size of DHForm cache in DataHeaderCnv. Default size is 100. The setting works for each input file. Introduced to avoid excesive memory usage in case of files with many large DHForms. (noticed in ATLASRECTS-6370) --- .../AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h | 3 + .../src/DataHeaderCnv.cxx | 62 ++++++++++++++----- .../src/DataHeaderCnv.h | 12 +++- 3 files changed, 60 insertions(+), 17 deletions(-) diff --git a/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h b/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h index 31241fbe6d89..eb58529bd49d 100644 --- a/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h +++ b/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h @@ -255,6 +255,9 @@ private: // properties StringProperty m_streamPortString{this,"StreamPortString","?pmerge=localhost:0"}; /// When using TMemFile call Write on number of Events, respecting CollectionTree auto_flush IntegerProperty m_numberEventsPerWrite{this,"NumberEventsPerWrite",-1}; + + /// Property for DataHeaderCnv input DHForm cache size + IntegerProperty m_DHFormCacheSize { this, "maxDHFormCacheSize", 100 }; }; #endif diff --git a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx index b8f10827510c..5ecd276e8797 100755 --- a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx +++ b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx @@ -44,8 +44,21 @@ DataHeaderCnv::~DataHeaderCnv() //______________________________________________________________________________ StatusCode DataHeaderCnv::initialize() { - // listen to EndInputFile incidents to clear old DataHeaderForms from the cache - //Get IncidentSvc + m_inDHFMapMaxsize = 100; // default DHForm cache size + IConversionSvc* cnvSvc(nullptr); + if( service("AthenaPoolCnvSvc", cnvSvc, true ).isSuccess() ) { + IProperty* prop = dynamic_cast<IProperty*>( cnvSvc ); + if( prop ) { + IntegerProperty sizeProp("maxDHFormCacheSize", m_inDHFMapMaxsize); + if( prop->getProperty(&sizeProp).isSuccess() ) { + m_inDHFMapMaxsize = sizeProp.value(); + } + } + } + ATH_MSG_VERBOSE("Using DHForm cache size: " << m_inDHFMapMaxsize); + + // Listen to EndInputFile incidents to clear old DataHeaderForms from the cache + // Get IncidentSvc ServiceHandle<IIncidentSvc> incSvc("IncidentSvc", "DataHeaderCnv"); ATH_CHECK( incSvc.retrieve() ); incSvc->addListener(this, IncidentType::EndInputFile, 0); @@ -57,17 +70,24 @@ void DataHeaderCnv::handle(const Incident& incident) { if( incident.type() == IncidentType::EndInputFile ) { // remove cached DHForms that came from the file that is now being closed - const std::string& guid = static_cast<const FileIncident&>(incident).fileGuid(); - auto iter = m_persFormMap.begin(); - while( iter != m_persFormMap.end() ) { - size_t dbpos = iter->first.find("[DB="); - if( dbpos != std::string::npos && iter->first.substr(dbpos+4, dbpos+36) == guid ) { - iter = m_persFormMap.erase( iter ); - } else { - iter++; - } + const std::string& guid = static_cast<const FileIncident&>(incident).fileGuid(); + clearInputDHFormCache( guid ); + } +} + + +void DataHeaderCnv::clearInputDHFormCache( const std::string& dbGuid ) +{ + auto iter = m_inputDHForms.begin(); + while( iter != m_inputDHForms.end() ) { + size_t dbpos = iter->first.find("[DB="); + if( dbpos != std::string::npos && iter->first.substr(dbpos+4, dbpos+36) == dbGuid ) { + iter = m_inputDHForms.erase( iter ); + } else { + iter++; } } + m_inDHFormCount[ dbGuid ] = 0; } //______________________________________________________________________________ @@ -265,10 +285,20 @@ std::unique_ptr<DataHeader_p6> DataHeaderCnv::poolReadObject_p6() throw std::runtime_error("Could not get object for token = " + m_i_poolToken->toString()); } std::unique_ptr<DataHeader_p6> header( reinterpret_cast<DataHeader_p6*>(voidPtr1) ); - + // see if the DataHeaderForm is already cached - std::unique_ptr<DataHeaderForm_p6>& dh_form = m_persFormMap[ header->dhFormToken() ]; - if( !dh_form ) { + const std::string &dhFormToken = header->dhFormToken(); + if( m_inputDHForms.find(dhFormToken) == m_inputDHForms.end() ) { + // no cached DHForm + size_t dbpos = dhFormToken.find("[DB="); + if( dbpos != std::string::npos ) { + const std::string dbGuid = dhFormToken.substr(dbpos+4, dbpos+36); + if( ++m_inDHFormCount[dbGuid] > m_inDHFMapMaxsize ) { + // prevent the input DHFMap from growing too large + clearInputDHFormCache( dbGuid ); + m_inDHFormCount[dbGuid] = 1; + } + } // we need to read a new DHF void* voidPtr2 = nullptr; Token mapToken; @@ -280,7 +310,7 @@ std::unique_ptr<DataHeader_p6> DataHeaderCnv::poolReadObject_p6() throw std::runtime_error("Could not get object for token = " + mapToken.toString()); } } - dh_form.reset( reinterpret_cast<DataHeaderForm_p6*>(voidPtr2) ); + m_inputDHForms[dhFormToken].reset( reinterpret_cast<DataHeaderForm_p6*>(voidPtr2) ); } return header; } @@ -312,7 +342,7 @@ DataHeader* DataHeaderCnv::createTransient() { try { if( compareClassGuid( p6_guid ) ) { std::unique_ptr<DataHeader_p6> header( poolReadObject_p6() ); - auto dh = m_tpInConverter.createTransient( header.get(), *(m_persFormMap[ header->dhFormToken() ]) ); + auto dh = m_tpInConverter.createTransient( header.get(), *(m_inputDHForms[ header->dhFormToken() ]) ); // To dump the DataHeader uncomment below // std::ostringstream ss; dh->dump(ss); cout << ss.str() << endl; return dh; diff --git a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h index 083d247389b1..708221ab3670 100755 --- a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h +++ b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration + Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration */ #ifndef DATAHEADERCNV_H @@ -55,6 +55,9 @@ public: /// Incident service handle listening for EndInputFile. virtual void handle(const Incident& incident) override; + /// Delete cached DHForms for a given input file GUID + void clearInputDHFormCache( const std::string& dbGuid ); + /// query if a new DHForm was written in the last createPersistent() bool wroteNewDHForm() { return m_wroteDHForm; } @@ -66,6 +69,13 @@ protected: /// DHForm cache indexed by filename or reference for writing std::map<std::string, std::unique_ptr<DataHeaderForm_p6> > m_persFormMap; + /// DHForm cache indexed by its parent DataHeader reference (for readinh) + std::map<std::string, std::unique_ptr<DataHeaderForm_p6> > m_inputDHForms; + + /// How many DHForms for an input file are in the cache + std::map<std::string, unsigned> m_inDHFormCount; + /// Max DHForms to cache per input file + unsigned m_inDHFMapMaxsize; /// true if the last writing of the DataHeader had to write a new DHForm bool m_wroteDHForm {false}; -- GitLab