From 77b6ba5fb8fd1b71ff7c405393253ebddaef2d6d Mon Sep 17 00:00:00 2001
From: Marcin Nowak <Marcin.Nowak@cern.ch>
Date: Mon, 31 May 2021 18:59:00 +0200
Subject: [PATCH] Add configurable DHForm cache size to DataHeaderCnv

New property "AthenaPoolCnvSvc.maxDHFormCacheSize" controls the max size
of DHForm cache in DataHeaderCnv. Default size is 100. The setting works
for each input file.
Introduced to avoid excesive memory usage in case of files with many
large DHForms. (noticed in ATLASRECTS-6370)
---
 .../AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h   |  3 +
 .../src/DataHeaderCnv.cxx                     | 62 ++++++++++++++-----
 .../src/DataHeaderCnv.h                       | 12 +++-
 3 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h b/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h
index 31241fbe6d89..eb58529bd49d 100644
--- a/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h
+++ b/Database/AthenaPOOL/AthenaPoolCnvSvc/src/AthenaPoolCnvSvc.h
@@ -255,6 +255,9 @@ private: // properties
    StringProperty  m_streamPortString{this,"StreamPortString","?pmerge=localhost:0"};
    /// When using TMemFile call Write on number of Events, respecting CollectionTree auto_flush
    IntegerProperty m_numberEventsPerWrite{this,"NumberEventsPerWrite",-1};
+
+   /// Property for DataHeaderCnv input DHForm cache size
+   IntegerProperty  m_DHFormCacheSize { this, "maxDHFormCacheSize", 100 };
 };
 
 #endif
diff --git a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx
index b8f10827510c..5ecd276e8797 100755
--- a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx
+++ b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.cxx
@@ -44,8 +44,21 @@ DataHeaderCnv::~DataHeaderCnv()
 //______________________________________________________________________________
 StatusCode DataHeaderCnv::initialize()
 {
-   // listen to EndInputFile incidents to clear old DataHeaderForms from the cache
-   //Get IncidentSvc
+   m_inDHFMapMaxsize = 100;   // default DHForm cache size
+   IConversionSvc* cnvSvc(nullptr);
+   if( service("AthenaPoolCnvSvc", cnvSvc, true ).isSuccess() ) {
+      IProperty* prop = dynamic_cast<IProperty*>( cnvSvc );
+      if( prop ) {
+         IntegerProperty sizeProp("maxDHFormCacheSize", m_inDHFMapMaxsize);
+         if( prop->getProperty(&sizeProp).isSuccess() ) {
+            m_inDHFMapMaxsize = sizeProp.value();
+         }
+      }
+   }
+   ATH_MSG_VERBOSE("Using DHForm cache size: " << m_inDHFMapMaxsize);
+
+   // Listen to EndInputFile incidents to clear old DataHeaderForms from the cache
+   // Get IncidentSvc
    ServiceHandle<IIncidentSvc> incSvc("IncidentSvc", "DataHeaderCnv");
    ATH_CHECK( incSvc.retrieve() );
    incSvc->addListener(this, IncidentType::EndInputFile, 0);
@@ -57,17 +70,24 @@ void DataHeaderCnv::handle(const Incident& incident)
 {
    if( incident.type() == IncidentType::EndInputFile ) {
       // remove cached DHForms that came from the file that is now being closed
-      const std::string& guid = static_cast<const FileIncident&>(incident).fileGuid(); 
-      auto iter = m_persFormMap.begin();
-      while( iter != m_persFormMap.end() ) {
-         size_t dbpos = iter->first.find("[DB=");
-         if( dbpos != std::string::npos && iter->first.substr(dbpos+4, dbpos+36) == guid ) {
-            iter = m_persFormMap.erase( iter );
-         } else {
-            iter++;
-         }
+      const std::string& guid = static_cast<const FileIncident&>(incident).fileGuid();
+      clearInputDHFormCache( guid );
+   }
+}
+
+
+void DataHeaderCnv::clearInputDHFormCache( const std::string& dbGuid )
+{
+   auto iter = m_inputDHForms.begin();
+   while( iter != m_inputDHForms.end() ) {
+      size_t dbpos = iter->first.find("[DB=");
+      if( dbpos != std::string::npos && iter->first.substr(dbpos+4, dbpos+36) == dbGuid ) {
+         iter = m_inputDHForms.erase( iter );
+      } else {
+         iter++;
       }
    }
+   m_inDHFormCount[ dbGuid ] = 0;
 }
 
 //______________________________________________________________________________
@@ -265,10 +285,20 @@ std::unique_ptr<DataHeader_p6> DataHeaderCnv::poolReadObject_p6()
       throw std::runtime_error("Could not get object for token = " + m_i_poolToken->toString());
    }
    std::unique_ptr<DataHeader_p6> header( reinterpret_cast<DataHeader_p6*>(voidPtr1) );
-      
+
    // see if the DataHeaderForm is already cached
-   std::unique_ptr<DataHeaderForm_p6>& dh_form = m_persFormMap[ header->dhFormToken() ];
-   if( !dh_form ) {
+   const std::string &dhFormToken =  header->dhFormToken();
+   if( m_inputDHForms.find(dhFormToken) == m_inputDHForms.end() ) {
+      // no cached DHForm
+      size_t dbpos = dhFormToken.find("[DB=");
+      if( dbpos != std::string::npos ) {
+         const std::string dbGuid = dhFormToken.substr(dbpos+4, dbpos+36);
+         if( ++m_inDHFormCount[dbGuid] > m_inDHFMapMaxsize ) {
+            // prevent the input DHFMap from growing too large
+            clearInputDHFormCache( dbGuid );
+            m_inDHFormCount[dbGuid] = 1;
+         }
+      }
       // we need to read a new DHF
       void* voidPtr2 = nullptr;
       Token mapToken;
@@ -280,7 +310,7 @@ std::unique_ptr<DataHeader_p6> DataHeaderCnv::poolReadObject_p6()
             throw std::runtime_error("Could not get object for token = " + mapToken.toString());
          }
       }
-      dh_form.reset( reinterpret_cast<DataHeaderForm_p6*>(voidPtr2) );
+      m_inputDHForms[dhFormToken].reset( reinterpret_cast<DataHeaderForm_p6*>(voidPtr2) );
    }
    return header;
 }
@@ -312,7 +342,7 @@ DataHeader* DataHeaderCnv::createTransient() {
    try {
       if( compareClassGuid( p6_guid ) ) {
          std::unique_ptr<DataHeader_p6> header( poolReadObject_p6() );
-         auto dh = m_tpInConverter.createTransient( header.get(), *(m_persFormMap[ header->dhFormToken() ]) );
+         auto dh = m_tpInConverter.createTransient( header.get(), *(m_inputDHForms[ header->dhFormToken() ]) );
          // To dump the DataHeader uncomment below
          // std::ostringstream ss;  dh->dump(ss); cout << ss.str() << endl;
          return dh;
diff --git a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h
index 083d247389b1..708221ab3670 100755
--- a/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h
+++ b/Database/PersistentDataModelAthenaPool/src/DataHeaderCnv.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
+  Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration
 */
 
 #ifndef DATAHEADERCNV_H
@@ -55,6 +55,9 @@ public:
    /// Incident service handle listening for EndInputFile.
    virtual void handle(const Incident& incident) override;
 
+   /// Delete cached DHForms for a given input file GUID
+   void clearInputDHFormCache( const std::string& dbGuid );
+
    /// query if a new DHForm was written in the last createPersistent()
    bool         wroteNewDHForm()    { return m_wroteDHForm; }
   
@@ -66,6 +69,13 @@ protected:
 
    /// DHForm cache indexed by filename or reference for writing
    std::map<std::string,  std::unique_ptr<DataHeaderForm_p6> >    m_persFormMap;
+   /// DHForm cache indexed by its parent DataHeader reference (for  readinh)
+   std::map<std::string,  std::unique_ptr<DataHeaderForm_p6> >  m_inputDHForms;
+
+   /// How many DHForms for an input file are in the cache
+   std::map<std::string,  unsigned>                             m_inDHFormCount;
+   /// Max DHForms to cache per input file
+   unsigned                                                     m_inDHFMapMaxsize;
 
    /// true if the last writing of the DataHeader had to write a new DHForm
    bool                 m_wroteDHForm {false};
-- 
GitLab