Skip to content
Snippets Groups Projects

Draft: Prototype of MetaReader caching

Open Tadej Novak requested to merge tadej/athena:metareader-cache into main
2 unresolved threads
2 files
+ 58
41
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -3,6 +3,9 @@
from PyUtils.MetaReader import read_metadata, lite_primary_keys_to_keep, lite_TagInfo_keys_to_keep
from AthenaCommon.Logging import logging
from functools import lru_cache
from pathlib import Path
import pickle
import os
msg = logging.getLogger('AutoConfigFlags')
@@ -10,29 +13,57 @@ msg = logging.getLogger('AutoConfigFlags')
_fileMetaData = dict()
class DynamicallyLoadMetadata:
def __init__(self, filename, maxLevel='peeker'):
    """Read the 'lite' metadata of ``filename`` and optionally merge a cached copy.

    Args:
        filename: Input file handed to ``read_metadata``; also the key used
            to pick this file's entry out of the returned mapping.
        maxLevel: Stored as ``maxAccessLevel``; the highest access level
            later lookups are allowed to escalate to.

    If the ``ATHENA_CACHE_METADATA`` environment variable holds a truthy
    value, a pickle named ``<file_guid>.metadata`` (relative path, so it is
    resolved against the current working directory) is merged into
    ``self.metadata`` when it exists.
    """
    self.metadata = {}
    self.filename = filename
    self.currentAccessLevel = 'lite'
    self.maxAccessLevel = maxLevel

    thisFileMD = read_metadata(filename, None, 'lite')
    self.metadata.update(thisFileMD[self.filename])
    msg.debug("Loaded using 'lite' %s", str(self.metadata))

    # bool() of any non-empty string is True, so values such as "0" or
    # "false" would switch caching on. Parse the setting explicitly.
    cache_setting = os.environ.get('ATHENA_CACHE_METADATA', '')
    self.cache = cache_setting.strip().lower() not in ('', '0', 'false', 'no', 'off')
    if not self.cache:
        return

    msg.warning("Metadata caching is enabled. This should only be used for development purposes!")
    cache_file = Path(f'{self.metadata["file_guid"]}.metadata')
    if cache_file.exists():
        msg.info("Loading the metadata from the cache for %s", self.filename)
        # NOTE(review): pickle.load can execute arbitrary code contained in
        # the file. Only acceptable while the cache is a local,
        # developer-owned file; do not point this at shared locations.
        with cache_file.open('rb') as f:
            self.metadata.update(pickle.load(f))
def _loadMore(self, level):
    """Re-read the file's metadata at ``level`` and merge it into ``self.metadata``.

    ``level`` is recorded as the current access level before the read.
    When ``self.cache`` is set, the merged metadata is pickled to
    ``<file_guid>.metadata``.
    """
    self.currentAccessLevel = level
    deeper = read_metadata(self.filename, None, level)[self.filename]
    self.metadata.update(deeper)

    if not self.cache:
        return

    msg.info("Caching the metadata for %s", self.filename)
    cache_name = f'{self.metadata["file_guid"]}.metadata'
    with open(cache_name, 'wb') as sink:
        pickle.dump(self.metadata, sink)
def get(self, key, default):
    """Return the metadata value for ``key``, reading deeper levels on demand.

    Args:
        key: Metadata key to look up.
        default: Value returned when the key cannot be found.

    Returns:
        ``self.metadata[key]`` if the key is present, possibly after
        escalating the access level; otherwise ``default``.
    """
    if key in self.metadata:
        return self.metadata[key]

    if key in lite_primary_keys_to_keep or key in lite_TagInfo_keys_to_keep:
        # The key is on one of the 'lite' keep-lists: no need to load more.
        return default

    if self.currentAccessLevel == self.maxAccessLevel:
        return default

    # Escalate one level at a time and never beyond maxAccessLevel. The
    # 'peeker' -> 'full' step is taken only when 'full' is allowed:
    # maxAccessLevel is a plain attribute that may be reassigned after
    # construction, so it can be lower than the level already loaded.
    if self.currentAccessLevel == 'lite':
        levels = ['peeker', 'full'] if self.maxAccessLevel == 'full' else ['peeker']
    elif self.currentAccessLevel == 'peeker' and self.maxAccessLevel == 'full':
        levels = ['full']
    else:
        levels = []

    for level in levels:
        msg.info("Looking into the file in '%s' mode as the configuration requires more details: %s ", level, key)
        self._loadMore(level)
        if key in self.metadata:
            return self.metadata[key]

    return default
def __contains__(self, key):
@@ -48,7 +79,7 @@ class DynamicallyLoadMetadata:
def keys(self):
    """Return a view of the metadata keys loaded so far."""
    loaded = self.metadata
    return loaded.keys()
def GetFileMD(filenames, allowEmpty=True):
def GetFileMD(filenames, allowEmpty=True, maxLevel='peeker'):
if not filenames:
if allowEmpty:
msg.info("Running an input-less job. Will have empty metadata.")
@@ -61,7 +92,9 @@ def GetFileMD(filenames, allowEmpty=True):
for filename in filenames:
if filename not in _fileMetaData:
msg.info("Obtaining metadata of auto-configuration by peeking into '%s'", filename)
_fileMetaData[filename] = DynamicallyLoadMetadata(filename)
_fileMetaData[filename] = DynamicallyLoadMetadata(filename, maxLevel)
if _fileMetaData[filename].maxAccessLevel != maxLevel:
_fileMetaData[filename].maxAccessLevel = maxLevel
if _fileMetaData[filename]['nentries'] not in [None, 0]:
return _fileMetaData[filename]
else:
Loading