Forked from
atlas / athena
83808 commits behind the upstream repository.
-
scott snyder authored
Have MetaReader recognize keys in CLASS_SGKEY form as well. Fixes problems seen in TrigAnalysisTest.
scott snyder authored: Have MetaReader recognize keys in CLASS_SGKEY form as well. Fixes problems seen in TrigAnalysisTest.
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
MetaReader.py 33.00 KiB
# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
from __future__ import absolute_import
import os
import re
from fnmatch import fnmatchcase
from AthenaCommon.Logging import logging
msg = logging.getLogger('MetaReader')
# Regular expressions compiled once at import time and shared by the functions below.

# Class-name patterns: the bare class name with an optional persistent-version suffix (_p<N>).
regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')
regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')
# xAOD class-name patterns: the class name with an optional schema-version suffix (_v<N>).
regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')
regexXAODTriggerMenu = re.compile(r'^DataVector<xAOD::TriggerMenu(_v\d+)?>$')
regexXAODTriggerMenuAux = re.compile(r'^xAOD::TriggerMenuAuxContainer(_v\d+)?$')
# Splits a C++ type name into its base name (group 1) and an optional template argument list (group 2).
regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
# Previous pattern, which accepted only _p<N> suffixes on every scope component:
# regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$')
# Matches an optionally scope-qualified class name whose last component ends in _p<N> or _v<N>.
regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
# Matches names of the form '<word>:<rest>' where <rest> contains '.RAW.' / '.DRAW.' or ends in '.data';
# read_metadata() uses it to classify inputs that are not local files as bytestream.
regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')
def read_metadata(filenames, file_type = None, mode = 'lite', promote = None, meta_key_filter = [],
                  unique_tag_info_values = True):
    """
    Read the metadata stored in one or more POOL or bytestream (BS) files.

    :param filenames: a single file name or a list of file names.
    :param file_type: 'POOL' or 'BS' to force the file type; None to detect it per file. Local files are
                      classified by their first four bytes (b'root' -> POOL), other names by regex_BS_files.
    :param mode: one of 'tiny', 'lite', 'peeker' or 'full'. 'tiny' reads only the basic file information
                 (entries, GUID, compression for POOL; entries and GUID for BS); the other modes also read
                 the metadata containers, restricted by a per-mode filter for 'lite' and 'peeker'.
    :param promote: if true, the content of selected containers is moved up to the file level of the result
                    (see promote_keys). When None it defaults to True for 'lite' and 'peeker' and False otherwise.
    :param meta_key_filter: names of the metadata containers to read; only allowed with mode='full'.
                            The list is never modified here.
    :param unique_tag_info_values: if true, duplicated values inside the '/TagInfo' entry are collapsed.
    :return: a dictionary keyed by file name holding the metadata of each file, or None as soon as a file
             of unknown type is encountered.
    :raises NameError: for an invalid file_type or mode, or when meta_key_filter is used outside 'full' mode.
    """
    from RootUtils import PyROOTFixes  # noqa F401

    # Check if the input is a file or a list of files.
    if isinstance(filenames, str):
        filenames = [filenames]

    # Check if file_type is an allowed value
    if file_type is not None:
        if file_type not in ('POOL', 'BS'):
            raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
        else:
            msg.info('Forced file_type: {0}'.format(file_type))

    # Check the value of mode parameter
    if mode not in ('tiny', 'lite', 'full', 'peeker'):
        raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker" or "full"')
    msg.info('Current mode used: {0}'.format(mode))
    msg.info('Current filenames: {0}'.format(filenames))

    if mode != 'full' and len(meta_key_filter) > 0:
        raise NameError('It is possible to use the meta_key_filter option only for full mode')
    if meta_key_filter:
        msg.info('Filter used: {0}'.format(meta_key_filter))

    # create the storage object for metadata.
    meta_dict = {}

    # ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
    for filename in filenames:
        meta_dict[filename] = {}
        current_file_type = None

        # Determine the file_type of the input and store this information into meta_dict
        if not file_type:
            if os.path.isfile(filename):
                with open(filename, 'rb') as binary_file:
                    magic_file = binary_file.read(4)

                    if magic_file == 'root' or magic_file == b'root':
                        current_file_type = 'POOL'
                        meta_dict[filename]['file_type'] = 'POOL'
                    else:
                        current_file_type = 'BS'
                        meta_dict[filename]['file_type'] = 'BS'

                    # add information about the file_size of the input filename
                    meta_dict[filename]['file_size'] = os.path.getsize(filename)

            # determine the file type for the remote input files
            else:
                if regex_BS_files.match(filename):
                    current_file_type = 'BS'
                    meta_dict[filename]['file_type'] = 'BS'
                else:
                    current_file_type = 'POOL'
                    meta_dict[filename]['file_type'] = 'POOL'

                # add information about the file_size of the input filename
                meta_dict[filename]['file_size'] = None  # None -> we can't read the file size for a remote file

        else:
            # a forced file_type is used as is; 'file_type' and 'file_size' are not stored in this case
            current_file_type = file_type

        # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
        if current_file_type == 'POOL':
            import ROOT
            # open the file using ROOT.TFile
            current_file = ROOT.TFile.Open( _get_pfn(filename) )

            # open the tree 'POOLContainer' to read the number of entries
            if current_file.GetListOfKeys().Contains('POOLContainer'):
                meta_dict[filename]['nentries'] = current_file.Get('POOLContainer').GetEntriesFast()
            else:
                meta_dict[filename]['nentries'] = None

            # open the tree 'CollectionTree' to read auto flush
            if current_file.GetListOfKeys().Contains('CollectionTree'):
                meta_dict[filename]['auto_flush'] = current_file.Get('CollectionTree').GetAutoFlush()

            # read and add the 'GUID' value
            meta_dict[filename]['file_guid'] = _read_guid(filename)

            # read and add compression level and algorithm
            meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
            meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()

            # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
            if mode != 'tiny':
                # selecting from all tree the only one which contains metadata, respectively "MetaData"
                metadata_tree = current_file.Get('MetaData')
                # read all list of branches stored in "MetaData" tree
                metadata_branches = metadata_tree.GetListOfBranches()
                nr_of_branches = metadata_branches.GetEntriesFast()

                # object to store the names of metadata containers and their corresponding class name.
                meta_dict[filename]['metadata_items'] = {}

                # Filter mapping a container name ('*' = any name) to an fnmatch pattern for its class name.
                # An empty filter means that every supported container is read.
                meta_filter = {}

                # set the filters for name
                if mode == 'lite':
                    meta_filter = {
                        '/TagInfo': 'IOVMetaDataContainer_p1',
                        'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
                        '*': 'EventStreamInfo_p*'
                    }

                # set the filters for name
                if mode == 'peeker':
                    meta_filter = {
                        '/TagInfo': 'IOVMetaDataContainer_p1',
                        'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
                        '/Simulation/Parameters': 'IOVMetaDataContainer_p1',
                        '/Digitization/Parameters': 'IOVMetaDataContainer_p1',
                        '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1',
                        'TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>',
                        'TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
                        '*': 'EventStreamInfo_p*'
                    }

                if mode == 'full' and meta_key_filter:
                    meta_filter = {f: '*' for f in meta_key_filter}

                # store all persistent classes for metadata container existing in a POOL/ROOT file.
                persistent_instances = {}

                for i in range(0, nr_of_branches):
                    branch = metadata_branches.At(i)
                    name = branch.GetName()
                    class_name = branch.GetClassName()

                    # IOV folders are stored under '<class>_<folder with '/' replaced by '_'>': rebuild the folder name
                    if regexIOVMetaDataContainer.match(class_name):
                        name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')

                    # record every branch, with the known container classes stored without their version suffix
                    if regexIOVMetaDataContainer.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
                    elif regexByteStreamMetadataContainer.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
                    elif regexEventStreamInfo.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
                    else:
                        meta_dict[filename]['metadata_items'][name] = class_name

                    if len(meta_filter) > 0:
                        keep = False
                        for filter_key, filter_class in meta_filter.items():
                            # names are compared with '/' and '_' treated as the same character
                            if (filter_key.replace('/', '_') == name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
                                keep = True
                                break

                        if not keep:
                            continue

                    # assign the corresponding persistent class based of the name of the metadata container
                    if regexEventStreamInfo.match(class_name):
                        if class_name.endswith('_p1'):
                            persistent_instances[name] = ROOT.EventStreamInfo_p1()
                        elif class_name.endswith('_p2'):
                            persistent_instances[name] = ROOT.EventStreamInfo_p2()
                        else:
                            persistent_instances[name] = ROOT.EventStreamInfo_p3()
                    elif regexIOVMetaDataContainer.match(class_name):
                        persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
                    elif regexXAODEventFormat.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
                    elif regexXAODTriggerMenu.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
                    elif regexXAODTriggerMenuAux.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()

                    # branches of any other class are listed in 'metadata_items' but not read
                    if name in persistent_instances:
                        branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))

                # fill all the registered instances from the first entry of the tree
                metadata_tree.GetEntry(0)

                # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
                if meta_key_filter:
                    meta_dict[filename] = {}

                # read the metadata
                for name, content in persistent_instances.items():
                    key = name

                    if hasattr(content, 'm_folderName'):
                        key = getattr(content, 'm_folderName')

                    # The trigger menu interface container is converted together with its auxiliary store;
                    # the auxiliary store itself does not get an entry of its own.
                    aux = None
                    if key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances:
                        aux = persistent_instances['TriggerMenuAux.']
                    elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances:
                        aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
                    elif key == 'TriggerMenuAux.':
                        continue
                    elif key == 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.':
                        continue

                    meta_dict[filename][key] = _convert_value(content, aux)

                # This is a required workaround which will temporarily be fixing ATEAM-560 originated from ATEAM-531
                # ATEAM-560: https://its.cern.ch/jira/browse/ATEAM-560
                # ATEAM-531: https://its.cern.ch/jira/browse/ATEAM-531
                # This changes will remove all duplicates values presented in some files due
                # to the improper merging of two IOVMetaDataContainers.
                if unique_tag_info_values:
                    msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
                             'This is a workaround to remove all duplicate values from "/TagInfo" key')
                    if '/TagInfo' in meta_dict[filename]:
                        for key, value in meta_dict[filename]['/TagInfo'].items():
                            if isinstance(value, list):
                                unique_list = list(set(value))
                                meta_dict[filename]['/TagInfo'][key] = unique_list[0] if len(unique_list) == 1 else unique_list

            if promote is None:
                promote = mode == 'lite' or mode == 'peeker'

            # Filter the data and create a prettier output for the 'lite' mode
            if mode == 'lite':
                meta_dict = make_lite(meta_dict)

            if mode == 'peeker':
                meta_dict = make_peeker(meta_dict)

            if promote:
                meta_dict = promote_keys(meta_dict)

        # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
        elif current_file_type == 'BS':
            import eformat

            # store the number of entries
            bs = eformat.istream(filename)
            meta_dict[filename]['nentries'] = bs.total_events

            # store the 'guid' value
            data_reader = eformat.EventStorage.pickDataReader(filename)
            assert data_reader, 'problem picking a data reader for file [%s]' % filename

            if hasattr(data_reader, 'GUID'):
                meta_dict[filename]['file_guid'] = getattr(data_reader, 'GUID')()

            # if the flag full is set to true then grab all metadata
            # ------------------------------------------------------------------------------------------------------#
            if mode != "tiny":
                bs_metadata = {}

                for md in data_reader.freeMetaDataStrings():
                    if md.startswith('Event type:'):
                        k = 'eventTypes'
                        v = []
                        if 'is sim' in md:
                            v.append('IS_SIMULATION')
                        else:
                            v.append('IS_DATA')

                        if 'is atlas' in md:
                            v.append('IS_ATLAS')
                        else:
                            v.append('IS_TESTBEAM')

                        if 'is physics' in md:
                            v.append('IS_PHYSICS')
                        else:
                            v.append('IS_CALIBRATION')

                        bs_metadata[k] = tuple(v)

                    elif md.startswith('GeoAtlas:'):
                        k = 'geometry'
                        v = md.split('GeoAtlas:')[1].strip()
                        bs_metadata[k] = v

                    elif md.startswith('IOVDbGlobalTag:'):
                        k = 'conditions_tag'
                        v = md.split('IOVDbGlobalTag:')[1].strip()
                        bs_metadata[k] = v

                    elif '=' in md:
                        # split on the first '=' only: a value that itself contains '=' must not
                        # break the two-element unpacking
                        k, v = md.split('=', 1)
                        bs_metadata[k] = v

                bs_metadata['runNumbers'] = getattr(data_reader, 'runNumber')()
                bs_metadata['lumiBlockNumbers'] = getattr(data_reader, 'lumiblockNumber')()
                bs_metadata['projectTag'] = getattr(data_reader, 'projectTag')()
                bs_metadata['stream'] = getattr(data_reader, 'stream')()
                #bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
                beamTypeNbr = getattr(data_reader, 'beamType')()
                #According to info from Rainer and Guiseppe the beam type is
                #0: no beam
                #1: protons
                #2: ions
                if beamTypeNbr == 0:
                    bs_metadata['beamType'] = 'cosmics'
                elif beamTypeNbr == 1 or beamTypeNbr == 2:
                    bs_metadata['beamType'] = 'collisions'
                else:
                    bs_metadata['beamType'] = 'unknown'

                bs_metadata['beamEnergy'] = getattr(data_reader, 'beamEnergy')()

                meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
                meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
                meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)

                # Promote up one level
                meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
                meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
                meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
                meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
                meta_dict[filename]['stream'] = bs_metadata.get('stream', None)

                if not data_reader.good():
                    # event-less file...
                    meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
                    meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))

                # the first event of the stream provides the stream tags, event number and run type
                ievt = iter(bs)
                evt = next(ievt)
                evt.check()  # may raise a RuntimeError
                processing_tags = [dict(stream_type = tag.type, stream_name = tag.name, obeys_lbk = bool(tag.obeys_lumiblock)) for tag in evt.stream_tag()]
                meta_dict[filename]['processingTags'] = [x['stream_name'] for x in processing_tags]
                meta_dict[filename]['evt_number'] = [evt.global_id()]
                meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]

                # fix for ATEAM-122
                if len(bs_metadata.get('eventTypes', '')) == 0:  # see: ATMETADATA-6
                    evt_type = ['IS_DATA', 'IS_ATLAS']
                    if bs_metadata.get('stream', '').startswith('physics_'):
                        evt_type.append('IS_PHYSICS')
                    elif bs_metadata.get('stream', '').startswith('calibration_'):
                        evt_type.append('IS_CALIBRATION')
                    elif bs_metadata.get('projectTag', '').endswith('_calib'):
                        evt_type.append('IS_CALIBRATION')
                    else:
                        evt_type.append('Unknown')

                    meta_dict[filename]['eventTypes'] = evt_type

                if mode == 'full':
                    meta_dict[filename]['bs_metadata'] = bs_metadata

        # ------ Throw an error if the user provide other file types -------------------------------------------------#
        else:
            msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
            return None

    return meta_dict
def _get_pfn(filename):
"""
Extract the actuall filename if LFN or PFN notation is used
"""
pfx = filename[0:4]
if pfx == 'PFN:':
return filename[4:]
if pfx == 'LFN:':
import subprocess, os
os.environ['POOL_OUTMSG_LEVEL'] = 'Error'
output = subprocess.check_output(['FClistPFN','-l',filename[4:]]).split('\n')
if len(output) == 2:
return output[0]
msg.error( 'FClistPFN({0}) returned unexpected number of lines:'.format(filename) )
msg.error( '\n'.join(output) )
return filename
def _read_guid(filename):
    """
    Read the GUID (Globally Unique Identifier used in POOL files and Grid catalogs) of a POOL file.

    The '##Params' tree of the file is scanned for strings of the form '[NAME=<name>][VALUE=<value>]';
    the value of the last one named 'FID' is the GUID.

    :param filename: the input file (plain, 'PFN:' or 'LFN:' notation)
    :return: the guid value, or None if no 'FID' parameter is found
    """
    import ROOT

    pool_file = ROOT.TFile.Open(_get_pfn(filename))
    params_tree = pool_file.Get('##Params')
    param_pattern = re.compile(r'^\[NAME=([a-zA-Z0-9_]+)\]\[VALUE=(.*)\]')

    guid = None
    for entry in range(params_tree.GetEntries()):
        params_tree.GetEntry(entry)
        # Work around apparent pyroot issue:
        # reading params.db_string directly shows trailing garbage, which can
        # confuse python's bytes->utf8 conversion and result in an error,
        # so the string is fetched through the leaf instead.
        db_string = params_tree.GetLeaf('db_string').GetValueString()
        found = param_pattern.match(db_string)
        if found and found.group(1) == 'FID':
            # keep looping: it's the last FID entry that counts
            guid = found.group(2)

    return guid
def _extract_fields(obj):
result = {}
for meth in dir(obj):
if not meth.startswith('_'):
if meth.startswith('m_'):
field_name = str(meth)[2:]
field_value = getattr(obj, meth)
result[field_name] = _convert_value(field_value)
return result
def _convert_value(value, aux = None):
cl=value.__class__
if hasattr(cl, '__cpp_name__'):
result = regex_cppname.match(cl.__cpp_name__)
if result:
cpp_type = result.group(1)
if cpp_type == 'vector':
return [_convert_value(val) for val in value]
elif cpp_type == 'pair':
return _convert_value(value.first), _convert_value(value.second)
# elif cpp_type == 'long':
# return int(value)
elif cl.__cpp_name__ == "_Bit_reference":
return bool(value)
# special case which extracts data in a better format from IOVPayloadContainer_p1 class
elif cl.__cpp_name__ == 'IOVMetaDataContainer_p1':
return _extract_fields_iovmdc(value)
elif cl.__cpp_name__ == 'IOVPayloadContainer_p1':
return _extract_fields_iovpc(value)
elif cl.__cpp_name__ == 'xAOD::EventFormat_v1':
return _extract_fields_ef(value)
elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenu_v1>' :
return _extract_fields_triggermenu(interface=value, aux=aux)
elif (cl.__cpp_name__ == 'EventStreamInfo_p2' or
cl.__cpp_name__ == 'EventStreamInfo_p3'):
return _extract_fields_esi(value)
elif (cl.__cpp_name__ == 'EventType_p1' or
cl.__cpp_name__ == 'EventType_p3'):
return _convert_event_type_bitmask(_extract_fields(value))
elif regex_persistent_class.match(cl.__cpp_name__):
return _extract_fields(value)
return value
def _extract_fields_iovmdc(value):
    """
    Convert an IOVMetaDataContainer_p1: only its payload ('m_payload' member) is converted and returned.
    """
    payload = value.m_payload
    return _convert_value(payload)
def _extract_fields_iovpc(value):
result = {}
for attr_idx in value.m_attrIndexes:
name_idx = attr_idx.nameIndex()
type_idx = attr_idx.typeIndex()
obj_idx = attr_idx.objIndex()
attr_name = value.m_attrName[name_idx]
attr_value = None
if type_idx == 0:
attr_value = bool(value.m_bool[obj_idx])
elif type_idx == 1:
attr_value = int(value.m_char[obj_idx])
elif type_idx == 2:
attr_value = int(value.m_unsignedChar[obj_idx])
elif type_idx == 3:
attr_value = int(value.m_short[obj_idx])
elif type_idx == 4:
attr_value = int(value.m_unsignedShort[obj_idx])
elif type_idx == 5:
attr_value = int(value.m_int[obj_idx])
elif type_idx == 6:
attr_value = int(value.m_unsignedInt[obj_idx])
elif type_idx == 7:
attr_value = int(value.m_long[obj_idx])
elif type_idx == 8:
attr_value = int(value.m_unsignedLong[obj_idx])
elif type_idx == 9:
attr_value = int(value.m_longLong[obj_idx])
elif type_idx == 10:
attr_value = int(value.m_unsignedLongLong[obj_idx])
elif type_idx == 11:
attr_value = float(value.m_float[obj_idx])
elif type_idx == 12:
attr_value = float(value.m_double[obj_idx])
elif type_idx == 13:
# skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
pass
elif type_idx == 14:
attr_value = str(value.m_string[obj_idx])
# Cleaning class name from value
if attr_value.startswith('IOVMetaDataContainer_p1_'):
attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
if attr_value.startswith('_'):
attr_value = attr_value.replace('_', '/')
# Now it is clean
elif type_idx == 15:
attr_value = int(value.m_date[obj_idx])
elif type_idx == 16:
attr_value = int(value.m_timeStamp[obj_idx])
else:
raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
if attr_name not in result:
result[attr_name] = []
result[attr_name].append(attr_value)
max_element_count = 0
for name, content in result.items():
if len(content) > max_element_count:
max_element_count = len(content)
if max_element_count <= 1:
for name, content in result.items():
if len(content) > 0:
result[name] = content[0]
else:
result[name] = None
return result
def _extract_fields_esi(value):
    """
    Convert an EventStreamInfo persistent object into a dictionary.

    :param value: the object to convert; its members m_eventTypes, m_numberOfEvents, m_runNumbers,
                  m_lumiBlockNumbers, m_processingTags and m_itemList are read
    :return: a dictionary with the keys 'eventTypes', 'numberOfEvents', 'runNumbers',
             'lumiBlockNumbers', 'processingTags' and 'itemList'; 'itemList' holds
             (class name, key) tuples
    """
    summary = {
        'eventTypes': [_convert_value(event_type) for event_type in value.m_eventTypes],
        'numberOfEvents': value.m_numberOfEvents,
        'runNumbers': list(value.m_runNumbers),
        'lumiBlockNumbers': list(value.m_lumiBlockNumbers),
        'processingTags': [str(tag) for tag in value.m_processingTags],
        'itemList': [],
    }

    # Get the class name in the repository with CLID <clid>
    from CLIDComps.clidGenerator import clidGenerator
    clid_db = clidGenerator("")
    for clid, sgkey in value.m_itemList:
        class_name = clid_db.getNameFromClid(clid)
        summary['itemList'].append((class_name, sgkey))

    return summary
def _extract_fields_ef(value):
result = {}
for ef_element in value:
result[ef_element.first] = ef_element.second.className()
return result
def _extract_fields_triggermenu(interface, aux):
L1Items = []
HLTChains = []
try:
interface.setStore( aux )
if interface.size() > 0:
# We make the assumption that the first stored SMK is
# representative of all events in the input collection.
firstMenu = interface.at(0)
L1Items = [ item for item in firstMenu.itemNames() ]
HLTChains = [ chain for chain in firstMenu.chainNames() ]
except Exception as err:
msg.warn('Problem reading xAOD::TriggerMenu:')
msg.warn(err)
result = {}
result['L1Items'] = L1Items
result['HLTChains'] = HLTChains
return result
def _convert_event_type_bitmask(value):
types = None
for key in value:
if key == 'bit_mask':
val = value[key]
bitmask_length = len(val)
is_simulation = False
is_testbeam = False
is_calibration = False
if bitmask_length > 0: # ROOT.EventType.IS_SIMULATION
is_simulation = val[0]
if bitmask_length > 1: # ROOT.EventType.IS_TESTBEAM
is_testbeam = val[1]
if bitmask_length > 2: # ROOT.EventType.IS_CALIBRATION:
is_calibration = val[2]
types = [
'IS_SIMULATION' if is_simulation else 'IS_DATA',
'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
]
value['type'] = types
return value
def make_lite(meta_dict):
    """
    Reduce the metadata of every file to the keys relevant for the 'lite' mode.

    Inside each EventStreamInfo entry and inside the '/TagInfo' entry only a fixed list of keys
    is kept. The dictionaries are modified in place.

    :param meta_dict: the dictionary {file name: metadata} built by read_metadata
    :return: the same dictionary
    """
    for filename, file_content in meta_dict.items():
        # Not every entry has a 'metadata_items' map (read_metadata only fills it for POOL files
        # read in a mode other than 'tiny'); such an entry simply has no EventStreamInfo to reduce.
        metadata_items = file_content.get('metadata_items', {})

        for key in file_content:
            if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                keys_to_keep = ['lumiBlockNumbers', 'runNumbers', 'mc_event_number', 'mc_channel_number', 'eventTypes', 'processingTags']

                # iterate over a copy of the keys, the dictionary is modified inside the loop
                for item in list(file_content[key]):
                    if item not in keys_to_keep:
                        file_content[key].pop(item)

        if '/TagInfo' in file_content:
            keys_to_keep = ['beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag', 'AODFixVersion']

            for item in list(file_content['/TagInfo']):
                if item not in keys_to_keep:
                    file_content['/TagInfo'].pop(item)

    return meta_dict
def make_peeker(meta_dict):
    """
    Reduce the metadata of every file to the keys relevant for the 'peeker' mode.

    Inside each EventStreamInfo entry and inside the '/TagInfo', '/Simulation/Parameters' and
    '/Digitization/Parameters' entries only a fixed list of keys is kept. The dictionaries are
    modified in place.

    :param meta_dict: the dictionary {file name: metadata} built by read_metadata
    :return: the same dictionary
    """
    esi_keys_to_keep = [
        'lumiBlockNumbers',
        'runNumbers',
        'mc_event_number',
        'mc_channel_number',
        'eventTypes',
        'processingTags',
        'itemList'
    ]

    # (entry name, keys kept inside that entry)
    folder_keys_to_keep = (
        ('/TagInfo', [
            'beam_energy',
            'beam_type',
            'GeoAtlas',
            'IOVDbGlobalTag',
            'AODFixVersion',
            'AMITag',
            'project_name',
            'triggerStreamOfFile',
            'AtlasRelease'
        ]),
        ('/Simulation/Parameters', [
            'TruthStrategy',
            'SimBarcodeOffset',
            'TRTRangeCut',
        ]),
        ('/Digitization/Parameters', [
            'numberOfCollisions',
            'intraTrainBunchSpacing',
            'BeamIntensityPattern'
        ]),
    )

    for filename, file_content in meta_dict.items():
        # Not every entry has a 'metadata_items' map (read_metadata only fills it for POOL files
        # read in a mode other than 'tiny'); such an entry simply has no EventStreamInfo to reduce.
        metadata_items = file_content.get('metadata_items', {})

        for key in file_content:
            if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                # iterate over a copy of the keys, the dictionary is modified inside the loop
                for item in list(file_content[key]):
                    if item not in esi_keys_to_keep:
                        file_content[key].pop(item)

        for folder, keys_to_keep in folder_keys_to_keep:
            if folder in file_content:
                for item in list(file_content[folder]):
                    if item not in keys_to_keep:
                        file_content[folder].pop(item)

    return meta_dict
def promote_keys(meta_dict):
    """
    Move the content of selected metadata containers up to the file level.

    For every file, the content of the first EventStreamInfo entry found and of the '/TagInfo',
    '/Simulation/Parameters' and '/Digitization/Parameters' entries is merged into the dictionary
    of the file, and the promoted entries are removed. The dictionaries are modified in place.

    :param meta_dict: the dictionary {file name: metadata} built by read_metadata
    :return: the same dictionary
    """
    for filename, file_content in meta_dict.items():
        md = meta_dict[filename]
        # Not every entry has a 'metadata_items' map (read_metadata only fills it for POOL files
        # read in a mode other than 'tiny'); such an entry simply has no EventStreamInfo to promote.
        metadata_items = md.get('metadata_items', {})

        for key in file_content:
            if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                md.update(md[key])

                if 'eventTypes' in md and len(md['eventTypes']):
                    # only the first event type is promoted, flattened into separate keys
                    et = md['eventTypes'][0]
                    md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
                    md['mc_channel_number'] = et.get('mc_channel_number', 0)
                    md['eventTypes'] = et['type']

                if 'lumiBlockNumbers' in md[key]:
                    md['lumiBlockNumbers'] = md[key]['lumiBlockNumbers']

                if 'processingTags' in md[key]:
                    md['processingTags'] = md[key]['processingTags']

                md.pop(key)
                # the dictionary was modified while being iterated: leaving the loop here is mandatory
                break

        if '/TagInfo' in file_content:
            md.update(md['/TagInfo'])
            md.pop('/TagInfo')

        if '/Simulation/Parameters' in file_content:
            md.update(md['/Simulation/Parameters'])
            md.pop('/Simulation/Parameters')

        if '/Digitization/Parameters' in file_content:
            md.update(md['/Digitization/Parameters'])
            md.pop('/Digitization/Parameters')

    return meta_dict
def convert_itemList(metadata, layout):
    """
    This function will rearrange the itemList values to match the format of 'eventdata_items', 'eventdata_itemsList'
    or 'eventdata_itemsDic' generated with AthFile

    The item list is taken from the 'itemList' key of metadata if present, otherwise from the first
    EventStreamInfo entry of metadata.

    :param metadata: the metadata dictionary of a single file, obtained using read_metadata method.
                     The mode for read_metadata must be 'peeker' or 'full'
    :param layout: the mode in which the data will be converted:
                    * for 'eventdata_items' use: layout= None
                    * for 'eventdata_itemsList' use: layout= '#join'
                    * for 'eventdata_itemsDic' use: layout= 'dict'
    :return: the list of (class name, key) tuples for layout None, a list of 'class#key' strings
             (items without a class name are dropped) for '#join', a dictionary
             {class name: [keys]} for 'dict'; None if no item list is found or for any other layout
    """
    # Find the itemsList:
    item_list = None

    if 'itemList' in metadata:
        item_list = metadata['itemList']
    else:
        metadata_items = metadata.get('metadata_items', {})

        current_key = None
        for key in metadata:
            # read_metadata records every EventStreamInfo version under the name 'EventStreamInfo',
            # so the class is matched with the shared pattern rather than one exact version
            if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                current_key = key
                break

        if current_key is not None:
            # the entry may have no item list, e.g. after make_lite
            item_list = metadata[current_key].get('itemList')

    if item_list is not None:
        if layout is None:
            return item_list

        elif layout == '#join':
            return [k + '#' + v for k, v in item_list if k]

        elif layout == 'dict':
            from collections import defaultdict
            dic = defaultdict(list)

            for k, v in item_list:
                dic[k].append(v)

            return dict(dic)

    return None