diff --git a/Tools/PyUtils/bin/meta-reader.py b/Tools/PyUtils/bin/meta-reader.py index 34b27f9177f61c9c04e805c501c397fb011d367e..735bb6e6aed9a06dbc64a04adcfc327a28b9079e 100755 --- a/Tools/PyUtils/bin/meta-reader.py +++ b/Tools/PyUtils/bin/meta-reader.py @@ -11,7 +11,7 @@ import logging import os # escape sequence [?1034h which aprear on several runs due to smm capability (Meta Mode On) for xterm. -if 'TERM' in os.envirn: +if 'TERM' in os.environ: del os.environ['TERM'] msg = logging.getLogger('MetaReader') diff --git a/Tools/PyUtils/python/MetaReader.py b/Tools/PyUtils/python/MetaReader.py index ecab98818c517daf20560834b8e1cde09fb73f7c..9bde5afe46ac7644c64aaf597cc88671b1d3519b 100644 --- a/Tools/PyUtils/python/MetaReader.py +++ b/Tools/PyUtils/python/MetaReader.py @@ -1,669 +1,660 @@ # Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration from __future__ import absolute_import -import os, re +import os +import re from fnmatch import fnmatchcase from AthenaCommon.Logging import logging msg = logging.getLogger('MetaReader') -regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$') -regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$') -regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$') -regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$') +# compile the regex needed in _convert_value() outside it to optimize the code. 
+regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$') +regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$') +regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$') +regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$') +regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$') +# regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$') +regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$') +regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))') def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter= []): - """ - This tool is independent of Athena framework and returns the metadata from a given file. - :param filenames: the input file from which metadata needs to be extracted. - :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files. - :param mode: if true, will return all metadata associated with the filename. By default, is false and this will - return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'. - :return: a dictionary of metadata for the given input file. - """ - - from RootUtils import PyROOTFixes - - # Check if the input is a file or a list of files. 
- if isinstance(filenames, basestring): - filenames = [filenames] - - # Check if file_type is an allowed value - if file_type is not None: - if file_type not in ('POOL', 'BS'): - raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"') - else: - msg.info('Forced file_type: {0}'.format(file_type)) - - # Check the value of mode parameter - if mode not in ('tiny', 'lite', 'full', 'peeker'): - raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker" or "full"') - msg.info('Current mode used: {0}'.format(mode)) - - if mode != 'full' and len(meta_key_filter) > 0: - raise NameError('It is possible to use the meta_key_filter option only for full mode') - if len(meta_key_filter) > 0: - msg.info('Filter used: {0}'.format(meta_key_filter)) - - # create the storage object for metadata. - meta_dict = {} - - # ----- retrieve metadata from all filename or filenames --------------------------------------------------------# - for filename in filenames: - meta_dict[filename] = {} - current_file_type = None - # Determine the file_type of the input and store this information into meta_dict - if not file_type: - with open(filename, 'rb') as binary_file: - magic_file = binary_file.read(4) - - if magic_file == 'root': - current_file_type = 'POOL' - meta_dict[filename]['file_type'] = 'POOL' - - else: - current_file_type = 'BS' - meta_dict[filename]['file_type'] = 'BS' - - else: - current_file_type = file_type - - # add information about the file_size of the input filename - meta_dict[filename]['file_size'] = os.path.getsize(filename) - - # ----- retrieves metadata from POOL files ------------------------------------------------------------------# - if current_file_type == 'POOL': - import ROOT - # open the file using ROOT.TFile - current_file = ROOT.TFile(filename) - - # open the tree 'POOLContainer' to read the number of entries - if current_file.GetListOfKeys().Contains('POOLContainer'): - 
meta_dict[filename]['nentries'] = current_file.Get('POOLContainer').GetEntriesFast() - else: - meta_dict[filename]['nentries'] = None - - # read and add the 'GUID' value - meta_dict[filename]['file_guid'] = _read_guid(filename) - - # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------# - if mode != 'tiny': - # selecting from all tree the only one which contains metadata, respectively "MetaData" - metadata_tree = current_file.Get('MetaData') - # read all list of branches stored in "MetaData" tree - metadata_branches = metadata_tree.GetListOfBranches() - nr_of_branches = metadata_branches.GetEntriesFast() - - # object to store the names of metadata containers and their corresponding class name. - meta_dict[filename]['metadata_items'] = {} - - # create a container for the list of filters used for the lite version - meta_filter = {} - - # set the filters for name - if mode == 'lite': - meta_filter = { - '/TagInfo': 'IOVMetaDataContainer_p1', - 'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1', - '*': 'EventStreamInfo_p*' - } - - # set the filters for name - if mode == 'peeker': - meta_filter = { - '/TagInfo': 'IOVMetaDataContainer_p1', - 'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1', - '/Simulation/Parameters': 'IOVMetaDataContainer_p1', - '/Digitization/Parameters': 'IOVMetaDataContainer_p1', - '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1', - '*': 'EventStreamInfo_p*' - } - - if mode == 'full' and len(meta_key_filter) > 0: - meta_filter = {f: '*' for f in meta_key_filter} - # store all persistent classes for metadata container existing in a POOL/ROOT file. 
- persistent_instances = {} - - for i in range(0, nr_of_branches): - branch = metadata_branches.At(i) - name = branch.GetName() - - class_name = branch.GetClassName() - - if regexIOVMetaDataContainer.match(class_name): - name = name.replace('IOVMetaDataContainer_p1_','').replace('_','/') - - - if regexIOVMetaDataContainer.match(class_name): - meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer' - elif regexByteStreamMetadataContainer.match(class_name): - meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer' - elif regexEventStreamInfo.match(class_name): - meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo' - else: - meta_dict[filename]['metadata_items'][name] = class_name - - if len(meta_filter) > 0: - keep = False - for filter_key, filter_class in meta_filter.items(): - if (filter_key.replace('/', '_') == name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class): - keep = True - break - - if not keep: - continue - - # assign the corresponding persistent class based of the name of the metadata container - if regexEventStreamInfo.match(class_name): - if class_name.endswith('_p2'): - persistent_instances[name] = ROOT.EventStreamInfo_p2() - else: - persistent_instances[name] = ROOT.EventStreamInfo_p3() - elif regexIOVMetaDataContainer.match(class_name): - persistent_instances[name] = ROOT.IOVMetaDataContainer_p1() - elif regexXAODEventFormat.match(class_name): - persistent_instances[name] = ROOT.xAOD.EventFormat_v1() - - if name in persistent_instances: - branch.SetAddress(ROOT.AddressOf(persistent_instances[name])) - - metadata_tree.GetEntry(0) - - # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest - if len(meta_key_filter) > 0: - meta_dict[filename] = {} - - # read the metadata - for name, content in persistent_instances.items(): - key = name - - if hasattr(content, 'm_folderName'): - key = getattr(content, 'm_folderName') - - 
meta_dict[filename][key] = _convert_value(content) - - if promote is None: - promote = mode == 'lite' or mode == 'peeker' - - # Filter the data and create a prettier output for the 'lite' mode - if mode == 'lite': - meta_dict = make_lite(meta_dict) - - if mode == 'peeker': - meta_dict = make_peeker(meta_dict) - - if promote: - meta_dict = promote_keys(meta_dict) - - - # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------# - elif current_file_type == 'BS': - import eformat - - # store the number of entries - bs = eformat.istream(filename) - meta_dict[filename]['nentries'] = bs.total_events - - # store the 'guid' value - data_reader = eformat.EventStorage.pickDataReader(filename) - assert data_reader, 'problem picking a data reader for file [%s]' % filename - - if hasattr(data_reader, 'GUID'): - meta_dict[filename]['file_guid'] = getattr(data_reader, 'GUID')() - - # if the flag full is set to true then grab all metadata - # ------------------------------------------------------------------------------------------------------# - if mode != "tiny": - bs_metadata = {} - - for md in data_reader.freeMetaDataStrings(): - if md.startswith('Event type:'): - k = 'eventTypes' - v = [] - if 'is sim' in md: - v.append('IS_SIMULATION') - else: - v.append('IS_DATA') - - if 'is atlas' in md: - v.append('IS_ATLAS') - else: - v.append('IS_TESTBEAM') - - if 'is physics' in md: - v.append('IS_PHYSICS') - else: - v.append('IS_CALIBRATION') - - bs_metadata[k] = tuple(v) - - elif md.startswith('GeoAtlas:'): - k = 'geometry' - v = md.split('GeoAtlas:')[1].strip() - bs_metadata[k] = v - - elif md.startswith('IOVDbGlobalTag:'): - k = 'conditions_tag' - v = md.split('IOVDbGlobalTag:')[1].strip() - bs_metadata[k] = v - - elif '=' in md: - k, v = md.split('=') - bs_metadata[k] = v - - bs_metadata['runNumbers'] = getattr(data_reader, 'runNumber')() - bs_metadata['lumiBlockNumbers'] = getattr(data_reader, 'lumiblockNumber')() - 
bs_metadata['projectTag'] = getattr(data_reader, 'projectTag')() - bs_metadata['stream'] = getattr(data_reader, 'stream')() - bs_metadata['beamType'] = getattr(data_reader, 'beamType')() - bs_metadata['beamEnergy'] = getattr(data_reader, 'beamEnergy')() - - meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', []) - meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None) - meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None) - - # Promote up one level - meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)] - meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)] - meta_dict[filename]['beam_type'] = [bs_metadata.get('beamType', None)] - meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None) - meta_dict[filename]['stream'] = bs_metadata.get('stream', None) - - if not data_reader.good(): - # event-less file... - meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0)) - meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0)) - - ievt = iter(bs) - evt = ievt.next() - evt.check() # may raise a RuntimeError - processing_tags = [dict(stream_type = tag.type, stream_name = tag.name, obeys_lbk = bool(tag.obeys_lumiblock)) for tag in evt.stream_tag()] - meta_dict[filename]['processingTags'] = [x['stream_name'] for x in processing_tags] - meta_dict[filename]['evt_number'] = [evt.global_id()] - meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())] - - - # fix for ATEAM-122 - if len(bs_metadata.get('eventTypes', '')) == 0: # see: ATMETADATA-6 - evt_type = ['IS_DATA', 'IS_ATLAS'] - if bs_metadata.get('stream', '').startswith('physics_'): - evt_type.append('IS_PHYSICS') - elif bs_metadata.get('stream', '').startswith('calibration_'): - evt_type.append('IS_CALIBRATION') - elif bs_metadata.get('projectTag', '').endswith('_calib'): - evt_type.append('IS_CALIBRATION') - else: - 
evt_type.append('Unknown') - - meta_dict[filename]['eventTypes'] = evt_type - - if mode == 'full': - meta_dict[filename]['bs_metadata'] = bs_metadata - - # ------ Throw an error if the user provide other file types -------------------------------------------------# - else: - msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type)) - return None - - return meta_dict - - -# Currently not used -# def _md5(fname, block_size=2**20, do_fast_md5=True): -# import hashlib -# # do_fast_md5 is for compatibility with AthFile. -# # -- Sebastian Liem -# hash_md5 = hashlib.md5() -# with open(fname, 'rb') as f: -# for chunk in iter(lambda: f.read(block_size), b''): -# hash_md5.update(chunk) -# if do_fast_md5: -# break -# return hash_md5.hexdigest() + """ + This tool is independent of Athena framework and returns the metadata from a given file. + :param filenames: the input file from which metadata needs to be extracted. + :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files. + :param mode: if true, will return all metadata associated with the filename. By default, is false and this will + return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'. + :return: a dictionary of metadata for the given input file. + """ + + from RootUtils import PyROOTFixes + + # Check if the input is a file or a list of files. 
+ if isinstance(filenames, basestring): + filenames = [filenames] + + # Check if file_type is an allowed value + if file_type is not None: + if file_type not in ('POOL', 'BS'): + raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"') + else: + msg.info('Forced file_type: {0}'.format(file_type)) + + # Check the value of mode parameter + if mode not in ('tiny', 'lite', 'full', 'peeker'): + raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker" or "full"') + msg.info('Current mode used: {0}'.format(mode)) + + if mode != 'full' and len(meta_key_filter) > 0: + raise NameError('It is possible to use the meta_key_filter option only for full mode') + if len(meta_key_filter) > 0: + msg.info('Filter used: {0}'.format(meta_key_filter)) + + # create the storage object for metadata. + meta_dict = {} + + # ----- retrieve metadata from all filename or filenames --------------------------------------------------------# + for filename in filenames: + meta_dict[filename] = {} + current_file_type = None + # Determine the file_type of the input and store this information into meta_dict + if not file_type: + if os.path.isfile(filename): + with open(filename, 'rb') as binary_file: + magic_file = binary_file.read(4) + + if magic_file == 'root': + current_file_type = 'POOL' + meta_dict[filename]['file_type'] = 'POOL' + + else: + current_file_type = 'BS' + meta_dict[filename]['file_type'] = 'BS' + + # add information about the file_size of the input filename + meta_dict[filename]['file_size'] = os.path.getsize(filename) + + # determine the file type for the remote input files + else: + if regex_BS_files.match(filename): + current_file_type = 'BS' + meta_dict[filename]['file_type'] = 'BS' + print('############### BS online') + else: + current_file_type = 'POOL' + meta_dict[filename]['file_type'] = 'POOL' + print('############### POOL online') + + # add information about the file_size of the input 
filename + meta_dict[filename]['file_size'] = None # None -> we can't read the file size for a remote file + + else: + current_file_type = file_type + + # ----- retrieves metadata from POOL files ------------------------------------------------------------------# + if current_file_type == 'POOL': + import ROOT + # open the file using ROOT.TFile + current_file = ROOT.TFile.Open(filename) + + # open the tree 'POOLContainer' to read the number of entries + if current_file.GetListOfKeys().Contains('POOLContainer'): + meta_dict[filename]['nentries'] = current_file.Get('POOLContainer').GetEntriesFast() + else: + meta_dict[filename]['nentries'] = None + + # read and add the 'GUID' value + meta_dict[filename]['file_guid'] = _read_guid(filename) + + # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------# + if mode != 'tiny': + # selecting from all tree the only one which contains metadata, respectively "MetaData" + metadata_tree = current_file.Get('MetaData') + # read all list of branches stored in "MetaData" tree + metadata_branches = metadata_tree.GetListOfBranches() + nr_of_branches = metadata_branches.GetEntriesFast() + + # object to store the names of metadata containers and their corresponding class name. 
+ meta_dict[filename]['metadata_items'] = {} + + # create a container for the list of filters used for the lite version + meta_filter = {} + + # set the filters for name + if mode == 'lite': + meta_filter = { + '/TagInfo': 'IOVMetaDataContainer_p1', + 'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1', + '*': 'EventStreamInfo_p*' + } + + # set the filters for name + if mode == 'peeker': + meta_filter = { + '/TagInfo': 'IOVMetaDataContainer_p1', + 'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1', + '/Simulation/Parameters': 'IOVMetaDataContainer_p1', + '/Digitization/Parameters': 'IOVMetaDataContainer_p1', + '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1', + '*': 'EventStreamInfo_p*' + } + + if mode == 'full' and len(meta_key_filter) > 0: + meta_filter = {f: '*' for f in meta_key_filter} + # store all persistent classes for metadata container existing in a POOL/ROOT file. + persistent_instances = {} + + for i in range(0, nr_of_branches): + branch = metadata_branches.At(i) + name = branch.GetName() + + class_name = branch.GetClassName() + + if regexIOVMetaDataContainer.match(class_name): + name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/') + + if regexIOVMetaDataContainer.match(class_name): + meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer' + elif regexByteStreamMetadataContainer.match(class_name): + meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer' + elif regexEventStreamInfo.match(class_name): + meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo' + else: + meta_dict[filename]['metadata_items'][name] = class_name + + if len(meta_filter) > 0: + keep = False + for filter_key, filter_class in meta_filter.items(): + if (filter_key.replace('/', '_') == name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class): + keep = True + break + + if not keep: + continue + + # assign the corresponding persistent class based of the name of the 
metadata container + if regexEventStreamInfo.match(class_name): + if class_name.endswith('_p2'): + persistent_instances[name] = ROOT.EventStreamInfo_p2() + else: + persistent_instances[name] = ROOT.EventStreamInfo_p3() + elif regexIOVMetaDataContainer.match(class_name): + persistent_instances[name] = ROOT.IOVMetaDataContainer_p1() + elif regexXAODEventFormat.match(class_name): + persistent_instances[name] = ROOT.xAOD.EventFormat_v1() + + if name in persistent_instances: + branch.SetAddress(ROOT.AddressOf(persistent_instances[name])) + + metadata_tree.GetEntry(0) + + # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest + if len(meta_key_filter) > 0: + meta_dict[filename] = {} + + # read the metadata + for name, content in persistent_instances.items(): + key = name + + if hasattr(content, 'm_folderName'): + key = getattr(content, 'm_folderName') + + meta_dict[filename][key] = _convert_value(content) + + if promote is None: + promote = mode == 'lite' or mode == 'peeker' + + # Filter the data and create a prettier output for the 'lite' mode + if mode == 'lite': + meta_dict = make_lite(meta_dict) + + if mode == 'peeker': + meta_dict = make_peeker(meta_dict) + + if promote: + meta_dict = promote_keys(meta_dict) + + # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------# + elif current_file_type == 'BS': + import eformat + + # store the number of entries + bs = eformat.istream(filename) + meta_dict[filename]['nentries'] = bs.total_events + + # store the 'guid' value + data_reader = eformat.EventStorage.pickDataReader(filename) + assert data_reader, 'problem picking a data reader for file [%s]' % filename + + if hasattr(data_reader, 'GUID'): + meta_dict[filename]['file_guid'] = getattr(data_reader, 'GUID')() + + # if the flag full is set to true then grab all metadata + # ------------------------------------------------------------------------------------------------------# + if 
mode != "tiny": + bs_metadata = {} + + for md in data_reader.freeMetaDataStrings(): + if md.startswith('Event type:'): + k = 'eventTypes' + v = [] + if 'is sim' in md: + v.append('IS_SIMULATION') + else: + v.append('IS_DATA') + + if 'is atlas' in md: + v.append('IS_ATLAS') + else: + v.append('IS_TESTBEAM') + + if 'is physics' in md: + v.append('IS_PHYSICS') + else: + v.append('IS_CALIBRATION') + + bs_metadata[k] = tuple(v) + + elif md.startswith('GeoAtlas:'): + k = 'geometry' + v = md.split('GeoAtlas:')[1].strip() + bs_metadata[k] = v + + elif md.startswith('IOVDbGlobalTag:'): + k = 'conditions_tag' + v = md.split('IOVDbGlobalTag:')[1].strip() + bs_metadata[k] = v + + elif '=' in md: + k, v = md.split('=') + bs_metadata[k] = v + + bs_metadata['runNumbers'] = getattr(data_reader, 'runNumber')() + bs_metadata['lumiBlockNumbers'] = getattr(data_reader, 'lumiblockNumber')() + bs_metadata['projectTag'] = getattr(data_reader, 'projectTag')() + bs_metadata['stream'] = getattr(data_reader, 'stream')() + bs_metadata['beamType'] = getattr(data_reader, 'beamType')() + bs_metadata['beamEnergy'] = getattr(data_reader, 'beamEnergy')() + + meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', []) + meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None) + meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None) + + # Promote up one level + meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)] + meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)] + meta_dict[filename]['beam_type'] = [bs_metadata.get('beamType', None)] + meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None) + meta_dict[filename]['stream'] = bs_metadata.get('stream', None) + + if not data_reader.good(): + # event-less file... 
+ meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0)) + meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0)) + + ievt = iter(bs) + evt = ievt.next() + evt.check() # may raise a RuntimeError + processing_tags = [dict(stream_type = tag.type, stream_name = tag.name, obeys_lbk = bool(tag.obeys_lumiblock)) for tag in evt.stream_tag()] + meta_dict[filename]['processingTags'] = [x['stream_name'] for x in processing_tags] + meta_dict[filename]['evt_number'] = [evt.global_id()] + meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())] + + # fix for ATEAM-122 + if len(bs_metadata.get('eventTypes', '')) == 0: # see: ATMETADATA-6 + evt_type = ['IS_DATA', 'IS_ATLAS'] + if bs_metadata.get('stream', '').startswith('physics_'): + evt_type.append('IS_PHYSICS') + elif bs_metadata.get('stream', '').startswith('calibration_'): + evt_type.append('IS_CALIBRATION') + elif bs_metadata.get('projectTag', '').endswith('_calib'): + evt_type.append('IS_CALIBRATION') + else: + evt_type.append('Unknown') + + meta_dict[filename]['eventTypes'] = evt_type + + if mode == 'full': + meta_dict[filename]['bs_metadata'] = bs_metadata + + # ------ Throw an error if the user provide other file types -------------------------------------------------# + else: + msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type)) + return None + + return meta_dict def _read_guid(filename): - """ - Extracts the "guid" (Globally Unique Identfier in POOL files and Grid catalogs) value from a POOL file. - :param filename: the input file - :return: the guid value - """ - import ROOT - root_file = ROOT.TFile(filename) - params = root_file.Get('##Params') + """ + Extracts the "guid" (Globally Unique Identfier in POOL files and Grid catalogs) value from a POOL file. 
+ :param filename: the input file + :return: the guid value + """ + import ROOT + root_file = ROOT.TFile.Open(filename) + params = root_file.Get('##Params') - regex = re.compile(r'^\[NAME\=([a-zA-Z0-9\_]+)\]\[VALUE\=(.*)\]') + regex = re.compile(r'^\[NAME=([a-zA-Z0-9_]+)\]\[VALUE=(.*)\]') - for i in range(params.GetEntries()): - params.GetEntry(i) - param = params.db_string + for i in range(params.GetEntries()): + params.GetEntry(i) + param = params.db_string - result = regex.match(param) - if result: - name = result.group(1) - value = result.group(2) + result = regex.match(param) + if result: + name = result.group(1) + value = result.group(2) - if name == 'FID': - return value + if name == 'FID': + return value - return None + return None def _extract_fields(obj): - result = {} - - for meth in dir(obj): - if not meth.startswith('_'): - if meth.startswith('m_'): - - field_name = str(meth)[2:] - field_value = getattr(obj, meth) + result = {} - result[field_name] = _convert_value(field_value) + for meth in dir(obj): + if not meth.startswith('_'): + if meth.startswith('m_'): - return result + field_name = str(meth)[2:] + field_value = getattr(obj, meth) + result[field_name] = _convert_value(field_value) -# compile the regex needed in _convert_value() outside it to optimize the code. 
-regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$') -# regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$') -regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$') - + return result def _convert_value(value): - if hasattr(value, '__cppname__'): + if hasattr(value, '__cppname__'): - result = regex_cppname.match(value.__cppname__) + result = regex_cppname.match(value.__cppname__) - if result: - cpp_type = result.group(1) - if cpp_type == 'vector': - return [_convert_value(val) for val in value] + if result: + cpp_type = result.group(1) + if cpp_type == 'vector': + return [_convert_value(val) for val in value] - elif cpp_type == 'pair': - return _convert_value(value.first), _convert_value(value.second) + elif cpp_type == 'pair': + return _convert_value(value.first), _convert_value(value.second) - # elif cpp_type == 'long': - # return int(value) + # elif cpp_type == 'long': + # return int(value) - elif value.__cppname__ == "_Bit_reference": - return bool(value) + elif value.__cppname__ == "_Bit_reference": + return bool(value) - # special case which extracts data in a better format from IOVPayloadContainer_p1 class - elif value.__cppname__ == 'IOVMetaDataContainer_p1': - return _extract_fields_iovmdc(value) + # special case which extracts data in a better format from IOVPayloadContainer_p1 class + elif value.__cppname__ == 'IOVMetaDataContainer_p1': + return _extract_fields_iovmdc(value) - elif value.__cppname__ == 'IOVPayloadContainer_p1': - return _extract_fields_iovpc(value) + elif value.__cppname__ == 'IOVPayloadContainer_p1': + return _extract_fields_iovpc(value) - elif value.__cppname__ == 'xAOD::EventFormat_v1': - return _extract_fields_ef(value) + elif value.__cppname__ == 'xAOD::EventFormat_v1': + return _extract_fields_ef(value) - elif (value.__cppname__ == 'EventStreamInfo_p2' or - value.__cppname__ == 'EventStreamInfo_p3'): - return _extract_fields_esi(value) + elif (value.__cppname__ == 
'EventStreamInfo_p2' or + value.__cppname__ == 'EventStreamInfo_p3'): + return _extract_fields_esi(value) - elif (value.__cppname__ == 'EventType_p1' or - value.__cppname__ == 'EventType_p3'): - return _convert_event_type_bitmask( _extract_fields(value)) + elif (value.__cppname__ == 'EventType_p1' or + value.__cppname__ == 'EventType_p3'): + return _convert_event_type_bitmask(_extract_fields(value)) - elif regex_persistent_class.match(value.__cppname__): - return _extract_fields(value) + elif regex_persistent_class.match(value.__cppname__): + return _extract_fields(value) - return value + return value def _extract_fields_iovmdc(value): - return _convert_value(value.m_payload) + return _convert_value(value.m_payload) def _extract_fields_iovpc(value): - result = {} - - for attr_idx in value.m_attrIndexes: - name_idx = attr_idx.nameIndex() - type_idx = attr_idx.typeIndex() - obj_idx = attr_idx.objIndex() - - attr_name = value.m_attrName[name_idx] - attr_value = None - - if type_idx == 0: - attr_value = bool(value.m_bool[obj_idx]) - elif type_idx == 1: - attr_value = int(value.m_char[obj_idx]) - elif type_idx == 2: - attr_value = int(value.m_unsignedChar[obj_idx]) - elif type_idx == 3: - attr_value = int(value.m_short[obj_idx]) - elif type_idx == 4: - attr_value = int(value.m_unsignedShort[obj_idx]) - elif type_idx == 5: - attr_value = int(value.m_int[obj_idx]) - elif type_idx == 6: - attr_value = int(value.m_unsignedInt[obj_idx]) - elif type_idx == 7: - attr_value = int(value.m_long[obj_idx]) - elif type_idx == 8: - attr_value = int(value.m_unsignedLong[obj_idx]) - elif type_idx == 9: - attr_value = long(value.m_longLong[obj_idx]) - elif type_idx == 10: - attr_value = long(value.m_unsignedLongLong[obj_idx]) - elif type_idx == 11: - attr_value = float(value.m_float[obj_idx]) - elif type_idx == 12: - attr_value = float(value.m_double[obj_idx]) - elif type_idx == 13: - # skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not 
def _extract_fields_iovpc(value):
    """Flatten an IOVPayloadContainer_p1 into a plain dict.

    Each attribute index of the payload points into one typed value store
    (m_bool, m_int, m_string, ...).  Values are collected per attribute name;
    when no attribute holds more than one value, the per-name lists are
    collapsed to scalars (None for empty lists).

    Raises ValueError for an unknown type index.
    """
    result = {}

    for attr_idx in value.m_attrIndexes:
        name_idx = attr_idx.nameIndex()
        type_idx = attr_idx.typeIndex()
        obj_idx = attr_idx.objIndex()

        attr_name = value.m_attrName[name_idx]
        attr_value = None

        if type_idx == 0:
            attr_value = bool(value.m_bool[obj_idx])
        elif type_idx == 1:
            attr_value = int(value.m_char[obj_idx])
        elif type_idx == 2:
            attr_value = int(value.m_unsignedChar[obj_idx])
        elif type_idx == 3:
            attr_value = int(value.m_short[obj_idx])
        elif type_idx == 4:
            attr_value = int(value.m_unsignedShort[obj_idx])
        elif type_idx == 5:
            attr_value = int(value.m_int[obj_idx])
        elif type_idx == 6:
            attr_value = int(value.m_unsignedInt[obj_idx])
        elif type_idx == 7:
            attr_value = int(value.m_long[obj_idx])
        elif type_idx == 8:
            attr_value = int(value.m_unsignedLong[obj_idx])
        elif type_idx == 9:
            # int() instead of long(): long() does not exist in Python 3,
            # and Python 2 promotes big ints to long automatically.
            attr_value = int(value.m_longLong[obj_idx])
        elif type_idx == 10:
            attr_value = int(value.m_unsignedLongLong[obj_idx])
        elif type_idx == 11:
            attr_value = float(value.m_float[obj_idx])
        elif type_idx == 12:
            attr_value = float(value.m_double[obj_idx])
        elif type_idx == 13:
            # skipping this type because in IOVPayloadContainer_p1.h
            # (line 120) it is commented out and not considered
            pass
        elif type_idx == 14:
            attr_value = str(value.m_string[obj_idx])
            # Cleaning class name from value
            if attr_value.startswith('IOVMetaDataContainer_p1_'):
                attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
            if attr_value.startswith('_'):
                attr_value = attr_value.replace('_', '/')
            # Now it is clean
        elif type_idx == 15:
            attr_value = int(value.m_date[obj_idx])
        elif type_idx == 16:
            attr_value = int(value.m_timeStamp[obj_idx])
        else:
            raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))

        if attr_name not in result:
            result[attr_name] = []

        result[attr_name].append(attr_value)

    # Collapse single-element (or empty) lists to scalars when no attribute
    # has more than one value.
    max_element_count = 0
    for name, content in result.items():
        if len(content) > max_element_count:
            max_element_count = len(content)

    if max_element_count <= 1:
        for name, content in result.items():
            if len(content) > 0:
                result[name] = content[0]
            else:
                result[name] = None

    return result


def _extract_fields_esi(value):
    """Convert an EventStreamInfo object into a plain dict."""
    result = {}

    result['eventTypes'] = []
    for eventType in value.m_eventTypes:
        result['eventTypes'].append(_convert_value(eventType))

    result['numberOfEvents'] = value.m_numberOfEvents
    result['runNumbers'] = list(value.m_runNumbers)
    result['lumiBlockNumbers'] = list(value.m_lumiBlockNumbers)
    result['processingTags'] = list(value.m_processingTags)
    result['itemList'] = []

    # Get the class name in the repository with CLID <clid>
    from CLIDComps.clidGenerator import clidGenerator
    cgen = clidGenerator("")
    for clid, sgkey in value.m_itemList:
        result['itemList'].append((cgen.getNameFromClid(clid), sgkey))

    return result


def _extract_fields_ef(value):
    """Map each xAOD::EventFormat element's branch name to its class name."""
    result = {}

    for ef_element in value:
        result[ef_element.first] = ef_element.second.className()

    return result


def _convert_event_type_bitmask(value):
    """Expand the 'bit_mask' entry of an EventType dict into readable labels.

    Adds a 'type' key with IS_SIMULATION/IS_DATA, IS_TESTBEAM/IS_ATLAS and
    IS_CALIBRATION/IS_PHYSICS according to the first three bits; 'type' is
    None when no 'bit_mask' entry is present.  Returns the mutated dict.
    """
    types = None
    for key in value:
        if key == 'bit_mask':
            val = value[key]

            bitmask_length = len(val)

            is_simulation = False
            is_testbeam = False
            is_calibration = False

            if bitmask_length > 0:  # ROOT.EventType.IS_SIMULATION
                is_simulation = val[0]

            if bitmask_length > 1:  # ROOT.EventType.IS_TESTBEAM
                is_testbeam = val[1]

            if bitmask_length > 2:  # ROOT.EventType.IS_CALIBRATION:
                is_calibration = val[2]

            types = [
                'IS_SIMULATION' if is_simulation else 'IS_DATA',
                'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
                'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
            ]

    value['type'] = types
    return value


def make_lite(meta_dict):
    """Strip the metadata dict in place to 'lite' content.

    Keeps only a whitelist of EventStreamInfo fields and of /TagInfo fields;
    returns the (mutated) dict.
    """
    for filename, file_content in meta_dict.items():
        for key in file_content:
            if key in meta_dict[filename]['metadata_items'] and regexEventStreamInfo.match(meta_dict[filename]['metadata_items'][key]):
                keys_to_keep = ['lumiBlockNumbers', 'runNumbers', 'mc_event_number', 'mc_channel_number', 'eventTypes', 'processingTags']

                for item in list(meta_dict[filename][key]):
                    if item not in keys_to_keep:
                        meta_dict[filename][key].pop(item)

        if '/TagInfo' in file_content:
            keys_to_keep = ['beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag', 'AODFixVersion']

            for item in list(meta_dict[filename]['/TagInfo']):
                if item not in keys_to_keep:
                    meta_dict[filename]['/TagInfo'].pop(item)
    return meta_dict
def make_peeker(meta_dict):
    """Trim the metadata dict in place to 'peeker' level.

    Keeps only a whitelist of EventStreamInfo fields and of the entries in
    the /TagInfo, /Simulation/Parameters and /Digitization/Parameters
    folders; returns the (mutated) dict.
    """

    def keep_only(d, wanted):
        # Drop, in place, every entry of d whose key is not in wanted.
        for entry in list(d):
            if entry not in wanted:
                d.pop(entry)

    for filename, file_content in meta_dict.items():
        for key in file_content:
            if key in meta_dict[filename]['metadata_items'] and regexEventStreamInfo.match(meta_dict[filename]['metadata_items'][key]):
                keep_only(meta_dict[filename][key], (
                    'lumiBlockNumbers', 'runNumbers', 'mc_event_number',
                    'mc_channel_number', 'eventTypes', 'processingTags',
                    'itemList'))

        if '/TagInfo' in file_content:
            keep_only(meta_dict[filename]['/TagInfo'], (
                'beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag',
                'AODFixVersion', 'AMITag', 'project_name',
                'triggerStreamOfFile', 'AtlasRelease'))

        if '/Simulation/Parameters' in file_content:
            keep_only(meta_dict[filename]['/Simulation/Parameters'], (
                'TruthStrategy', 'SimBarcodeOffset'))

        if '/Digitization/Parameters' in file_content:
            keep_only(meta_dict[filename]['/Digitization/Parameters'], (
                'numberOfCollisions', 'intraTrainBunchSpacing',
                'BeamIntensityPattern'))

    return meta_dict


def promote_keys(meta_dict):
    """Hoist nested metadata up to the top level of each file's dict.

    The first EventStreamInfo-like entry is merged into the file dict (with
    mc_event_number / mc_channel_number / eventTypes derived from its first
    event type) and removed; the /TagInfo, /Simulation/Parameters and
    /Digitization/Parameters folders are likewise merged and removed.
    Returns the (mutated) dict.
    """
    for filename, file_content in meta_dict.items():
        md = meta_dict[filename]

        # Promote the first (and only) EventStreamInfo-like entry.
        for key in file_content:
            if key in md['metadata_items'] and regexEventStreamInfo.match(md['metadata_items'][key]):
                md.update(md[key])
                et = md['eventTypes'][0]
                md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
                md['mc_channel_number'] = et.get('mc_channel_number', 0)
                md['eventTypes'] = et['type']
                md['processingTags'] = md[key]['processingTags']

                meta_dict[filename].pop(key)
                break

        # Promote the content of the in-file metadata folders.
        for folder in ('/TagInfo', '/Simulation/Parameters', '/Digitization/Parameters'):
            if folder in file_content:
                md.update(md[folder])
                md.pop(folder)

    return meta_dict