Merge branch 'MetaReader_remote_files' into 'master'

Fix MetaReader to read remote files that use the root protocol. See merge request atlas/athena!22752

Merge branch 'MetaReader_remote_files' into 'master'
e44875c6 · Vakhtang Tsulaia · ae746b17 · 9f916ab2 · e44875c6 · e44875c6
Commit e44875c6 authored 5 years ago by Vakhtang Tsulaia
--- a/Tools/PyUtils/bin/meta-reader.py
+++ b/Tools/PyUtils/bin/meta-reader.py
@@ -11,7 +11,7 @@ import logging
 import os

 # escape sequence [?1034h which aprear on several runs due to smm capability (Meta Mode On) for xterm. 
-if 'TERM' in os.envirn:
+if 'TERM' in os.environ:
 	del os.environ['TERM']

 msg = logging.getLogger('MetaReader')

--- a/Tools/PyUtils/python/MetaReader.py
+++ b/Tools/PyUtils/python/MetaReader.py
 # Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration

 from __future__ import absolute_import
-import os, re
+import os
+import re
 from fnmatch import fnmatchcase
 from AthenaCommon.Logging import logging

 msg = logging.getLogger('MetaReader')

-regexEventStreamInfo				=  re.compile(r'^EventStreamInfo(_p\d+)?$')
-regexIOVMetaDataContainer			=  re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
-regexByteStreamMetadataContainer	=  re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')
-regexXAODEventFormat				=  re.compile(r'^xAOD::EventFormat(_v\d+)?$')
+# Compile the regular expressions used by this module once, at import time, instead of inside the functions.
+regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')
+regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
+regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')
+regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')
+regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
+# regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$')
+regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
+regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')


 def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter= []):
-	"""
-	This tool is independent of Athena framework and returns the metadata from a given file.
-	:param filenames: the input file from which metadata needs to be extracted.
-	:param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files.
-	:param mode: if true, will return all metadata associated with the filename. By default, is false and this will
-	return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
-	:return: a dictionary of metadata for the given input file.
-	"""
-
-	from RootUtils import PyROOTFixes
-
-	# Check if the input is a file or a list of files.
-	if isinstance(filenames, basestring):
-		filenames = [filenames]
-
-	# Check if file_type is an allowed value
-	if file_type is not None:
-		if file_type not in ('POOL', 'BS'):
-			raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
-		else:
-			msg.info('Forced file_type: {0}'.format(file_type))
-
-	# Check the value of mode parameter
-	if mode not in ('tiny', 'lite', 'full', 'peeker'):
-		raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker" or "full"')
-	msg.info('Current mode used: {0}'.format(mode))
-
-	if mode != 'full' and len(meta_key_filter) > 0:
-		raise NameError('It is possible to use the meta_key_filter option only for full mode')
-	if len(meta_key_filter) > 0:
-		msg.info('Filter used: {0}'.format(meta_key_filter))
-
-	# create the storage object for metadata.
-	meta_dict = {}
-
-	# ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
-	for filename in filenames:
-		meta_dict[filename] = {}
-		current_file_type = None
-		# Determine the file_type of the input and store this information into meta_dict
-		if not file_type:
-			with open(filename, 'rb') as binary_file:
-				magic_file = binary_file.read(4)
-
-				if magic_file == 'root':
-					current_file_type = 'POOL'
-					meta_dict[filename]['file_type'] = 'POOL'
-
-				else:
-					current_file_type = 'BS'
-					meta_dict[filename]['file_type'] = 'BS'
-
-		else:
-			current_file_type = file_type
-
-		# add information about the file_size of the input filename
-		meta_dict[filename]['file_size'] = os.path.getsize(filename)
-
-		# ----- retrieves metadata from POOL files ------------------------------------------------------------------#
-		if current_file_type == 'POOL':
-			import ROOT
-			# open the file using ROOT.TFile
-			current_file = ROOT.TFile(filename)
-
-			# open the tree 'POOLContainer' to read the number of entries
-			if current_file.GetListOfKeys().Contains('POOLContainer'):
-				meta_dict[filename]['nentries'] = current_file.Get('POOLContainer').GetEntriesFast()
-			else:
-				meta_dict[filename]['nentries'] = None
-
-			# read and add the 'GUID' value
-			meta_dict[filename]['file_guid'] = _read_guid(filename)
-
-			# ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
-			if mode != 'tiny':
-				# selecting from all tree the only one which contains metadata, respectively "MetaData"
-				metadata_tree = current_file.Get('MetaData')
-				# read all list of branches stored in "MetaData" tree
-				metadata_branches = metadata_tree.GetListOfBranches()
-				nr_of_branches = metadata_branches.GetEntriesFast()
-
-				# object to store the names of metadata containers and their corresponding class name.
-				meta_dict[filename]['metadata_items'] = {}
-
-				# create a container for the list of filters used for the lite version
-				meta_filter = {}
-
-				# set the filters for name
-				if mode == 'lite':
-					meta_filter = {
-						'/TagInfo': 'IOVMetaDataContainer_p1',
-						'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
-						'*': 'EventStreamInfo_p*'
-					}
-
-				# set the filters for name
-				if mode == 'peeker':
-					meta_filter = {
-						'/TagInfo': 'IOVMetaDataContainer_p1',
-						'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
-						'/Simulation/Parameters': 'IOVMetaDataContainer_p1',
-						'/Digitization/Parameters': 'IOVMetaDataContainer_p1',
-						'/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1',
-						'*': 'EventStreamInfo_p*'
-					}
-
-				if mode == 'full' and len(meta_key_filter) > 0:
-					meta_filter = {f: '*' for f in meta_key_filter}
-				# store all persistent classes for metadata container existing in a POOL/ROOT file.
-				persistent_instances = {}
-
-				for i in range(0, nr_of_branches):
-					branch = metadata_branches.At(i)
-					name = branch.GetName()
-
-					class_name = branch.GetClassName()
-
-					if regexIOVMetaDataContainer.match(class_name):
-						name = name.replace('IOVMetaDataContainer_p1_','').replace('_','/')
-						
-						
-					if regexIOVMetaDataContainer.match(class_name):
-						meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
-					elif regexByteStreamMetadataContainer.match(class_name):
-						meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
-					elif regexEventStreamInfo.match(class_name):
-						meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
-					else:
-						meta_dict[filename]['metadata_items'][name] = class_name
-				
-					if len(meta_filter) > 0:
-						keep = False
-						for filter_key, filter_class in meta_filter.items():
-							if (filter_key.replace('/', '_') == name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
-								keep = True
-								break
-
-						if not keep:
-							continue
-
-					# assign the corresponding persistent class based of the name of the metadata container
-					if   regexEventStreamInfo.match(class_name):
-                                                if class_name.endswith('_p2'):
-                                                        persistent_instances[name] = ROOT.EventStreamInfo_p2()
-                                                else:
-                                                        persistent_instances[name] = ROOT.EventStreamInfo_p3()
-					elif regexIOVMetaDataContainer.match(class_name):
-						persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
-					elif regexXAODEventFormat.match(class_name):
-						persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
-
-					if name in persistent_instances:
-						branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
-
-				metadata_tree.GetEntry(0)
-
-				# clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
-				if len(meta_key_filter) > 0:
-					meta_dict[filename] = {}
-
-				# read the metadata
-				for name, content in persistent_instances.items():
-					key = name
-
-					if hasattr(content, 'm_folderName'):
-						key = getattr(content, 'm_folderName')
-
-					meta_dict[filename][key] = _convert_value(content)
-
-			if promote is None:
-				promote = mode == 'lite' or mode == 'peeker'
-
-			# Filter the data and create a prettier output for the 'lite' mode
-			if mode == 'lite':
-				meta_dict = make_lite(meta_dict)
-
-			if mode == 'peeker':
-				meta_dict = make_peeker(meta_dict)
-
-			if promote:
-				meta_dict = promote_keys(meta_dict)
-
-
-		# ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
-		elif current_file_type == 'BS':
-			import eformat
-
-			# store the number of entries
-			bs = eformat.istream(filename)
-			meta_dict[filename]['nentries'] = bs.total_events
-
-			# store the 'guid' value
-			data_reader = eformat.EventStorage.pickDataReader(filename)
-			assert data_reader, 'problem picking a data reader for file [%s]' % filename
-
-			if hasattr(data_reader, 'GUID'):
-				meta_dict[filename]['file_guid'] = getattr(data_reader, 'GUID')()
-
-			# if the flag full is set to true then grab all metadata
-			# ------------------------------------------------------------------------------------------------------#
-			if mode != "tiny":
-				bs_metadata = {}
-
-				for md in data_reader.freeMetaDataStrings():
-					if md.startswith('Event type:'):
-						k = 'eventTypes'
-						v = []
-						if 'is sim' in md:
-							v.append('IS_SIMULATION')
-						else:
-							v.append('IS_DATA')
-
-						if 'is atlas' in md:
-							v.append('IS_ATLAS')
-						else:
-							v.append('IS_TESTBEAM')
-
-						if 'is physics' in md:
-							v.append('IS_PHYSICS')
-						else:
-							v.append('IS_CALIBRATION')
-
-						bs_metadata[k] = tuple(v)
-
-					elif md.startswith('GeoAtlas:'):
-						k = 'geometry'
-						v = md.split('GeoAtlas:')[1].strip()
-						bs_metadata[k] = v
-
-					elif md.startswith('IOVDbGlobalTag:'):
-						k = 'conditions_tag'
-						v = md.split('IOVDbGlobalTag:')[1].strip()
-						bs_metadata[k] = v
-
-					elif '=' in md:
-						k, v = md.split('=')
-						bs_metadata[k] = v
-
-				bs_metadata['runNumbers'] = getattr(data_reader, 'runNumber')()
-				bs_metadata['lumiBlockNumbers'] = getattr(data_reader, 'lumiblockNumber')()
-				bs_metadata['projectTag'] = getattr(data_reader, 'projectTag')()
-				bs_metadata['stream'] = getattr(data_reader, 'stream')()
-				bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
-				bs_metadata['beamEnergy'] = getattr(data_reader, 'beamEnergy')()
-
-				meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
-				meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
-				meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
-
-				# Promote up one level
-				meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
-				meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
-				meta_dict[filename]['beam_type'] = [bs_metadata.get('beamType', None)]
-				meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
-				meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
-
-				if not data_reader.good():
-					# event-less file...
-					meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
-					meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
-
-				ievt = iter(bs)
-				evt = ievt.next()
-				evt.check()  # may raise a RuntimeError
-				processing_tags = [dict(stream_type = tag.type, stream_name = tag.name, obeys_lbk = bool(tag.obeys_lumiblock)) for tag in evt.stream_tag()]
-				meta_dict[filename]['processingTags'] = [x['stream_name'] for x in processing_tags]
-				meta_dict[filename]['evt_number'] = [evt.global_id()]
-				meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
-
-
-				# fix for ATEAM-122
-				if len(bs_metadata.get('eventTypes', '')) == 0:	 # see: ATMETADATA-6
-					evt_type = ['IS_DATA', 'IS_ATLAS']
-					if bs_metadata.get('stream', '').startswith('physics_'):
-						evt_type.append('IS_PHYSICS')
-					elif bs_metadata.get('stream', '').startswith('calibration_'):
-						evt_type.append('IS_CALIBRATION')
-					elif bs_metadata.get('projectTag', '').endswith('_calib'):
-						evt_type.append('IS_CALIBRATION')
-					else:
-						evt_type.append('Unknown')
-
-					meta_dict[filename]['eventTypes'] = evt_type
-
-				if mode == 'full':
-					meta_dict[filename]['bs_metadata'] = bs_metadata
-
-		# ------ Throw an error if the user provide other file types -------------------------------------------------#
-		else:
-			msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
-			return None
-
-	return meta_dict
-
-
-# Currently not used
-# def _md5(fname, block_size=2**20, do_fast_md5=True):
-#     import hashlib
-#     # do_fast_md5 is for compatibility with AthFile.
-#     # -- Sebastian Liem
-#     hash_md5 = hashlib.md5()
-#     with open(fname, 'rb') as f:
-#	  for chunk in iter(lambda: f.read(block_size), b''):
-#	      hash_md5.update(chunk)
-#	      if do_fast_md5:
-#		  break
-#     return hash_md5.hexdigest()
+    """
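+    This tool is independent of the Athena framework and returns the metadata from the given file(s).
+    :param filenames: the input file, or a list of input files, from which metadata needs to be extracted.
+    :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files. If None, the type is determined
+    for each file: from its first bytes when it exists locally, otherwise from its name.
+    :param mode: one of 'tiny', 'lite', 'peeker' or 'full' (default: 'lite'). The 'tiny' mode returns only the
+    following keys: 'file_guid', 'file_size', 'file_type', 'nentries'. The other modes read additional metadata.
+    :param promote: for POOL files, whether promote_keys() is applied to the result. If None, it is applied for
+    the 'lite' and 'peeker' modes only.
+    :param meta_key_filter: list of metadata keys to keep in the result. It can be used only with the 'full' mode.
+    :return: a dictionary of metadata for the given input file(s).
+    :raises NameError: if 'file_type' or 'mode' has a value that is not allowed, or if 'meta_key_filter' is used
+    with a mode other than 'full'.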
+    """
+
+    from RootUtils import PyROOTFixes
+
+    # Check if the input is a file or a list of files.
+    if isinstance(filenames, basestring):
+        filenames = [filenames]
+
+    # Check if file_type is an allowed value
+    if file_type is not None:
+        if file_type not in ('POOL', 'BS'):
+            raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
+        else:
+            msg.info('Forced file_type: {0}'.format(file_type))
+
+    # Check the value of mode parameter
+    if mode not in ('tiny', 'lite', 'full', 'peeker'):
+        raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker" or "full"')
+    msg.info('Current mode used: {0}'.format(mode))
+
+    if mode != 'full' and len(meta_key_filter) > 0:
+        raise NameError('It is possible to use the meta_key_filter option only for full mode')
+    if len(meta_key_filter) > 0:
+        msg.info('Filter used: {0}'.format(meta_key_filter))
+
+    # create the storage object for metadata.
+    meta_dict = {}
+
+    # ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
+    for filename in filenames:
+        meta_dict[filename] = {}
+        current_file_type = None
+        # Determine the file_type of the input and store this information into meta_dict
+        if not file_type:
+            if os.path.isfile(filename):
+                with open(filename, 'rb') as binary_file:
+                    magic_file = binary_file.read(4)
+
+                    if magic_file == 'root':
+                        current_file_type = 'POOL'
+                        meta_dict[filename]['file_type'] = 'POOL'
+
+                    else:
+                        current_file_type = 'BS'
+                        meta_dict[filename]['file_type'] = 'BS'
+
+                    # add information about the file_size of the input filename
+                    meta_dict[filename]['file_size'] = os.path.getsize(filename)
+
+            # determine the file type for the remote input files
+            else:
+                if regex_BS_files.match(filename):
+                    current_file_type = 'BS'
+                    meta_dict[filename]['file_type'] = 'BS'
+                    print('############### BS online')
+                else:
+                    current_file_type = 'POOL'
+                    meta_dict[filename]['file_type'] = 'POOL'
+                    print('############### POOL online')
+
+                # add information about the file_size of the input filename
+                meta_dict[filename]['file_size'] = None  # None -> we can't read the file size for a remote file
+
+        else:
+            current_file_type = file_type
+
+        # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
+        if current_file_type == 'POOL':
+            import ROOT
+            # open the file using ROOT.TFile
+            current_file = ROOT.TFile.Open(filename)
+
+            # open the tree 'POOLContainer' to read the number of entries
+            if current_file.GetListOfKeys().Contains('POOLContainer'):
+                meta_dict[filename]['nentries'] = current_file.Get('POOLContainer').GetEntriesFast()
+            else:
+                meta_dict[filename]['nentries'] = None
+
+            # read and add the 'GUID' value
+            meta_dict[filename]['file_guid'] = _read_guid(filename)
+
+            # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
+            if mode != 'tiny':
+                # select the only tree which contains the metadata, namely "MetaData"
+                metadata_tree = current_file.Get('MetaData')
+                # read the list of all branches stored in the "MetaData" tree
+                metadata_branches = metadata_tree.GetListOfBranches()
+                nr_of_branches = metadata_branches.GetEntriesFast()
+
+                # object to store the names of metadata containers and their corresponding class name.
+                meta_dict[filename]['metadata_items'] = {}
+
+                # create a container for the list of filters used for the lite version
+                meta_filter = {}
+
+                # set the filters for name
+                if mode == 'lite':
+                    meta_filter = {
+                        '/TagInfo': 'IOVMetaDataContainer_p1',
+                        'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
+                        '*': 'EventStreamInfo_p*'
+                    }
+
+                # set the filters for name
+                if mode == 'peeker':
+                    meta_filter = {
+                        '/TagInfo': 'IOVMetaDataContainer_p1',
+                        'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
+                        '/Simulation/Parameters': 'IOVMetaDataContainer_p1',
+                        '/Digitization/Parameters': 'IOVMetaDataContainer_p1',
+                        '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1',
+                        '*': 'EventStreamInfo_p*'
+                    }
+
+                if mode == 'full' and len(meta_key_filter) > 0:
+                    meta_filter = {f: '*' for f in meta_key_filter}
+                # store all persistent classes for metadata container existing in a POOL/ROOT file.
+                persistent_instances = {}
+
+                for i in range(0, nr_of_branches):
+                    branch = metadata_branches.At(i)
+                    name = branch.GetName()
+
+                    class_name = branch.GetClassName()
+
+                    if regexIOVMetaDataContainer.match(class_name):
+                        name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')
+
+                    if regexIOVMetaDataContainer.match(class_name):
+                        meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
+                    elif regexByteStreamMetadataContainer.match(class_name):
+                        meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
+                    elif regexEventStreamInfo.match(class_name):
+                        meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
+                    else:
+                        meta_dict[filename]['metadata_items'][name] = class_name
+
+                    if len(meta_filter) > 0:
+                        keep = False
+                        for filter_key, filter_class in meta_filter.items():
+                            if (filter_key.replace('/', '_') == name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
+                                keep = True
+                                break
+
+                        if not keep:
+                            continue
+
+                    # assign the corresponding persistent class based on the class name of the metadata container
+                    if regexEventStreamInfo.match(class_name):
+                        if class_name.endswith('_p2'):
+                            persistent_instances[name] = ROOT.EventStreamInfo_p2()
+                        else:
+                            persistent_instances[name] = ROOT.EventStreamInfo_p3()
+                    elif regexIOVMetaDataContainer.match(class_name):
+                        persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
+                    elif regexXAODEventFormat.match(class_name):
+                        persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
+
+                    if name in persistent_instances:
+                        branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
+
+                metadata_tree.GetEntry(0)
+
+                # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
+                if len(meta_key_filter) > 0:
+                    meta_dict[filename] = {}
+
+                # read the metadata
+                for name, content in persistent_instances.items():
+                    key = name
+
+                    if hasattr(content, 'm_folderName'):
+                        key = getattr(content, 'm_folderName')
+
+                    meta_dict[filename][key] = _convert_value(content)
+
+            if promote is None:
+                promote = mode == 'lite' or mode == 'peeker'
+
+            # Filter the data and create a prettier output for the 'lite' mode
+            if mode == 'lite':
+                meta_dict = make_lite(meta_dict)
+
+            if mode == 'peeker':
+                meta_dict = make_peeker(meta_dict)
+
+            if promote:
+                meta_dict = promote_keys(meta_dict)
+
+        # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
+        elif current_file_type == 'BS':
+            import eformat
+
+            # store the number of entries
+            bs = eformat.istream(filename)
+            meta_dict[filename]['nentries'] = bs.total_events
+
+            # store the 'guid' value
+            data_reader = eformat.EventStorage.pickDataReader(filename)
+            assert data_reader, 'problem picking a data reader for file [%s]' % filename
+
+            if hasattr(data_reader, 'GUID'):
+                meta_dict[filename]['file_guid'] = getattr(data_reader, 'GUID')()
+
+            # for every mode except 'tiny', grab the additional metadata
+            # ------------------------------------------------------------------------------------------------------#
+            if mode != "tiny":
+                bs_metadata = {}
+
+                for md in data_reader.freeMetaDataStrings():
+                    if md.startswith('Event type:'):
+                        k = 'eventTypes'
+                        v = []
+                        if 'is sim' in md:
+                            v.append('IS_SIMULATION')
+                        else:
+                            v.append('IS_DATA')
+
+                        if 'is atlas' in md:
+                            v.append('IS_ATLAS')
+                        else:
+                            v.append('IS_TESTBEAM')
+
+                        if 'is physics' in md:
+                            v.append('IS_PHYSICS')
+                        else:
+                            v.append('IS_CALIBRATION')
+
+                        bs_metadata[k] = tuple(v)
+
+                    elif md.startswith('GeoAtlas:'):
+                        k = 'geometry'
+                        v = md.split('GeoAtlas:')[1].strip()
+                        bs_metadata[k] = v
+
+                    elif md.startswith('IOVDbGlobalTag:'):
+                        k = 'conditions_tag'
+                        v = md.split('IOVDbGlobalTag:')[1].strip()
+                        bs_metadata[k] = v
+
+                    elif '=' in md:
+                        k, v = md.split('=')
+                        bs_metadata[k] = v
+
+                bs_metadata['runNumbers'] = getattr(data_reader, 'runNumber')()
+                bs_metadata['lumiBlockNumbers'] = getattr(data_reader, 'lumiblockNumber')()
+                bs_metadata['projectTag'] = getattr(data_reader, 'projectTag')()
+                bs_metadata['stream'] = getattr(data_reader, 'stream')()
+                bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
+                bs_metadata['beamEnergy'] = getattr(data_reader, 'beamEnergy')()
+
+                meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
+                meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
+                meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
+
+                # Promote up one level
+                meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
+                meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
+                meta_dict[filename]['beam_type'] = [bs_metadata.get('beamType', None)]
+                meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
+                meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
+
+                if not data_reader.good():
+                    # event-less file...
+                    meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
+                    meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
+
+                ievt = iter(bs)
+                evt = ievt.next()
+                evt.check()  # may raise a RuntimeError
+                processing_tags = [dict(stream_type = tag.type, stream_name = tag.name, obeys_lbk = bool(tag.obeys_lumiblock)) for tag in evt.stream_tag()]
+                meta_dict[filename]['processingTags'] = [x['stream_name'] for x in processing_tags]
+                meta_dict[filename]['evt_number'] = [evt.global_id()]
+                meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
+
+                # fix for ATEAM-122
+                if len(bs_metadata.get('eventTypes', '')) == 0:	 # see: ATMETADATA-6
+                    evt_type = ['IS_DATA', 'IS_ATLAS']
+                    if bs_metadata.get('stream', '').startswith('physics_'):
+                        evt_type.append('IS_PHYSICS')
+                    elif bs_metadata.get('stream', '').startswith('calibration_'):
+                        evt_type.append('IS_CALIBRATION')
+                    elif bs_metadata.get('projectTag', '').endswith('_calib'):
+                        evt_type.append('IS_CALIBRATION')
+                    else:
+                        evt_type.append('Unknown')
+
+                    meta_dict[filename]['eventTypes'] = evt_type
+
+                if mode == 'full':
+                    meta_dict[filename]['bs_metadata'] = bs_metadata
+
+        # ------ Log an error and return None if the file type is neither POOL nor BS -------------------------------#
+        else:
+            msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
+            return None
+
+    return meta_dict


 def _read_guid(filename):
-	"""
-	Extracts the "guid" (Globally Unique Identfier in POOL files and Grid catalogs) value from a POOL file.
-	:param filename: the input file
-	:return: the guid value
-	"""
-	import ROOT
-	root_file = ROOT.TFile(filename)
-	params = root_file.Get('##Params')
+    """
+    Extracts the "guid" (Globally Unique Identifier in POOL files and Grid catalogs) value from a POOL file.
+    :param filename: the input file
+    :return: the guid value
+    """
+    import ROOT
+    root_file = ROOT.TFile.Open(filename)
+    params = root_file.Get('##Params')

-	regex = re.compile(r'^\[NAME\=([a-zA-Z0-9\_]+)\]\[VALUE\=(.*)\]')
+    regex = re.compile(r'^\[NAME=([a-zA-Z0-9_]+)\]\[VALUE=(.*)\]')

-	for i in range(params.GetEntries()):
-		params.GetEntry(i)
-		param = params.db_string
+    for i in range(params.GetEntries()):
+        params.GetEntry(i)
+        param = params.db_string

-		result = regex.match(param)
-		if result:
-			name = result.group(1)
-			value = result.group(2)
+        result = regex.match(param)
+        if result:
+            name = result.group(1)
+            value = result.group(2)

-			if name == 'FID':
-				return value
+            if name == 'FID':
+                return value

-	return None
+    return None


 def _extract_fields(obj):
-	result = {}
-
-	for meth in dir(obj):
-		if not meth.startswith('_'):
-			if meth.startswith('m_'):
-
-				field_name = str(meth)[2:]
-				field_value = getattr(obj, meth)
+    result = {}

-				result[field_name] = _convert_value(field_value)
+    for meth in dir(obj):
+        if not meth.startswith('_'):
+            if meth.startswith('m_'):

-	return result
+                field_name = str(meth)[2:]
+                field_value = getattr(obj, meth)

+                result[field_name] = _convert_value(field_value)

-# compile the regex needed in _convert_value() outside it to optimize the code.
-regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
-# regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$')
-regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
-
+    return result


 def _convert_value(value):
     """Convert ``value`` according to its ``__cppname__``, if it has one.

     A value without a ``__cppname__`` attribute, or whose ``__cppname__`` is
     not matched by ``regex_cppname``, is returned unchanged.  Otherwise:

     * ``vector``: list of the recursively converted elements;
     * ``pair``: 2-tuple of the converted ``first`` and ``second`` members;
     * ``_Bit_reference``: ``bool(value)``;
     * ``IOVMetaDataContainer_p1``: ``_extract_fields_iovmdc()``;
     * ``IOVPayloadContainer_p1``: ``_extract_fields_iovpc()``;
     * ``xAOD::EventFormat_v1``: ``_extract_fields_ef()``;
     * ``EventStreamInfo_p2`` / ``EventStreamInfo_p3``:
       ``_extract_fields_esi()``;
     * ``EventType_p1`` / ``EventType_p3``: ``_extract_fields()`` followed by
       ``_convert_event_type_bitmask()``;
     * any other name matched by ``regex_persistent_class``:
       ``_extract_fields()``;
     * anything else: returned unchanged.
     """
     if not hasattr(value, '__cppname__'):
         return value

     cppname = value.__cppname__
     matched = regex_cppname.match(cppname)
     if not matched:
         return value

     # group(1) is the type name in front of any '<...>' template arguments.
     cpp_type = matched.group(1)

     if cpp_type == 'vector':
         return [_convert_value(element) for element in value]

     if cpp_type == 'pair':
         return _convert_value(value.first), _convert_value(value.second)

     # The remaining cases compare the complete C++ name, not only group(1).
     if cppname == "_Bit_reference":
         return bool(value)

     # The metadata container is reduced to its converted payload.
     if cppname == 'IOVMetaDataContainer_p1':
         return _extract_fields_iovmdc(value)

     if cppname == 'IOVPayloadContainer_p1':
         return _extract_fields_iovpc(value)

     if cppname == 'xAOD::EventFormat_v1':
         return _extract_fields_ef(value)

     if cppname in ('EventStreamInfo_p2', 'EventStreamInfo_p3'):
         return _extract_fields_esi(value)

     if cppname in ('EventType_p1', 'EventType_p3'):
         return _convert_event_type_bitmask(_extract_fields(value))

     if regex_persistent_class.match(cppname):
         return _extract_fields(value)

     return value


 def _extract_fields_iovmdc(value):
     """Return the ``m_payload`` member of ``value`` after ``_convert_value()``."""
     payload = value.m_payload
     return _convert_value(payload)


 def _extract_fields_iovpc(value):
-	result = {}
-
-	for attr_idx in value.m_attrIndexes:
-		name_idx = attr_idx.nameIndex()
-		type_idx = attr_idx.typeIndex()
-		obj_idx = attr_idx.objIndex()
-
-		attr_name = value.m_attrName[name_idx]
-		attr_value = None
-
-		if type_idx == 0:
-			attr_value = bool(value.m_bool[obj_idx])
-		elif type_idx == 1:
-			attr_value = int(value.m_char[obj_idx])
-		elif type_idx == 2:
-			attr_value = int(value.m_unsignedChar[obj_idx])
-		elif type_idx == 3:
-			attr_value = int(value.m_short[obj_idx])
-		elif type_idx == 4:
-			attr_value = int(value.m_unsignedShort[obj_idx])
-		elif type_idx == 5:
-			attr_value = int(value.m_int[obj_idx])
-		elif type_idx == 6:
-			attr_value = int(value.m_unsignedInt[obj_idx])
-		elif type_idx == 7:
-			attr_value = int(value.m_long[obj_idx])
-		elif type_idx == 8:
-			attr_value = int(value.m_unsignedLong[obj_idx])
-		elif type_idx == 9:
-			attr_value = long(value.m_longLong[obj_idx])
-		elif type_idx == 10:
-			attr_value = long(value.m_unsignedLongLong[obj_idx])
-		elif type_idx == 11:
-			attr_value = float(value.m_float[obj_idx])
-		elif type_idx == 12:
-			attr_value = float(value.m_double[obj_idx])
-		elif type_idx == 13:
-			# skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
-			pass
-		elif type_idx == 14:
-			attr_value = str(value.m_string[obj_idx])
-		    # Cleaning class name from value
-			if attr_value.startswith('IOVMetaDataContainer_p1_'):
-				attr_value = attr_value.replace('IOVMetaDataContainer_p1_','')
-			if attr_value.startswith('_'):
-				attr_value = attr_value.replace('_','/')
-			# Now it is clean
-		elif type_idx == 15:
-			attr_value = long(value.m_date[obj_idx])
-		elif type_idx == 16:
-			attr_value = long(value.m_timeStamp[obj_idx])
-		else:
-			raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
-
-		if attr_name not in result:
-			result[attr_name] = []
-			
-		result[attr_name].append(attr_value)
-
-	max_element_count = 0
-	for name, content in result.items():
-		if len(content) > max_element_count:
-			max_element_count = len(content)
-
-	if max_element_count <= 1:
-		for name, content in result.items():
-			if len(content) > 0:
-				result[name] = content[0]
-			else:
-				result[name] = None
-
-	return result
+    result = {}
+
+    for attr_idx in value.m_attrIndexes:
+        name_idx = attr_idx.nameIndex()
+        type_idx = attr_idx.typeIndex()
+        obj_idx = attr_idx.objIndex()
+
+        attr_name = value.m_attrName[name_idx]
+        attr_value = None
+
+        if type_idx == 0:
+            attr_value = bool(value.m_bool[obj_idx])
+        elif type_idx == 1:
+            attr_value = int(value.m_char[obj_idx])
+        elif type_idx == 2:
+            attr_value = int(value.m_unsignedChar[obj_idx])
+        elif type_idx == 3:
+            attr_value = int(value.m_short[obj_idx])
+        elif type_idx == 4:
+            attr_value = int(value.m_unsignedShort[obj_idx])
+        elif type_idx == 5:
+            attr_value = int(value.m_int[obj_idx])
+        elif type_idx == 6:
+            attr_value = int(value.m_unsignedInt[obj_idx])
+        elif type_idx == 7:
+            attr_value = int(value.m_long[obj_idx])
+        elif type_idx == 8:
+            attr_value = int(value.m_unsignedLong[obj_idx])
+        elif type_idx == 9:
+            attr_value = long(value.m_longLong[obj_idx])
+        elif type_idx == 10:
+            attr_value = long(value.m_unsignedLongLong[obj_idx])
+        elif type_idx == 11:
+            attr_value = float(value.m_float[obj_idx])
+        elif type_idx == 12:
+            attr_value = float(value.m_double[obj_idx])
+        elif type_idx == 13:
+            # skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
+            pass
+        elif type_idx == 14:
+            attr_value = str(value.m_string[obj_idx])
+            # Cleaning class name from value
+            if attr_value.startswith('IOVMetaDataContainer_p1_'):
+                attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
+            if attr_value.startswith('_'):
+                attr_value = attr_value.replace('_', '/')
+            # Now it is clean
+        elif type_idx == 15:
+            attr_value = long(value.m_date[obj_idx])
+        elif type_idx == 16:
+            attr_value = long(value.m_timeStamp[obj_idx])
+        else:
+            raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
+
+        if attr_name not in result:
+            result[attr_name] = []
+
+        result[attr_name].append(attr_value)
+
+    max_element_count = 0
+    for name, content in result.items():
+        if len(content) > max_element_count:
+            max_element_count = len(content)
+
+    if max_element_count <= 1:
+        for name, content in result.items():
+            if len(content) > 0:
+                result[name] = content[0]
+            else:
+                result[name] = None
+
+    return result


 def _extract_fields_esi(value):
     """Build a dictionary from the members of an EventStreamInfo-like object.

     The result holds, in this order: ``eventTypes`` (each element of
     ``m_eventTypes`` passed through ``_convert_value()``), ``numberOfEvents``,
     ``runNumbers``, ``lumiBlockNumbers`` and ``processingTags`` (the last
     three copied into lists), and ``itemList``, where every ``(clid, sgkey)``
     pair of ``m_itemList`` becomes ``(name, sgkey)`` with the name obtained
     from ``clidGenerator("").getNameFromClid(clid)``.
     """
     result = {
         'eventTypes': [_convert_value(event_type) for event_type in value.m_eventTypes],
         'numberOfEvents': value.m_numberOfEvents,
         'runNumbers': list(value.m_runNumbers),
         'lumiBlockNumbers': list(value.m_lumiBlockNumbers),
         'processingTags': list(value.m_processingTags),
     }

     # Imported here, after the members above have been read, exactly where
     # the CLID -> class name lookup is needed.
     from CLIDComps.clidGenerator import clidGenerator
     clid_lookup = clidGenerator("")
     result['itemList'] = [
         (clid_lookup.getNameFromClid(clid), sgkey)
         for clid, sgkey in value.m_itemList
     ]

     return result


 def _extract_fields_ef(value):
-	result = {}
+    result = {}

-	for ef_element in value:
-		result[ef_element.first] = ef_element.second.className()
+    for ef_element in value:
+        result[ef_element.first] = ef_element.second.className()

-	return result
+    return result


 def _convert_event_type_bitmask(value):

-	types = None
-	for key in value:
-		if key == 'bit_mask':
-			val = value[key]
+    types = None
+    for key in value:
+        if key == 'bit_mask':
+            val = value[key]

-			bitmask_length = len(val)
+            bitmask_length = len(val)

-			is_simulation = False
-			is_testbeam = False
-			is_calibration = False
+            is_simulation = False
+            is_testbeam = False
+            is_calibration = False

-			if bitmask_length > 0:	# ROOT.EventType.IS_SIMULATION
-				is_simulation = val[0]
+            if bitmask_length > 0:  # ROOT.EventType.IS_SIMULATION
+                is_simulation = val[0]

-			if bitmask_length > 1:	# ROOT.EventType.IS_TESTBEAM
-				is_testbeam = val[1]
+            if bitmask_length > 1:  # ROOT.EventType.IS_TESTBEAM
+                is_testbeam = val[1]

-			if bitmask_length > 2:	# ROOT.EventType.IS_CALIBRATION:
-				is_calibration = val[2]
+            if bitmask_length > 2:  # ROOT.EventType.IS_CALIBRATION:
+                is_calibration = val[2]

-			types = [
-				'IS_SIMULATION' if is_simulation else 'IS_DATA',
-				'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
-				'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
-			]
+            types = [
+                'IS_SIMULATION' if is_simulation else 'IS_DATA',
+                'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
+                'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
+            ]

-	value['type'] = types
-	return value
+    value['type'] = types
+    return value


 def make_lite(meta_dict):
     """Reduce every file entry of ``meta_dict`` to the 'lite' key subset.

     For each file, every entry whose key is listed in the file's
     ``'metadata_items'`` with a class name matched by
     ``regexEventStreamInfo`` keeps only the event-stream keys below, and the
     ``'/TagInfo'`` entry (if present) keeps only the tag-info keys below.
     The nested dictionaries are filtered in place.

     Args:
         meta_dict: dict mapping file names to their metadata dictionaries.
             A file entry without ``'metadata_items'`` is treated as having
             no event-stream entries.

     Returns:
         The same ``meta_dict`` object.
     """
     # Loop-invariant key selections, built once instead of once per key.
     esi_keys = ('lumiBlockNumbers', 'runNumbers', 'mc_event_number', 'mc_channel_number', 'eventTypes', 'processingTags')
     taginfo_keys = ('beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag', 'AODFixVersion')

     def keep_only(content, wanted):
         # Iterate over a copy of the keys because entries are removed.
         for item in list(content):
             if item not in wanted:
                 content.pop(item)

     for file_content in meta_dict.values():
         metadata_items = file_content.get('metadata_items', {})

         for key in file_content:
             if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                 keep_only(file_content[key], esi_keys)

         if '/TagInfo' in file_content:
             keep_only(file_content['/TagInfo'], taginfo_keys)

     return meta_dict


 def make_peeker(meta_dict):
     """Reduce every file entry of ``meta_dict`` to the 'peeker' key subset.

     For each file, every entry whose key is listed in the file's
     ``'metadata_items'`` with a class name matched by
     ``regexEventStreamInfo`` keeps only the event-stream keys below.  The
     ``'/TagInfo'``, ``'/Simulation/Parameters'`` and
     ``'/Digitization/Parameters'`` entries, when present, keep only the keys
     listed for them.  The nested dictionaries are filtered in place.

     Args:
         meta_dict: dict mapping file names to their metadata dictionaries.
             A file entry without ``'metadata_items'`` is treated as having
             no event-stream entries.

     Returns:
         The same ``meta_dict`` object.
     """
     # Loop-invariant key selections, built once instead of once per key.
     esi_keys = (
         'lumiBlockNumbers',
         'runNumbers',
         'mc_event_number',
         'mc_channel_number',
         'eventTypes',
         'processingTags',
         'itemList',
     )
     folder_keys = (
         ('/TagInfo', (
             'beam_energy',
             'beam_type',
             'GeoAtlas',
             'IOVDbGlobalTag',
             'AODFixVersion',
             'AMITag',
             'project_name',
             'triggerStreamOfFile',
             'AtlasRelease',
         )),
         ('/Simulation/Parameters', (
             'TruthStrategy',
             'SimBarcodeOffset',
         )),
         ('/Digitization/Parameters', (
             'numberOfCollisions',
             'intraTrainBunchSpacing',
             'BeamIntensityPattern',
         )),
     )

     def keep_only(content, wanted):
         # Iterate over a copy of the keys because entries are removed.
         for item in list(content):
             if item not in wanted:
                 content.pop(item)

     for file_content in meta_dict.values():
         metadata_items = file_content.get('metadata_items', {})

         for key in file_content:
             if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                 keep_only(file_content[key], esi_keys)

         for folder, wanted in folder_keys:
             if folder in file_content:
                 keep_only(file_content[folder], wanted)

     return meta_dict


 def promote_keys(meta_dict):
     """Move nested metadata up to the top level of every file entry.

     For each file, the first entry whose key is listed in the file's
     ``'metadata_items'`` with a class name matched by
     ``regexEventStreamInfo`` is merged into the file dictionary and removed.
     From its first event type, ``mc_event_number`` (falling back to the
     first run number), ``mc_channel_number`` (falling back to 0) and
     ``eventTypes`` (the ``'type'`` labels) are set.  The ``'/TagInfo'``,
     ``'/Simulation/Parameters'`` and ``'/Digitization/Parameters'`` entries,
     when present, are merged into the file dictionary and removed as well.

     Args:
         meta_dict: dict mapping file names to their metadata dictionaries;
             the file dictionaries are modified in place.  A file entry
             without ``'metadata_items'`` is treated as having no
             event-stream entries.

     Returns:
         The same ``meta_dict`` object.
     """
     folders = ('/TagInfo', '/Simulation/Parameters', '/Digitization/Parameters')

     for md in meta_dict.values():
         metadata_items = md.get('metadata_items', {})

         # Iterate over a copy of the keys: md is modified once a match is found.
         for key in list(md):
             if key in metadata_items and regexEventStreamInfo.match(metadata_items[key]):
                 # The merge already brings lumiBlockNumbers and
                 # processingTags to the top level; no re-assignment needed.
                 md.update(md[key])
                 et = md['eventTypes'][0]

                 # The run number is looked up only when it is actually needed.
                 if 'mc_event_number' in et:
                     md['mc_event_number'] = et['mc_event_number']
                 else:
                     md['mc_event_number'] = md['runNumbers'][0]

                 md['mc_channel_number'] = et.get('mc_channel_number', 0)
                 md['eventTypes'] = et['type']

                 md.pop(key)
                 break

         for folder in folders:
             if folder in md:
                 md.update(md[folder])
                 md.pop(folder)

     return meta_dict