Before a major refactoring

Former-commit-id: 43ca9f81

Before a major refactoring
524e3c12 · danciubotaru · 0d58f068 · 524e3c12 · 524e3c12 · 524e3c12
Commit 524e3c12 authored 7 years ago by danciubotaru
--- a/Tools/PyUtils/CMakeLists.txt
+++ b/Tools/PyUtils/CMakeLists.txt
@@ -17,7 +17,7 @@ find_package( ROOT COMPONENTS Core PyROOT Tree MathCore Hist RIO pthread )
 # Install files from the package:
 atlas_install_python_modules( python/*.py python/AthFile python/scripts )
-atlas_install_scripts( bin/acmd.py bin/atl-gen-athena-d3pd-reader bin/checkFile.py bin/checkPlugins.py bin/checkSG.py bin/checkMetaSG.py bin/checkTP.py bin/checkTag.py bin/checkxAOD.py bin/diff-athfile bin/diff-jobo-cfg.py bin/diffConfigs.py bin/diffPoolFiles.py bin/diffTAGTree.py bin/dlldep.py bin/dso-stats.py bin/dump-athfile.py bin/dumpAthfilelite.py bin/filter-and-merge-d3pd.py bin/getMetadata.py bin/gen_klass.py bin/get-tag-diff.py bin/gprof2dot bin/issues bin/magnifyPoolFile.py bin/merge-poolfiles.py bin/pep8.py bin/pool_extractFileIdentifier.py bin/pool_insertFileToCatalog.py bin/print_auditor_callgraph.py bin/pyroot.py bin/vmem-sz.py )
+atlas_install_scripts( bin/acmd.py bin/atl-gen-athena-d3pd-reader bin/checkFile.py bin/checkPlugins.py bin/checkSG.py bin/checkMetaSG.py bin/checkTP.py bin/checkTag.py bin/checkxAOD.py bin/diff-athfile bin/diff-jobo-cfg.py bin/diffConfigs.py bin/diffPoolFiles.py bin/diffTAGTree.py bin/dlldep.py bin/dso-stats.py bin/dump-athfile.py bin/dumpAthfilelite.py bin/filter-and-merge-d3pd.py bin/getMetadata.py bin/gen_klass.py bin/get-tag-diff.py bin/gprof2dot bin/issues bin/magnifyPoolFile.py bin/merge-poolfiles.py bin/pep8.py bin/pool_extractFileIdentifier.py bin/pool_insertFileToCatalog.py bin/print_auditor_callgraph.py bin/pyroot.py bin/vmem-sz.py bin/meta-reader.py)
 # Aliases:
 atlas_add_alias( checkFile "checkFile.py" )
@@ -41,3 +41,4 @@ atlas_add_alias( diff-jobo-cfg "diff-jobo-cfg.py" )
 atlas_add_alias( acmd "acmd.py" )
 atlas_add_alias( vmem-sz "vmem-sz.py" )
 atlas_add_alias( getMetadata "getMetadata.py" )
+atlas_add_alias( meta-reader "meta-reader.py" )
\ No newline at end of file
--- a/Tools/PyUtils/bin/meta-reader.py
+++ b/Tools/PyUtils/bin/meta-reader.py
@@ -32,6 +32,11 @@ def __pretty_print(content, indent=4, fd=sys.stdout, level=0):
        print >> fd, ' ' * indent * (level + 1) + str(content)
+def __format_output(metadata):
+    for key, value in metadata.items():
+        print('{key:>15}: {value}'.format(key = key, value = value))
 def __main():
    # Parsing the arguments provided by user
    parser = argparse.ArgumentParser(description='This script reads metadata from a given file')
@@ -55,11 +60,16 @@ def __main():
                        type=int,
                        default=2,
                        help="Sets the indent spaces in the output either on screen (without -o flag) either on file (with -o flag). By default, uses two spaces as indent.")
-    parser.add_argument('-f',
+    parser.add_argument('-m',
-                        '--full',
+                        '--mode',
-                        action='store_true',
+                        default= 'lite',
-                        default=False,
+                        metavar='MODE',
-                        help="Retrieve the full set of metadata from file.")
+                        type=str,
+                        choices=['tiny', 'lite', 'full'],
+                        help="This flag provides the user capability to select the amount of metadata retrieved. There three options: "
+                             "tiny (only those values used in PyJobTransforms), "
+                             "lite (same output as dump-athfile) "
+                             "and full ( all  available data found) ")
    parser.add_argument('-t',
                        '--type',
                        default= None,
@@ -74,38 +84,38 @@ def __main():
    output = args.output
    is_json = args.json
    indent = args.indent
-    is_full = args.full
+    mode = args.mode
    file_type = args.type
-    if verbose:
+    msg.setLevel(logging.INFO if verbose else logging.WARNING)
-        msg.setLevel(logging.INFO)
+    # create a stream handler
-        # create a stream handler
+    handler = logging.StreamHandler()
-        handler = logging.StreamHandler()
+    handler.setLevel(logging.INFO if verbose else logging.WARNING)
-        handler.setLevel(logging.INFO)
+    # create a logging format
-        # create a logging format
+    formatter = logging.Formatter('%(name)s                       %(levelname)s %(message)s')
-        formatter = logging.Formatter('%(name)s                       %(levelname)s %(message)s')
+    handler.setFormatter(formatter)
-        handler.setFormatter(formatter)
+    # add the handlers to the logger
-        # add the handlers to the logger
+    msg.addHandler(handler)
-        msg.addHandler(handler)
    startTime = time.time()
    msg.info('Imported headers in: {0} miliseconds'.format((time.time() - startTime) * 1e3)) 
    msg.info('The output file is: {0}'.format(output))
-    d = read_metadata(filenames, file_type, full= is_full)
+    metadata = read_metadata(filenames, file_type, mode= mode)
    if output is None:
        if is_json:
-            print json.dumps(d, indent=indent)
+            print(json.dumps(metadata, indent=indent))
        else:
-            __pretty_print(d, indent=indent)
+            __format_output(metadata)
+            # __pretty_print(metadata, indent=indent)
    else:
        if is_json:
            with open(output, 'w') as fd:
-                print >> fd, json.dumps(d, indent=indent)
+                print >> fd, json.dumps(metadata, indent=indent)
        else:
            with open(output, 'w') as fd:
-                __pretty_print(d, indent=indent, fd=fd)
+                __pretty_print(metadata, indent=indent, fd=fd)
    msg.info('Done!')

--- a/Tools/PyUtils/python/MetaReader.py
+++ b/Tools/PyUtils/python/MetaReader.py
 import os
 import logging
-msg = logging.getLogger(__name__)
+msg = logging.getLogger('MetaReader')
 import re
 import ast
 import sys
-def read_metadata(filenames, file_type=None, full=False):
+def read_metadata(filenames, file_type=None, mode='lite'):
    """
    This tool is independent of Athena framework and returns the metadata from a given file.
    :param filename: the input file from which metadata needs to be extracted.
@@ -16,14 +16,16 @@ def read_metadata(filenames, file_type=None, full=False):
    :return: a dictionary of metadata for the given input file.
    """
-    # Check if the input is a filename or a list of filenames.
+    # Check if the input is a file or a list of files.
    if isinstance(filenames, basestring):
        filenames = [filenames]
+    # Check the value of mode parameter
+    if mode not in ['tiny', 'lite', 'full']:
+        raise NameError('Allowed values for mode are: tiny, lite or full')
+    msg.info('Current mode used: {0}'.format(mode))
    # create the storage object for metadata.
    metaDict = {}
    for filename in filenames:
        current_file_type = None
        # Determine the file type of the input.
@@ -53,37 +55,46 @@ def read_metadata(filenames, file_type=None, full=False):
        # ----------------------------------------------------------------------------------------------------------------#
        # retrieves metadata from POOL files.
        if current_file_type == 'POOL':
            from CLIDComps.clidGenerator import clidGenerator
+            global clidgen
            clidgen = clidGenerator(db = None)
            evt = ROOT.POOL.TEvent()
            evt.readFrom(filename)
            evt.getEntry(0)
-            # add the missing keys from the basis "lite" metadata dictionary
+            # add the missing keys from the basis "tiny" metadata dictionary
            metaDict[filename]['file_guid'] = __read_guid(filename),
-            metaDict[filename]['file_type'] = 'POOL'
+            metaDict[filename]['file_type'] = 'pool'
            metaDict[filename]['nentries'] = evt.getEntries()
+            # if the mode is not set to tiny then it will retrieve more metadata
-            # if the flag full is set to true then grab all metadata
            # ----------------------------------------------------------------------------------------------------------------#
-            if full:
+            if mode != 'tiny':
-                metaDict[';00;MetaDataSvc'] = __convert_DataHeader(evt.retrieveMetaInput('DataHeader', ';00;MetaDataSvc'))
+                # this information is duplicated but is used with the AthFile
+                metaDict[filename]['file_name'] = filename
+                meta_data_srv = __convert_DataHeader(evt.retrieveMetaInput('DataHeader', ';00;MetaDataSvc'))
-                for name, cls in metaDict[';00;MetaDataSvc']:
+                for name, cls in meta_data_srv:
                    try:
                        a = evt.retrieveMetaInput(cls, name)
                    except LookupError:
                        continue
                    if cls == 'IOVMetaDataContainer':
-                        metaDict[name] = __convert_IOVMetaDataContainer(a)
+                        metaDict[filename][name] = __convert_IOVMetaDataContainer(a)
                    if cls == 'xAOD::EventFormat':
-                        metaDict[name] = __convert_EventFormat(a)
+                        metaDict[filename][name] = __convert_EventFormat(a)
                    if cls == 'EventStreamInfo':
-                        metaDict[name] = __convert_EventStreamInfo(a)
+                        metaDict[filename][name] = __convert_EventStreamInfo(a)
+                # in lite mode keep only the basic keys listed below and drop everything else
+                if mode == 'lite':
+                    for key in list(metaDict[filename]):
+                        if key not in ['file_type', 'file_size', 'file_guid', 'nentries', 'run_number']:
+                            metaDict[filename].pop(key, None)
        # ----------------------------------------------------------------------------------------------------------------#
        # retrieves metadata from bytestream (BS) files (RAW, DRAW)
@@ -104,7 +115,7 @@ def read_metadata(filenames, file_type=None, full=False):
            # unless the mode is tiny, retrieve the additional metadata
            # ----------------------------------------------------------------------------------------------------------------#
-            if full:
+            if mode != "tiny":
                bs_metadata = {}
                for md in data_reader.freeMetaDataStrings():
@@ -164,6 +175,11 @@ def read_metadata(filenames, file_type=None, full=False):
                    metaDict[filename]['run_number'].append(bs_metadata.get('run_number', 0))
                    metaDict[filename]['lumi_block'].append(bs_metadata.get('LumiBlock', 0))
+                if mode == 'lite':
+                    print('This is the lite version for BS files')
+                    pass
        # ----------------------------------------------------------------------------------------------------------------#
        # Throw an error if the user provides other file types
        else:
@@ -177,12 +193,12 @@ def __convert_EventStreamInfo(esi):
    d = {}
    d['run_number'] = list(esi.getRunNumbers())
    d['processing_tags'] = list(esi.getProcessingTags())
-    d['lumi_blocks'] = list(esi.getLumiBlockNumbers())
+    d['lumi_block'] = list(esi.getLumiBlockNumbers())
-    d['event_types'] = []
+    d['evt_type'] = list()
+    d['evt_number'] =
    for evtype in esi.getEventTypes():
        t = {}
        t['IS_CALIBRATION'] = evtype.IS_CALIBRATION
        t['IS_SIMULATION'] = evtype.IS_SIMULATION
        t['IS_TESTBEAM'] = evtype.IS_TESTBEAM
        t['mc_channel_number'] = evtype.mc_channel_number()
@@ -191,7 +207,13 @@ def __convert_EventStreamInfo(esi):
        d['event_types'].append(t)
    d['ItemList'] = []
    for e in esi.getItemList():
-        d['ItemList'].append((clidgen.getNameFromClid(e.first), e.second))
+        clid_name = clidgen.getNameFromClid(e.first)
+        if clid_name:
+            d['ItemList'].append((clid_name, e.second))
+        else:
+            msg.info('Unable to find a name for clid {0} with value {1}.'.format(e.first, e.second))
+            d['ItemList'].append(('clid_{0}'.format(e.first), e.second))
    return d
@@ -262,6 +284,4 @@ def __read_guid(filename):
            if name == 'FID':
                return value
    return None
\ No newline at end of file
-# Methos for BS
\ No newline at end of file