#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# art-description: Validate in-file metadata in MT q431
# art-type: grid
# art-athena-mt: 8
#
# Runs every step of the q431 reconstruction transform twice, once serially
# and once multi-threaded, and compares the in-file metadata of the two
# outputs with PyUtils.MetaDiff.meta_diff.

MSG = logging.getLogger('RecoMetadata_test')

# Transform sub-steps, in the order in which they are executed and compared.
STEPS = ["BStoRAW", "BStoESD", "ESDtoAOD"]

# Options shared by every Reco_tf.py invocation.
CONFIG = {
    "execOnly": True,
    "preInclude": ["mt.py"],
    "AMI": "q431",
    "inputBSFile": [
        "/eos/user/g/goetz/data/data17/data17_13TeV.00330470.physics_Main.daq.RAW.mix._0001.data",
        "/eos/user/g/goetz/data/data17/data17_13TeV.00337215.physics_Main.daq.RAW.mix._0001.data",
        "/eos/user/g/goetz/data/data18/data18_13TeV.00363979.physics_Main.daq.RAW.0250._0001.data"]
}


def _step_configs(prefix, multithreaded=False, esd_input=None):
    """
    Build the per-step transform configuration for one running mode.

    :param: prefix: prefix of the output file names ("serial"/"threaded")
    :param: multithreaded: add the ``multithreaded`` switch to every step
    :param: esd_input: ESD file list read by ESDtoAOD; defaults to the ESD
        written by this mode's own BStoESD step
    :return: dictionary mapping step name to its configuration dictionary
    """
    configs = {}
    for step in STEPS:
        config = CONFIG.copy()
        if multithreaded:
            config["multithreaded"] = True

        if step == "BStoRAW":
            config["outputDRAW_ZMUMUFile"] = ["{}_DRAW".format(prefix)]
        elif step == "BStoESD":
            config["outputESDFile"] = ["{}_ESD.pool.root".format(prefix)]
        elif step == "ESDtoAOD":
            # STEPS lists BStoESD first, so its entry already exists here.
            if esd_input is None:
                esd_input = configs["BStoESD"]["outputESDFile"]
            config["inputESDFile"] = esd_input
            config["outputAODFile"] = ["{}_AOD.pool.root".format(prefix)]
            # this step starts from the ESD, not from bytestream
            del config["inputBSFile"]

        configs[step] = config
    return configs


SERIAL = _step_configs("serial")
# NOTE(review): the threaded ESDtoAOD step reads the *serial* ESD file, as in
# the original set-up. Confirm this is intended and not a copy/paste slip.
THREAD = _step_configs(
    "threaded", multithreaded=True,
    esd_input=SERIAL["BStoESD"]["outputESDFile"])


# Job options written to every file named in CONFIG["preInclude"].
PRE_INCLUDE = '''\
rec.doMonitoring.set_Value_and_Lock(True);
from AthenaMonitoring.DQMonFlags import DQMonFlags;
DQMonFlags.doStreamAwareMon=False;
DQMonFlags.doMuonRawMon=False;
DQMonFlags.doMuonSegmentMon=False;
DQMonFlags.doMuonTrackMon=False;
DQMonFlags.doMuonAlignMon=False;
DQMonFlags.doMuonTrkPhysMon=False;
DQMonFlags.doMuonPhysicsMon=False;
DQMonFlags.doGlobalMon.set_Value_and_Lock(False);
DQMonFlags.doMonitoring.set_Value_and_Lock(False);

rec.doTrigger = False

# rec.doMuon = True
# rec.doMuonCombined = True
# from DiTauRec.DiTauRecFlags import jobproperties
# jobproperties.DiTauRecFlags.doDiTauRec = True

from ParticleBuilderOptions.AODFlags import AODFlags
AODFlags.ThinInDetForwardTrackParticles.set_Value_and_Lock(False)

if True :
    from PerfMonComps.PerfMonFlags import jobproperties as pmon_properties
    pmon_properties.PerfMonFlags.doSemiDetailedMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doFastMon.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doSemiDetailedMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doSemiDetailedMonitoringFullPrint.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doFullMon.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doMallocMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doExtraPrintouts.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doHephaestusMon.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doDsoMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doPersistencyMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doDataProxySizeMonitoring.set_Value_and_Lock(False)
    pmon_properties.PerfMonFlags.doPostProcessing.set_Value_and_Lock(False)
'''


def build_transform_command(config):
    """
    Build the argument list handed to subprocess from a configuration.

    Booleans become bare ``--key`` switches (omitted when False), lists become
    ``--key item item ...`` and any other value becomes ``--key value``.

    :param: config: configuration dictionary to translate
    :return: list of strings starting with "Reco_tf.py"
    """
    command = ["Reco_tf.py"]

    for key, value in config.items():
        MSG.debug("adding %s and %s to command", key, value)
        option = "--{}".format(key)

        if isinstance(value, bool):
            if value:
                command.append(option)
        elif isinstance(value, list):
            command.append(option)
            command.extend(str(item) for item in value)
        else:
            command.append(option)
            # Popen and ' '.join() both need strings, e.g. for integer options
            command.append(str(value))

    MSG.debug("command list: %s", command)

    return command


def serial(config, verbose=False):
    """
    Run the serial Reconstruction transform based on the parameters provided
    in the config parameter, in the unmodified process environment.

    :param: config: configuration dictionary to hand to transform
    :param: verbose: whether to be laconic or not
    :return: transform exit code
    """
    return run_transform(config, env=os.environ, verbose=verbose)


def thread(config, threads=8, verbose=False):
    """
    Run the threaded Reconstruction transform based on the parameters
    provided in the config parameter.

    :param: config: configuration dictionary to hand to transform
    :param: threads: number of threads, exported as ATHENA_CORE_NUMBER
    :param: verbose: whether to be laconic or not
    :return: transform exit code
    """
    env = os.environ.copy()
    env["ATHENA_CORE_NUMBER"] = str(threads)

    return run_transform(config, env=env, verbose=verbose)


def run_transform(config, env, verbose=False):
    """
    Run Reco_tf.py with the given configuration and wait for it to finish.

    :param: config: configuration dictionary to hand to transform
    :param: env: environment mapping for the child process
    :param: verbose: show the transform output instead of discarding it
    :return: return value of transform
    """
    MSG.debug("job config: %s", json.dumps(config))

    command = build_transform_command(config)
    MSG.info('running: %s', ' '.join(command))

    # Bound up front so the interrupt handler never sees an undefined name
    # when Ctrl-C arrives before the child process has been started.
    proc = None
    try:
        if verbose:
            proc = Popen(command, env=env)
            return proc.wait()

        with open(os.devnull, 'w') as dev_null:
            proc = Popen(command, env=env, stdout=dev_null, stderr=STDOUT)
            return proc.wait()

    except KeyboardInterrupt:
        if proc is not None:
            proc.kill()
            proc.wait()  # reap the child so it does not linger as a zombie
        MSG.debug('comparison cancelled')
        sys.exit(0)


def get_args():
    """
    Handle command line arguments.

    Sets the log level and replaces the ``inputBSFile`` entry of every step
    in SERIAL and THREAD that has one with the files given on the command
    line (or with the defaults from CONFIG).

    :return: parsed argparse namespace
    """
    parser = argparse.ArgumentParser(description='''\
        Run q431 reconstruction transform on the given input (default are
        some of Goetz's files on EOS). Compare the in-file metadata of the
        output files.
        ''')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='do not be laconic')
    parser.add_argument('inputBS', metavar='FILE', type=str, nargs='*',
                        help='bytestream file(s) to use as input')

    args = parser.parse_args()
    MSG.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    input_files = args.inputBS if args.inputBS else CONFIG['inputBSFile']
    MSG.info("input files:")
    MSG.info(' '.join(input_files))

    for configs in (SERIAL, THREAD):
        for config in configs.values():
            # ESDtoAOD has no bytestream input and must not gain one
            if "inputBSFile" in config:
                config["inputBSFile"] = input_files

    return args


def _output_file(config):
    """Return the first file of the first output* option, or None."""
    for key, value in config.items():
        if "output" in key.lower():
            return value[0] if value else None
    return None


def main():
    """Write pre-include to file, run transforms, compare results, report."""
    args = get_args()

    for file_name in CONFIG["preInclude"]:
        with open(file_name, 'w') as handle:
            handle.write(PRE_INCLUDE)

    for step in STEPS:
        MSG.info('%s in serial mode', step)
        s_status = serial(SERIAL[step], verbose=args.verbose)
        print("art-result: {} {}_serial_transform".format(s_status, step))

        MSG.info('%s in threaded mode', step)
        t_status = thread(THREAD[step], verbose=args.verbose)
        print("art-result: {} {}_threaded_transform".format(t_status, step))

        s_file = _output_file(SERIAL[step])
        t_file = _output_file(THREAD[step])
        if s_file is None or t_file is None:
            MSG.error("no output file configured for step %s", step)
            print("art-result: 1 can_read_metadata")
            print("art-result: 1 {}".format(step))
            continue

        MSG.info('Comparing metadata in %s and %s', s_file, t_file)
        try:
            # the file GUID always differs between two outputs
            diff = meta_diff([s_file, t_file], mode='full', drop=['file_guid'])

            print("art-result: 0 can_read_metadata")
            if diff:
                MSG.warning(''.join(diff))
            # a single reported difference is still counted as success
            print("art-result: {} {}".format(0 if len(diff) < 2 else 1, step))
        except (ReferenceError, StopIteration):
            MSG.error('failed to read metadata from: %s and/or %s', s_file, t_file)
            print("art-result: 1 can_read_metadata")
            print("art-result: 1 {}".format(step))
            continue


if __name__ == "__main__":
    main()
def _str_to_bool(text):
    """Convert the textual value of a command line option into a bool.

    ``type=bool`` cannot be used with argparse: bool() of any non-empty
    string, including "False", is True.
    """
    value = text.strip().lower()
    if value in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if value in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(text))


def main():
    """Handle command line arguments and call meta_diff.

    Prints the differences found, one per entry, and terminates the process
    with exit status 1 when the metadata differs (or the call is invalid)
    and 0 when it is identical.
    """

    parser = argparse.ArgumentParser(
        description='Compare the metadata content of two files')

    parser.add_argument(
        'files',
        nargs=2,
        metavar='FILE',
        help='The names of two files to compare')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='print detailed output on screen')

    parser.add_argument(
        '-s', '--ordered',
        action='store_true',
        help='When comparing lists, check the element order too.')

    parser.add_argument(
        '-d', '--drop',
        nargs='*',
        default=None,
        metavar='KEY',
        help='Keys to drop from metadata retrieved from file')

    parser.add_argument(
        '-m', '--mode',
        default='lite',
        metavar='MODE',
        type=str,
        choices=['tiny', 'lite', 'full', 'peeker'],
        help='''\
             This flag provides the user capability to select the amount of
             metadata retrieved. Allowed values are:
             tiny (only those values used in PyJobTransforms),
             lite (same output as dump-athfile),
             peeker,
             and full (all available data found)
             ''')

    parser.add_argument(
        '-t', '--type',
        default=None,
        metavar='TYPE',
        type=str,
        choices=['POOL', 'BS'],
        help='''\
             The file type of the input filename. By default, it tries to
             determine itself the file type of the input.
             ''')

    parser.add_argument(
        '-f', '--filter',
        default=[],
        metavar='FILTER',
        nargs='+',
        type=str,
        help="Expression to select specific metadata fields to retrieve.")

    parser.add_argument(
        '--promote',
        default=None,
        type=_str_to_bool,
        help="Force promotion or not of the metadata keys ")

    args = parser.parse_args()

    try:
        diff = meta_diff(
            args.files, verbose=args.verbose, ordered=args.ordered,
            drop=args.drop, mode=args.mode, meta_key_filter=args.filter,
            file_type=args.type, promote=args.promote)
    except (ValueError, IndexError):
        print("you must supply two files to compare")
        sys.exit(1)

    # mimic diff(1): non-zero exit status when the inputs differ
    if diff:
        print('\n'.join(diff))
        sys.exit(1)

    sys.exit(0)


if __name__ == '__main__':
    main()
"""The function in this module you should look to be using is meta_diff"""


def print_diff(parent_key, obj1, obj2):
    """Build comparison string for two non-dictionary objects.

    Keyword arguments:
    parent_key -- key under which the objects were found, None at top level
    obj1       -- first object, shown after '> '
    obj2       -- second object, shown after '< '
    """
    result = '\n'

    if parent_key is not None:
        result += '{}:\n'.format(parent_key)
    result += '> {}\n----------\n< {}\n'.format(obj1, obj2)

    return result


def print_diff_type(parent_key, obj1, obj2):
    """Build diff string for objects of different type.

    Same layout as print_diff with the type of each object appended.
    """
    result = '\n'

    if parent_key is not None:
        result += '{}:\n'.format(parent_key)
    result += '> {} (type: {})\n----------\n< {} (type: {})\n'.format(
        obj1, type(obj1), obj2, type(obj2))

    return result


def _summarise_items(obj):
    """Render 'key: value' pairs of a dict, hiding nested dictionaries."""
    return ', '.join(
        '{}: {}'.format(key, '{...}' if isinstance(value, dict) else value)
        for key, value in sorted(obj.items()))


def print_diff_dict_keys(parent_key, obj1, obj2):
    """Build diff style string for dictionary objects with differing keys."""
    result = '\n'

    if parent_key is not None:
        result += '{}:\n'.format(parent_key)
    result += '> ' + _summarise_items(obj1)
    result += '\n----------\n'
    result += '< ' + _summarise_items(obj2)
    result += '\n'

    return result


def _normalised(obj, ordered):
    """Return a sorted copy of a list when element order is to be ignored.

    Anything that is not a list, or whose elements cannot be ordered (e.g.
    dictionaries on Python 3), is returned unchanged.
    """
    if ordered or not isinstance(obj, list):
        return obj
    try:
        return sorted(obj)
    except TypeError:
        return obj


def compare(obj1, obj2, parent_key=None, ordered=False):
    """Calculate difference between two objects.

    Dictionaries with identical keys are compared value by value,
    recursively; everything else is compared with ``==``. The arguments are
    never modified.

    Keyword arguments:
    obj1       -- first object in comparison
    obj2       -- second object in comparison
    parent_key -- the key of the objects in the parent objects, used in
                  recursion
    ordered    -- whether to check order of list content

    Returns a list of strings, one per difference, empty if there is none.
    """
    result = list()

    # work on sorted copies so the caller's lists keep their order
    obj1 = _normalised(obj1, ordered)
    obj2 = _normalised(obj2, ordered)

    if obj1 == obj2:
        return result

    if isinstance(obj1, type(obj2)):

        if isinstance(obj1, dict):

            if sorted(obj1.keys()) != sorted(obj2.keys()):
                result += [print_diff_dict_keys(parent_key, obj1, obj2)]
            else:
                # both key sets are identical here; dict views cannot be
                # concatenated with '+' on Python 3
                for key in sorted(obj1):
                    if parent_key:
                        child_key = '{}/{}'.format(parent_key, key)
                    else:
                        child_key = key
                    result += compare(obj1[key], obj2[key], child_key, ordered)

        else:
            result += [print_diff(parent_key, obj1, obj2)]

    else:
        result += [print_diff_type(parent_key, obj1, obj2)]

    return result


def meta_diff(files, verbose=False, ordered=False, drop=None,
              mode='lite', meta_key_filter=None, file_type=None, promote=False):
    """
    Compare the in-file metadata in two given files. Uses PyUtils.MetaReader
    to obtain file content. Generates list of strings that show differences.
    Returns empty list if no difference is found.

    Keyword arguments:
    files   -- Names of two files to compare
    verbose -- toggle to get debug information
    ordered -- whether to check order of lists in the metadata
    drop    -- keys to drop from metadata retrieved by MetaReader
    mode    -- MetaReader argument setting amount of content (default 'lite').
               Allowed values are: tiny, lite, peeker, and full
    meta_key_filter -- MetaReader argument selecting keys to retrieve (default
                       get all)
    file_type -- Type of files, POOL or BS (default: auto-configure)
    promote   -- MetaReader argument (default: False)

    Raises ValueError unless exactly two file names are given.
    """
    if len(files) != 2:
        raise ValueError("Wrong number of files passed, need two")

    msg = logging.getLogger('MetaReader')
    msg.setLevel(logging.INFO if verbose else logging.WARNING)

    # read_metadata takes len() of the filter, so it must never receive None
    metadata = read_metadata(
        files, file_type, mode=mode,
        meta_key_filter=meta_key_filter or [], promote=promote)

    for key in drop or ():
        for value in metadata.values():
            value.pop(key, None)

    return compare(metadata[files[0]], metadata[files[1]], ordered=ordered)
persistent_instances = {} @@ -199,7 +199,7 @@ def read_metadata(filenames, file_type = None, mode = 'lite', promote = None, me metadata_tree.GetEntry(0) # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest - if len(meta_key_filter) > 0: + if meta_key_filter: meta_dict[filename] = {} # read the metadata