From d79db2dd9b51cac6d328d65402b560622fb790a7 Mon Sep 17 00:00:00 2001 From: Alaettin Serhan Mete <alaettin.serhan.mete@cern.ch> Date: Mon, 19 Jul 2021 11:36:02 +0000 Subject: [PATCH] PyUtils: Update diff-root to improve re-syncing and add a new script to dump pythonized event information from a file --- Tools/PyUtils/CMakeLists.txt | 2 +- Tools/PyUtils/bin/dump-event-from-file.py | 35 +++++ Tools/PyUtils/python/RootUtils.py | 2 +- .../PyUtils/python/scripts/diff_root_files.py | 132 ++++++++++-------- 4 files changed, 108 insertions(+), 63 deletions(-) create mode 100755 Tools/PyUtils/bin/dump-event-from-file.py diff --git a/Tools/PyUtils/CMakeLists.txt b/Tools/PyUtils/CMakeLists.txt index 21f7fc908c7c..2da1185aa8db 100644 --- a/Tools/PyUtils/CMakeLists.txt +++ b/Tools/PyUtils/CMakeLists.txt @@ -21,7 +21,7 @@ atlas_install_scripts( bin/acmd.py bin/checkFile.py bin/checkPlugins.py bin/gprof2dot bin/issues bin/magnifyPoolFile.py bin/merge-poolfiles.py bin/apydep.py bin/pool_extractFileIdentifier.py bin/pool_insertFileToCatalog.py bin/print_auditor_callgraph.py bin/pyroot.py - bin/vmem-sz.py bin/meta-reader.py bin/meta-diff.py bin/tree-orderer.py + bin/vmem-sz.py bin/meta-reader.py bin/meta-diff.py bin/tree-orderer.py bin/dump-event-from-file.py POST_BUILD_CMD ${ATLAS_FLAKE8} ) # Aliases: diff --git a/Tools/PyUtils/bin/dump-event-from-file.py b/Tools/PyUtils/bin/dump-event-from-file.py new file mode 100755 index 000000000000..f13ace8c4dbc --- /dev/null +++ b/Tools/PyUtils/bin/dump-event-from-file.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration + +""" Dump a pythonized version of the event content using RootFileDumper. + The main logic is similar to what's being done in diff-root. + Therefore, this can be used in conjunction w/ diff-root for further + debugging. """ +def dumpEvent(fname, idx): + # import ROOT via RootUtils + import PyUtils.RootUtils as ru + ru.import_root() + + # file dumper: file, tree + data = ru.RootFileDumper(fname, "CollectionTree") + + # iterator: tree, idx + values = data.dump("CollectionTree", [int(idx)]) + + # loop over the iterator and print the values + try: + while True: + val = next(values) + print(val) + except StopIteration: + pass + +if __name__ == "__main__": + import sys + if len(sys.argv)!=3: + print("dump-event-from-file.py: A script to dump event information from a file") + print("Usage:") + print(f"{sys.argv[0]} <file> <TTree index of the event>") + else: + dumpEvent(sys.argv[1], sys.argv[2]) diff --git a/Tools/PyUtils/python/RootUtils.py b/Tools/PyUtils/python/RootUtils.py index ebbf499049df..b969bc78fd5b 100644 --- a/Tools/PyUtils/python/RootUtils.py +++ b/Tools/PyUtils/python/RootUtils.py @@ -333,7 +333,7 @@ class RootFileDumper(object): )) self.allgood = False print (err) - for o in vals: + for o in sorted(vals, key = lambda x: '.'.join(s for s in x[0] if isinstance(s, str))): n = list(map(str, o[0])) v = o[1] yield tree_name, ientry, n, v diff --git a/Tools/PyUtils/python/scripts/diff_root_files.py b/Tools/PyUtils/python/scripts/diff_root_files.py index 03cc1bf1f7d3..5a505a940073 100644 --- a/Tools/PyUtils/python/scripts/diff_root_files.py +++ b/Tools/PyUtils/python/scripts/diff_root_files.py @@ -50,7 +50,7 @@ def _is_exit_early(): help='set of regex matching names of branches to compare; assumes all if none specified.') @acmdlib.argument('--ignore-leaves', nargs='*', - default=('Token', 'index_ref',), + default=('Token', 'index_ref', r'(.*)_timings\.(.*)', r'(.*)_mems\.(.*)', r'(.*)TrigCostContainer(.*)'), help='set of leaves names to ignore from comparison; can be a branch name or a partial leaf name (accepts regex)') @acmdlib.argument('--enforce-leaves', nargs='*', @@ -281,6 +281,7 @@ def main(args): itr_entries_old = itr_entries itr_entries_new = itr_entries + branches = sorted(branches) old_dump_iter = fold.dump(args.tree_name, itr_entries_old, branches) new_dump_iter = fnew.dump(args.tree_name, itr_entries_new, branches) @@ -290,6 +291,18 @@ def main(args): else: return '.'.join([s for s in entry[2] if not s.isdigit()]) + def elindex_fromdump(entry): + if entry is None: + return None + else: + digits = [int(s) for s in entry[2] if s.isdigit()] + result = 0 + if digits: + length = len(digits) + for idx,ival in enumerate(digits): + result += ival*pow(10,length-idx-1) + return result + @memoize def skip_leaf(name_from_dump, skip_leaves): """ Here decide if the current leaf should be skipped. @@ -335,10 +348,8 @@ def main(args): while True: if read_old: - prev_d_old = d_old d_old = reach_next(old_dump_iter, skip_leaves, args.leaves_prefix) if read_new: - prev_d_new = d_new d_new = reach_next(new_dump_iter, skip_leaves, args.leaves_prefix) if not d_new and not d_old: @@ -347,14 +358,14 @@ def main(args): read_old = True read_new = True - if (args.order_trees and d_old and d_new and d_old[-1] == d_new[-1]) or d_old == d_new: + if (args.order_trees and d_old and d_new and d_old[2:] == d_new[2:]) or d_old == d_new: n_good += 1 continue if d_old: - tree_name, ientry, name, iold = d_old + tree_name, ientry, iname, iold = d_old if d_new: - tree_name, jentry, name, inew = d_new + tree_name, jentry, jname, inew = d_new # for regression testing we should have NAN == NAN if args.nan_equal: @@ -363,7 +374,7 @@ def main(args): continue # FIXME: that's a plain (temporary?) hack - if name[-1] in args.known_hacks: + if iname[-1] in args.known_hacks or jname[-1] in args.known_hacks: continue n_bad += 1 @@ -375,79 +386,78 @@ def main(args): if not in_synch: if _is_detailed(): if d_old: - print('::sync-old %s' %'.'.join(["%03i"%ientry]+list(map(str, - d_old[2])))) + msg.info('::sync-old %s','.'.join(["%03i"%ientry]+list(map(str, d_old[2])))) else: - print('::sync-old ABSENT') + msg.info('::sync-old ABSENT') if d_new: - print('::sync-new %s' %'.'.join(["%03i"%jentry]+list(map(str, - d_new[2])))) + msg.info('::sync-new %s','.'.join(["%03i"%jentry]+list(map(str, d_new[2])))) else: - print('::sync-new ABSENT') + msg.info('::sync-new ABSENT') pass # remember for later if not d_old: fold.allgood = False - summary[d_new[2][0]] += 1 + summary[leafname_fromdump(d_new)] += 1 elif not d_new: fnew.allgood = False - summary[d_old[2][0]] += 1 + summary[leafname_fromdump(d_old)] += 1 else: - branch_old = '.'.join(["%03i"%ientry, d_old[2][0]]) - branch_new = '.'.join(["%03i"%jentry, d_new[2][0]]) - if branch_old < branch_new: - if _is_detailed(): - print('::sync-old skipping entry') - summary[d_old[2][0]] += 1 - fnew.allgood = False + branch_old = d_old[2][0] + branch_new = d_new[2][0] + leaf_old = leafname_fromdump(d_old) + leaf_new = leafname_fromdump(d_new) + index_old = elindex_fromdump(d_old) + index_new = elindex_fromdump(d_new) + # Branches/Leaves are alphabetically ordered + # If we're out-of-sync, we try to figure out + # if we should advance the old or the new branch + # For same branches, we look at the full leaf name + # If that fails we look at the pseudo-indices + if branch_old > branch_new: + read_old = False + elif branch_old < branch_new: read_new = False - elif branch_old > branch_new: + else: + if leaf_old > leaf_new: + read_old = False + elif leaf_old < leaf_new: + read_new = False + else: + if index_old > index_new: + read_old = False + elif index_old < index_new: + read_new = False + # Let's see if we can reconcile + # If not, just bail out to avoid false positivies + if read_old and not read_new: if _is_detailed(): - print('::sync-new skipping entry') - summary[d_new[2][0]] += 1 + msg.info('::sync-old skipping entry') fold.allgood = False - read_old = False + summary[leaf_old] += 1 + elif read_new and not read_old: + if _is_detailed(): + msg.info('::sync-new skipping entry') + fnew.allgood = False + summary[leaf_new] += 1 else: - # MN: difference in the leaves - prev_leaf_old = leafname_fromdump(prev_d_old) - prev_leaf_new = leafname_fromdump(prev_d_new) - leaf_old = leafname_fromdump(d_old) - leaf_new = leafname_fromdump(d_new) - if prev_leaf_old == prev_leaf_new: - # array size difference? - if leaf_old == leaf_new and leaf_old == prev_leaf_old: - # could be a size difference in >1 dim arrays - # hard to sync, skipping both - pass - elif leaf_old == prev_leaf_old: - # old has bigger array, skip old entry - read_new = False - if _is_detailed(): - print('::sync-old skipping entry') - summary[leaf_old] += 1 - elif leaf_new == prev_leaf_new: - # new has bigger array, skip new entry - read_old = False - if _is_detailed(): - print('::sync-new skipping entry') - summary[leaf_new] += 1 - - if read_old and read_new: - summary[d_new[2][0]] += 1 - if _is_detailed(): - print('::sync-old+new skipping both entries') + msg.error('::sync attempt failed, bailing out...') + msg.error(f"::sync-old Leaf vs Index : {leaf_old} vs {index_old}") + msg.error(f"::sync-new Leaf vs Index : {leaf_new} vs {index_new}") fold.allgood = False fnew.allgood = False + summary[leaf_old] += 1 + summary[leaf_new] += 1 + break if _is_exit_early(): - print('*** exit on first error ***') + msg.info('*** exit on first error ***') break continue if not args.order_trees: - n = '.'.join(list(map(str, ["%03i"%ientry]+name))) + n = '.'.join(list(map(str, ["%03i"%ientry]+iname))) else: - n = '.'.join(list(map(str, ["%03i.%03i"%(ientry,jentry)]+name))) + n = '.'.join(list(map(str, ["%03i"%ientry]+iname+["%03i"%jentry]+jname))) diff_value = 'N/A' try: diff_value = 50.*(iold-inew)/(iold+inew) @@ -455,11 +465,11 @@ def main(args): except Exception: pass if _is_detailed(): - print('%s %r -> %r => diff= [%s]' %(n, iold, inew, diff_value)) + msg.info('%s %r -> %r => diff= [%s]', n, iold, inew, diff_value) pass summary[leafname_fromdump(d_old)] += 1 - if name[0] in args.enforce_leaves: + if iname[0] in args.enforce_leaves or jname[0] in args.enforce_leaves: msg.info("don't compare further") break pass # loop over events/branches @@ -476,7 +486,7 @@ def main(args): pass if (not fold.allgood) or (not fnew.allgood): - msg.info('NOTE: there were errors during the dump') + msg.error('NOTE: there were errors during the dump') msg.info('fold.allgood: %s' , fold.allgood) msg.info('fnew.allgood: %s' , fnew.allgood) n_bad += 0.5 @@ -484,7 +494,7 @@ def main(args): ndiff = diff_tree(fold, fnew, args) if ndiff != 0: - msg.info('files differ!') + msg.error('files differ!') return 2 msg.info('all good.') return 0 -- GitLab