From c12dc1ea9e0bb7c3ce4a445bfd25166f816c8890 Mon Sep 17 00:00:00 2001
From: Frank Winklmeier <frank.winklmeier@cern.ch>
Date: Mon, 13 May 2024 15:45:48 +0200
Subject: [PATCH] PyUtils: delete unused filter-and-merge-d3pd

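Delete the unused (and broken) `filter-and-merge-d3pd` script, together
with its install entry and command alias in `CMakeLists.txt`.

The script cannot run under Python 3: it still relies on Python 2-only
constructs such as `dict.iteritems()`, `sys.exc_value` and indexing the
result of `dict.keys()`.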
---
 Tools/PyUtils/CMakeLists.txt               |   3 +-
 Tools/PyUtils/bin/filter-and-merge-d3pd.py | 973 ---------------------
 2 files changed, 1 insertion(+), 975 deletions(-)
 delete mode 100755 Tools/PyUtils/bin/filter-and-merge-d3pd.py

diff --git a/Tools/PyUtils/CMakeLists.txt b/Tools/PyUtils/CMakeLists.txt
index cc99a7256fd4..9073fceff3e4 100644
--- a/Tools/PyUtils/CMakeLists.txt
+++ b/Tools/PyUtils/CMakeLists.txt
@@ -38,7 +38,7 @@ else()
        bin/checkSG.py bin/checkMetaSG.py bin/checkTP.py bin/checkxAOD.py
        bin/diff-jobo-cfg.py bin/diffConfigs.py
        bin/diffPoolFiles.py bin/dlldep.py bin/dso-stats.py
-       bin/filter-and-merge-d3pd.py bin/getMetadata.py
+       bin/getMetadata.py
        bin/gprof2dot bin/issues bin/magnifyPoolFile.py
        bin/apydep.py bin/pool_extractFileIdentifier.py
        bin/pool_insertFileToCatalog.py bin/print_auditor_callgraph.py bin/pyroot.py
@@ -51,7 +51,6 @@ else()
     atlas_add_alias( dso-stats "dso-stats.py" )
     atlas_add_alias( gen_klass "acmd.py" "gen-klass" )
     atlas_add_alias( diffConfigs "diffConfigs.py" )
-    atlas_add_alias( filter-and-merge-d3pd "filter-and-merge-d3pd.py" )
     atlas_add_alias( diffPoolFiles "diffPoolFiles.py" )
     atlas_add_alias( print_auditor_callgraph "print_auditor_callgraph.py" )
     atlas_add_alias( pyroot "pyroot.py" )
diff --git a/Tools/PyUtils/bin/filter-and-merge-d3pd.py b/Tools/PyUtils/bin/filter-and-merge-d3pd.py
deleted file mode 100755
index 6cae1e2dfa6c..000000000000
--- a/Tools/PyUtils/bin/filter-and-merge-d3pd.py
+++ /dev/null
@@ -1,973 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
-
-# bwd compat
-from __future__ import with_statement, print_function
-
-# stdlib imports
-import os
-import sys
-import getopt
-import atexit
-
-# 3rd party imports
-import ROOT
-
-# root globals to prevent ROOT garbage collector to sweep the rug....
-_root_files = []
-_root_trees = []
-
-# Root has a global dtor ordering problem: the cintex trampolines
-# may be deleted before open files are closed.  Workaround is to explicitly
-# close open files before terminating.
-#
-def _close_root_files():
-    for f in _root_files:
-        if hasattr (f, 'Close'): f.Close()
-    del _root_files[0:-1]
-    return
-atexit.register(_close_root_files)
-
-def _fnmatch(fname, patterns):
-    """helper function wrapping the original `fnmatch:fnmatch` function but providing
-    support for a list of patterns to match against
-    """
-    from fnmatch import fnmatch
-    if isinstance(patterns, str):
-        patterns = [patterns]
-    for pattern in patterns:
-        if fnmatch(fname, pattern):
-            return True
-    return False
-
-def _make_fake_output(fname, tree_name, tree_title=None):
-    f = ROOT.TFile.Open(fname, "recreate")
-    if tree_title is None:
-        tree_title = tree_name
-    t = ROOT.TTree(tree_name, tree_title)
-    f.Write()
-    f.Close()
-    del t, f
-    return
-    
-class LBRange(object):
-    def __init__(self, run, lbmin, lbmax):
-        self.run = run
-        self.lbmin = lbmin
-        self.lbmax = lbmax
-
-def _interpret_grl(fname):
-    if not os.path.exists(fname):
-        raise OSError
-
-    lbs = []
-    if fname.endswith('.dat'):
-        for l in open(fname):
-            l = l.strip()
-            run, lbmin, lbmax = map(int, l.split())
-            lbs.append(LBRange(run, lbmin, lbmax))
-    elif fname.endswith('.xml'):
-        data = extract_data_from_xml(fname)
-        for i in data:
-            run, lbmin, lbmax = map(int, i)
-            lbs.append(LBRange(run, lbmin, lbmax))
-    else:
-        raise RuntimeError("unknown file extension (%s)" % (fname,))
-    return lbs
-
-def interpret_grl(fname="GRL.dat"):
-    fnames = []
-    if isinstance(fname, str):
-        fnames = [fname]
-    elif isinstance(fname, (list,tuple)):
-        fnames = fname[:]
-    else:
-        raise TypeError('fname must be a string or a sequence (got: %s)' %
-                        type(fname))
-    lbs = []
-    for fname in fnames:
-        lbs.extend(_interpret_grl(fname))
-    return lbs
-
-def pass_grl(run, lb, good_lbs):
-
-    for ilb in good_lbs:
-        if run != ilb.run:
-            continue
-
-        if ilb.lbmin <= lb <= ilb.lbmax:
-            return True
-
-    return False
-
-def apply_filters(branches, patterns):
-    """extract the branches which match the patterns.
-    a pattern can add or remove a branch.
-    if a branch matches no pattern, it is discarded.
-    if a branch matches several patterns, the last pattern wins.
-    """
-    from fnmatch import fnmatch
-    from collections import defaultdict
-    filtered = defaultdict(list)
-    matched_patterns = []
-    for br in branches:
-        for p in patterns:
-            if p == '':
-                continue
-            op = '-'
-            if p.startswith('+') or not p.startswith('-'):
-                if p[0] == '+':
-                    p = p[1:]
-                op = '+'
-            if p.startswith('-'):
-                op = '-'
-                p = p[1:]
-            if fnmatch(br, p):
-                filtered[br].append(op)
-                matched_patterns.append(p)
-    for p in patterns:
-        if not (p in matched_patterns):
-            print ('::: warning: pattern [%s] could not be matched against any branch' % p)
-            pass
-        pass
-    filtered = dict(filtered)
-    return sorted([k for k,v in filtered.iteritems() if v[-1] == '+'])
-
-def merge_all_trees(fnames, tree_name, memory, sfo,
-                    vars_fname=None, grl_fname=None,
-                    filter_fct=None,
-                    keep_all_trees=False,
-                    apply_recursive_opt=True):
-    
-    oname = sfo[:]
-    if not oname.endswith(".root"):
-        oname = oname + ".root"
-        pass
-
-    root_open = ROOT.TFile.Open
-    fout = root_open(oname, "RECREATE", "", 1)
-    fout.ResetBit(ROOT.kCanDelete)
-    
-    memory *= 1024 # change to bytes
-
-    tree_maxsz = ROOT.TTree.GetMaxTreeSize()
-
-    ## summing up branch sizes over all the files
-    orig_file = root_open(fnames[0], "read")
-    orig_tree = getattr(orig_file, tree_name)
-    br_names = []
-    all_br_names = set(br.GetName() for br in orig_tree.GetListOfBranches())
-    
-    if not (vars_fname is None):
-        # open the file containing the list of branches to keep or discard
-        patterns = []
-        with open(vars_fname, 'r') as br_file:
-            for p in br_file:
-                patterns.append(p.strip())
-        orig_tree.SetBranchStatus("*", 0)
-        # apply_filters returns the list of branches to keep
-        br_names = apply_filters(all_br_names, patterns)
-        print ("::: keeping only the following branches: (from file-list %s)" %
-               vars_fname)
-        for b in br_names:
-            print (":::   [%s]" % (b,))
-            orig_tree.SetBranchStatus(b,1)                            
-    else:
-        br_names = [br.GetName() for br in orig_tree.GetListOfBranches()]
-
-    nleaves = len(br_names)
-    print ("::: nleaves=[%04i] tree=[%s]" % (nleaves, orig_tree.GetName()))
-
-    tot_sz = [0]*nleaves    # zipped sizes collected from all files
-    basket_sz = [0]*nleaves # size to be optimized (starts with `tot_sz`)
-    baskets = [1]*nleaves   # cache
-
-    for idx,fname in enumerate(fnames):
-        f = root_open(fname, "read")
-        tree = getattr(f, tree_name)
-        for ibr,br_name in enumerate(br_names):
-            branch = tree.GetBranch(br_name)
-            if not branch:
-                print ("***warning*** - tree [%s] has no branch [%s]" % (tree.GetName(),
-                                                                         br_name))
-                continue
-            branch.SetAddress(0)
-
-            tot_sz[ibr] += branch.GetTotBytes()
-            basket_sz[ibr] = tot_sz[ibr]
-            #baskets[ibr] = 1
-
-            pass # loop over branches
-        del tree
-        f.Close()
-        del f
-        pass # loop over trees
-
-    if apply_recursive_opt:
-        while 1: # recursive optimization
-            tot_mem = sum(basket_sz)
-            if tot_mem < memory:
-                break
-
-            max_spare = -1
-            max_spare_idx = None
-            for i in range(nleaves):
-                spare = tot_sz[i]/baskets[i] - tot_sz[i]/(baskets[i]+1)
-                if max_spare < spare:
-                    max_spare = spare
-                    max_spare_idx = i
-            if max_spare_idx is not None:
-                idx = max_spare_idx
-                baskets[idx] += 1
-                basket_sz[idx] = tot_sz[idx]/baskets[idx]
-            pass # end-while
-        pass # apply_recursive_opt
-    
-    # create the new (optimized) tree
-    new_tree = orig_tree.CloneTree(0) # no copy of events
-    new_tree.ResetBit(ROOT.kCanDelete)
-    new_tree.SetDirectory(fout)
-    # once cloning is done, separate the trees to avoid as many side-effects
-    # as possible
-    #orig_tree.GetListOfClones().Remove(new_tree)
-    orig_tree.ResetBranchAddresses()
-    new_tree.ResetBranchAddresses()
-
-    if vars_fname is not None:
-        orig_tree.SetBranchStatus("*", 0)
-        new_tree.SetBranchStatus("*", 0)
-        for br_name in br_names:
-            orig_tree.SetBranchStatus(br_name, 1)
-            new_tree.SetBranchStatus(br_name, 1)
-
-    # a list of other tree names to filter-and-merge
-    other_trees = []
-    if keep_all_trees:
-        print ("::: capturing other trees to filter and merge...")
-        # also handle all other payload-trees
-        # to decide if a tree is a payload-tree (and not a metadata tree
-        # which we don't know -by default- what is the correct way to merge)
-        # we just compare the number of events...
-        # FIXME: handle deeply-nested structures (/dir1/dir2/tree0,...)
-        _all_tree_names = list(n.GetName()
-                               for n in orig_file.GetListOfKeys()
-                               if (isinstance(getattr(orig_file, n.GetName()),
-                                              ROOT.TTree)
-                                   and n.GetName() != tree_name))
-        for n in _all_tree_names:
-            _old_tree = orig_file.Get(n)
-            print (":::  ->",n, end='')
-            if _old_tree.GetEntries() != orig_tree.GetEntries():
-                # probably not a payload-tree but a metadata one...
-                del _old_tree
-                print ("[reject]")
-                continue
-            print ("[keep]")
-            _new_tree = _old_tree.CloneTree(0) # no copy of events
-            _new_tree.ResetBit(ROOT.kCanDelete)
-            _new_tree.SetDirectory(fout)
-            _old_tree.ResetBranchAddresses()
-            _new_tree.ResetBranchAddresses()
-            other_trees.append(_new_tree)
-            del _old_tree
-        print ("::: capturing other trees to filter and merge... [done]")
-
-    if apply_recursive_opt:
-        # setting optimized basket sizes
-        tot_mem = 0.
-        tot_bkt = 0
-        max_bkt = 0
-        min_bkt = 1024**3
-
-        for ibr in range(nleaves):
-            br = new_tree.GetBranch(br_names[ibr])
-            if basket_sz[ibr] == 0:
-                basket_sz[ibr] = 16
-
-            basket_sz[ibr] = basket_sz[ibr] - (basket_sz[ibr] % 8)
-            br.SetBasketSize(basket_sz[ibr])
-
-            tot_mem += basket_sz[ibr]
-            tot_bkt += baskets[ibr]
-
-            if basket_sz[ibr] < min_bkt:
-                min_bkt = basket_sz[ibr]
-            if basket_sz[ibr] > max_bkt:
-                max_bkt = basket_sz[ibr]
-
-            pass # loop over leaves
-
-        print ("::: optimize baskets: ")
-        print (":::   total memory buffer: %8.3f kb" % (tot_mem/1024,))
-        print (":::   total baskets:       %8.3f (min= %8.3f) (max= %8.3f) kb" % (
-            tot_bkt, min_bkt, max_bkt))
-
-        del tot_sz, basket_sz, baskets
-        pass # apply_recursive_opt
-
-    # copying data
-    n_pass = 0
-    n_tot = 0
-    do_grl_selection = not (grl_fname is None)
-    
-    if do_grl_selection:
-        good_lbs = interpret_grl(fname=grl_fname)
-
-    print ("::: processing [%i] trees..." % (len(fnames,)))
-    for idx, fname in enumerate(fnames):
-        f = root_open(fname, "READ")
-
-        for other_tree in other_trees:
-            tree = getattr(f, other_tree.GetName())
-            other_tree.CopyAddresses(tree)
-                    
-        tree = getattr(f, tree_name)
-        new_tree.CopyAddresses(tree)
-        nentries = tree.GetEntries()
-        print (":::   entries:", nentries)
-        for i in range(nentries):
-
-            nb = tree.GetEntry(i)
-            if nb <= 0:
-                print ("*** error loading entry [%i]. got (%i) bytes" % (i,nb))
-                raise RuntimeError
-            n_tot += 1
-
-            accept_entry = True
-            if do_grl_selection:
-                if not pass_grl(tree.RunNumber, tree.lbn, good_lbs):
-                    accept_entry = False
-                pass
-            
-            if filter_fct and accept_entry:
-                try:
-                    if not filter_fct(tree):
-                        accept_entry = False
-                except Exception as err:
-                    print ("*** problem running user filter fct:")
-                    print (err)
-                    print ("*** (filter fct is now disabled)")
-                    filter_fct = None
-
-            if accept_entry:
-                n_pass += 1
-                if n_pass > 10:
-                    _nentries_cur = new_tree.GetEntries()
-                    fout = new_tree.GetCurrentFile()
-                    fout.Flush()
-                    out_fsize = fout.GetSize()
-                    avg_entry_sz = out_fsize / float(_nentries_cur or 1.)
-                    do_change_file = out_fsize + avg_entry_sz > 0.9 * tree_maxsz
-                    if do_change_file:
-                        #print ("--- manually triggering TTree::ChangeFile...")
-                        # manually trigger the file split...
-                        # this is to ensure the split doesn't happen in between
-                        # the new_tree.Fill() and the other_tree.Fill() which
-                        # would de-synchronize the entries between the trees...
-                        fout = new_tree.ChangeFile(fout)
-                new_tree.Fill()
-                for other_tree in other_trees:
-                    _tree = f.Get(other_tree.GetName())
-                    nb = _tree.GetEntry(i)
-                    if nb <= 0:
-                        print ("*** error loading entry [%i] for tree [%s]. got (%i) bytes" % (
-                            i, other_tree.GetName(), nb))
-                        continue
-                    other_tree.Fill()
-                    del _tree
-                    pass # loop over other trees
-                pass # entry accepted
-            pass # loop over entries
-        del tree
-        f.Close()
-        del f
-        pass # loop over input trees
-    print ("::: processing [%i] trees... [done]" % (len(fnames,)))
-
-    eff = 0.
-    if n_tot != 0:
-        eff = float(n_pass)/float(n_tot)
-    print ("::: filter efficiency: %d/%d -> %s" % (n_pass, n_tot, eff))
-
-    fout = new_tree.GetCurrentFile()
-    fout.Write()
-    fout.Close()
-    del fout
-
-    return
-
-def order(m, chain_name, fnames, workdir):
-
-    # disabling the file-split as it may interfere badly with the re-ordering...
-    # set it to 2Tb
-    ROOT.TTree.SetMaxTreeSize(2 * 1024 * 1024 * 1024 * 1024)
-
-    print ("::: nbr of files:", len(fnames))
-    for i,fn in enumerate(fnames):
-
-        timer = ROOT.TStopwatch()
-        timer.Start()
-        print ("::: optimizing   [%s]..." % (fn,))
-
-        timer.Start()
-        fin = ROOT.TFile.Open(fn, "read")
-        tmp_fname = "%s_temporary_%03i.root" % (
-            chain_name.replace("/","_").replace(" ","_"),
-            i)
-        fout = ROOT.TFile.Open(tmp_fname, "recreate", "", 6)
-
-        # perform the (re)ordering for all trees
-        _all_tree_names = list(
-            n.GetName()
-            for n in fin.GetListOfKeys()
-            if isinstance(getattr(fin, n.GetName()),
-                          ROOT.TTree))
-        for chain_name in _all_tree_names:
-            tc2 = fin.Get(chain_name)
-            opt = {
-                0: "SortBasketsByOffset",
-                1: "SortBasketsByBranch",
-                2: "SortBasketsByEntry",
-                }.get(m, "SortBasketsByBranch")
-            opt_tree = tc2.CloneTree(-1, opt + " fast")
-            opt_tree.Write("", ROOT.TObject.kOverwrite)
-        # -
-
-        timer.Stop()
-
-        print (":::   wallclock time:", timer.RealTime())
-        print (":::   CPU time:      ", timer.CpuTime())
-
-        try:
-            # fout may have been invalidated if the file-size limit was hit
-            # and _1.root, _2.root,... files were created...
-            if fout:
-                fout.Close()
-        except Exception as err:
-            print ("**error**:",err)
-        fin.Close()
-
-        dst = os.path.join(workdir, os.path.basename(fn))
-        print ("::: optimized as [%s]... [done]" % (dst,))
-        
-        # rename the temporary into the original
-        import shutil
-        shutil.move(src=tmp_fname,
-                    dst=dst)
-                                                    
-        #os.rename(tmp_fname, fn)
-    return
-
-def _load_filter_fct(selection):
-    """
-    helper function to locate a filter function or compile one from the
-    source code snippet
-    if `selection` begins with 'file:' selection is then interpreted as a
-    string holding the location to a file where a 'filter_fct' fonction is
-    defined and importable.
-    otherwise, `selection` is compiled into a lambda function
-    """
-    import imp
-    import inspect
-    import os.path as osp
-
-    filter_fct = None
-
-    if selection is None:
-        return filter_fct
-    
-    if not isinstance(selection, str):
-        print ("** invalid filter-fct type (%r)" % (type(selection),))
-        return filter_fct
-    
-    if selection == "":
-        return filter_fct
-
-    def plugin_filter(obj):
-        if inspect.isfunction(obj):
-            return obj.__name__ == 'filter_fct'
-        
-    if selection.startswith('file:'):
-        fname = selection[len('file:'):]
-        fname = osp.expanduser(osp.expandvars(fname))
-        plugin = open(fname, 'r')
-        mod = imp.load_source(plugin.name[:-3], plugin.name, plugin)
-        plugin.close()
-        filter_fct = inspect.getmembers(mod, plugin_filter)[0][1]
-    else:
-        fct_code = "filter_fct = lambda t: %s" % selection
-        my_locals = dict(locals())
-        exec (fct_code, {}, my_locals)
-        filter_fct = my_locals['filter_fct']
-    return filter_fct
-
-class Options(object):
-    """place holder for command line options values"""
-    pass
-
-def main():
-
-    global _root_files, _root_trees
-    
-    _opts = []
-    _useropts = "i:o:t:m:s:h"
-    _userlongopts = [
-        "in=", "out=", "tree=", "var=", "maxsize=", "grl=", "fakeout",
-        "selection=",
-        "keep-all-trees",
-        "disable-recursive-opt",
-        "help"
-        ]
-    _error_msg = """\
-Accepted command line options:
- -i, --in=<INFNAME>                   ...  file containing the list of input files
- -o, --out=<OUTFNAME>                 ...  output file name
- -t, --tree=<TREENAME>                ...  name of the tree to be filtered.
-                                           other trees won't be copied by default
-                                           (except if you pass --keep-all-trees)
-     --var=<VARSFNAME>                ...  path to file listing the branch names
-                                           to be kept in the output file.
-     --grl=<GRLFNAME>                 ...  path to a GRL XML file or a list of
-                                           comma-separated GRL XML files
- -m, --maxsize=<sz>                   ...  maximum zip size of the main tree (in Mb.)
-     --fakeout                        ...  create fake output file if empty or
-                                           non valid input tree is found (ease
-                                           the pain on the GRID)
- -s, --selection=<PYTHON_CODE>        ...  a python snippet to select events
-                                           or the path to python file holding
-                                           the definition of a 'filter_fct'.
-                                           ex:
-                                             t.eg_px[0] > 10000 and t.eg_py[0] > 10000
-                                           NOTE: the tree must be named 't' in your code.
-                                           or:
-                                            file:foo.py
-                                            where foo.py contains:
-                                            def filter_fct(t):
-                                                return t.eg_px[0] > 10000
-                                           NOTE: the function must be named 'filter_fct' and take the tree as a parameter
-     --keep-all-trees                 ...  keep, filter and merge all other trees.
-     --disable-recursive-opt          ...  switch to disable a recursive (size)
-                                           optimization. (The recursive optimization
-                                           might be excessively SLOW on large n-tuples.)
- """
-
-    for arg in sys.argv[1:]:
-        _opts.append(arg)
-    
-    opts = Options()
-    opts.maxsize = 1800
-    opts.output_file = None
-    opts.vars_fname = None
-    opts.grl_fname = None
-    opts.fake_output = False
-    opts.selection = None
-    opts.keep_all_trees = False
-    opts.apply_recursive_opt = True
-    
-    try:
-        optlist, args = getopt.getopt(_opts, _useropts, _userlongopts)
-    except getopt.error:
-        print (sys.exc_value)
-        print (_error_msg)
-        sys.exit(1)
-
-    for opt,arg in optlist:
-        if opt in ("-i", "--in"):
-            opts.input_files = arg
-
-        elif opt in ("-o", "--out"):
-            opts.output_file = arg
-
-        elif opt in ("-t", "--tree"):
-            opts.tree_name = str(arg).strip()
-
-        elif opt in ("--var",):
-            opts.vars_fname = arg
-
-        elif opt in ("-m", "--maxsize"):
-            opts.maxsize = int(arg)
-
-        elif opt in ('--grl',):
-            opts.grl_fname = arg
-
-        elif opt in ('--fakeout',):
-            opts.fake_output = True
-
-        elif opt in ('-s', '--selection',):
-            opts.selection = str(arg).strip()
-
-        elif opt in ('--keep-all-trees',):
-            opts.keep_all_trees = True
-
-        elif opt in ('--disable-recursive-opt',):
-            opts.apply_recursive_opt = False
-            
-        elif opt in ("-h", "--help"):
-            print (_error_msg)
-            sys.exit(0)
-
-    print (":"*80)
-    print ("::: filter'n'merge d3pds")
-    print (":::")
-    # for AttributeListLayout which uses CINT for its dict...
-    #ROOT.gSystem.Load('liblcg_RootCollection')
-    
-    workdir = os.path.dirname(opts.output_file)
-    if workdir == '':
-        workdir = '.'
-    if not os.path.exists(workdir):
-        os.makedirs(workdir)
-
-    if isinstance(opts.grl_fname, str):
-        opts.grl_fname = opts.grl_fname.split(',')
-        from glob import glob
-        grl_fnames = []
-        for grl_fname in opts.grl_fname:
-            grl_fnames.extend(glob(grl_fname))
-        opts.grl_fname = grl_fnames
-        
-    print ("::: input files:   ",opts.input_files)
-    print ("::: output file:   ",opts.output_file)
-    print ("::: vars fname:    ",opts.vars_fname)
-    print ("::: tree name:     ",opts.tree_name)
-    print ("::: GRL file:      ",opts.grl_fname)
-    print ("::: max tree sz:   ",opts.maxsize, "Mb")
-    if opts.fake_output:
-        print ("::: creation of fake-output (if needed) [ON]")
-    print ("::: user filter:   ",opts.selection)
-    print ("::: keep all trees:", opts.keep_all_trees)
-    print ("::: recursive opt: ", opts.apply_recursive_opt)
-    
-    # slightly increase the max size (so that the manual ChangeFile at 0.9 of
-    # the current MaxTreeSize will fall within the user-provided one...)
-    ROOT.TTree.SetMaxTreeSize(int(opts.maxsize * 1024 * 1024 / 0.9))
-    
-    ## try to compile the user filtering function
-    filter_fct = None
-    try:
-        filter_fct = _load_filter_fct(opts.selection)
-    except Exception as err:
-        print ("*** problem loading filter-fct:")
-        print (err)
-        print ("*** filter-fct is now disabled")
-        filter_fct = None
-        
-    iflist = [l.strip() for l in open(opts.input_files, "r") if l.strip()]
-    for l in iflist:
-        fname = l.strip()
-        if not fname:
-            continue
-        f = ROOT.TFile.Open(fname,"read")
-        if not f:
-            raise RuntimeError("no such file [%s]" % fname)
-
-        tree = f.Get(opts.tree_name)
-        if not tree:
-            print ("***warning*** no such tree [%s] in file [%s] (IGNORING!)" % (
-            opts.tree_name, fname,
-                ))
-            continue
-        if tree.GetEntries()==0:
-            print ("**warning** no entries in tree [%s] in file [%s] (IGNORING!)" % (
-            opts.tree_name, fname,
-            ))
-            continue
-        if tree.GetListOfBranches().GetEntriesFast() == 0:
-            print ("**warning** tree [%s] in file [%s] has no branches (IGNORING!)" % (
-                opts.tree_name, fname,
-                ))
-            continue
-                
-        #f.ResetBit(ROOT.kCanDelete)
-        _root_files.append(fname)
-        print (" - loaded [%s]" % (fname,))
-
-        #tree.ResetBit(ROOT.kCanDelete)
-        _root_trees.append(opts.tree_name) # whatever...
-        del tree
-        f.Close()
-        del f
-
-    if len(_root_trees) == 0:
-        print ("::: no valid tree left")
-        if opts.fake_output:
-            print ("::: crafting an empty output file")
-            _make_fake_output(opts.output_file, opts.tree_name)
-            return 0
-        return 0 # FIXME: should this become an error of some sort ?
-    
-    ## chain = ROOT.TChain(opts.tree_name)
-    ## _root_chains.append(chain)
-    
-    nfiles = len(_root_files)
-    if nfiles <= 0:
-        print ("::: no input files found")
-        return 2
-
-    timer = ROOT.TStopwatch()
-    timer.Start()
-    merge_all_trees(fnames=_root_files,
-                    tree_name =opts.tree_name,
-                    memory=1024*30,
-                    sfo=opts.output_file,
-                    vars_fname=opts.vars_fname,
-                    grl_fname=opts.grl_fname,
-                    filter_fct=filter_fct,
-                    keep_all_trees=opts.keep_all_trees,
-                    apply_recursive_opt=opts.apply_recursive_opt)
-
-    timer.Stop()
-
-    print ("::: merging done in:")
-    print (":::   wallclock:",timer.RealTime())
-    print (":::   CPU time: ",timer.CpuTime())
-
-    # del _root_chains[:]
-    
-    print ("::: performing re-ordering...")
-    import glob
-    import os.path as osp
-    fname_pattern = osp.splitext(opts.output_file)[0]
-    # re-order all output files (in case they were split off)
-    fnames= sorted(glob.glob(fname_pattern + "*.root"))
-    order(m=2,
-          chain_name=opts.tree_name,
-          fnames=fnames,
-          workdir=workdir)
-    print ("::: performing re-ordering... [done]")
-
-    print ("::: bye.")
-    print (":"*80)
-    return 0
-
-###################### xmldict #########################
-# @file PyUtils/python/xmldict.py
-# @brief converts an XML file into a python dict, back and forth
-# @author http://code.activestate.com/recipes/573463
-#         slightly adapted to follow PEP8 conventions
-
-__doc__ = """\
-functions to convert an XML file into a python dict, back and forth
-"""
-__author__ = "Sebastien Binet <binet@cern.ch>"
-
-
-# hack: LCGCMT had the py-2.5 xml.etree module hidden by mistake.
-#       this is to import it, by hook or by crook
-def import_etree():
-    import xml
-    # first try the usual way
-    try:
-        import xml.etree
-        return xml.etree
-    except ImportError:
-        pass
-    # do it by hook or by crook...
-    import os, imp
-    xml_site_package = os.path.join(os.path.dirname(os.__file__), 'xml')
-    m = imp.find_module('etree', [xml_site_package])
-
-    etree = imp.load_module('xml.etree', *m)
-    setattr(xml, 'etree', etree)
-    return etree
-try:
-    etree = import_etree()
-    from xml.etree import ElementTree
-
-    ## module implementation ---------------------------------------------------
-    class XmlDictObject(dict):
-        def __init__(self, initdict=None):
-            if initdict is None:
-                initdict = {}
-            dict.__init__(self, initdict)
-
-        def __getattr__(self, item):
-            return self.__getitem__(item)
-
-        def __setattr__(self, item, value):
-            self.__setitem__(item, value)
-
-        def __str__(self):
-            if '_text' in self:
-                return self['_text']
-            else:
-                return dict.__str__(self)
-
-        @staticmethod
-        def wrap(x):
-            if isinstance(x, dict):
-                return XmlDictObject ((k, XmlDictObject.wrap(v))
-                                      for (k, v) in x.iteritems())
-            elif isinstance(x, list):
-                return [XmlDictObject.wrap(v) for v in x]
-            else:
-                return x
-
-        @staticmethod
-        def _unwrap(x):
-            if isinstance(x, dict):
-                return dict ((k, XmlDictObject._unwrap(v))
-                             for (k, v) in x.iteritems())
-            elif isinstance(x, list):
-                return [XmlDictObject._unwrap(v) for v in x]
-            else:
-                return x
-
-        def unwrap(self):
-            return XmlDictObject._unwrap(self)
-
-        pass # Class XmlDictObject
-    
-    def _dict2xml_recurse(parent, dictitem):
-        assert type(dictitem) is not type(list)
-
-        if isinstance(dictitem, dict):
-            for (tag, child) in dictitem.iteritems():
-                if str(tag) == '_text':
-                    parent.text = str(child)
-                elif type(child) is type(list):
-                    for listchild in child:
-                        elem = ElementTree.Element(tag)
-                        parent.append(elem)
-                        _dict2xml_recurse (elem, listchild)
-                else:                
-                    elem = ElementTree.Element(tag)
-                    parent.append(elem)
-                    _dict2xml_recurse (elem, child)
-        else:
-            parent.text = str(dictitem)
-    
-    def dict2xml(xmldict):
-        """convert a python dictionary into an XML tree"""
-        roottag = xmldict.keys()[0]
-        root = ElementTree.Element(roottag)
-        _dict2xml_recurse (root, xmldict[roottag])
-        return root
-
-    def _xml2dict_recurse (node, dictclass):
-        nodedict = dictclass()
-
-        if len(node.items()) > 0:
-            # if we have attributes, set them
-            nodedict.update(dict(node.items()))
-
-        for child in node:
-            # recursively add the element's children
-            newitem = _xml2dict_recurse (child, dictclass)
-            if child.tag in nodedict:
-                # found duplicate tag, force a list
-                if isinstance(nodedict[child.tag], list):
-                    # append to existing list
-                    nodedict[child.tag].append(newitem)
-                else:
-                    # convert to list
-                    nodedict[child.tag] = [nodedict[child.tag], newitem]
-            else:
-                # only one, directly set the dictionary
-                nodedict[child.tag] = newitem
-
-        if node.text is None: 
-            text = ''
-        else: 
-            text = node.text.strip()
-
-        if len(nodedict) > 0:            
-            # if we have a dictionary add the text as a dictionary value
-            # (if there is any)
-            if len(text) > 0:
-                nodedict['_text'] = text
-        else:
-            # if we don't have child nodes or attributes, just set the text
-            if node.text: nodedict = node.text.strip()
-            else:         nodedict = ""
-
-
-
-        return nodedict
-        
-    def xml2dict (root, dictclass=XmlDictObject):
-        """convert an xml tree into a python dictionary
-        """
-        return dictclass({root.tag: _xml2dict_recurse (root, dictclass)})
-    #####################################################################
-
-except ImportError:
-    print ("**WARNING: could not import 'xml.etree' (check your python version)")
-    print ("           you won't be able to correctly read GRL XML files !")
-    
-def extract_data_from_xml(fname="GRL.xml"):
-    """simple helper function to convert a GRL xml file into a list
-    of tuples (run-nbr, lumi-block-start, lumi-block-stop)
-    """
-    import sys
-    assert "xml.etree" in sys.modules, \
-           "no 'xml.etree' module were imported/available"
-    data =[]
-    dd=xml2dict(etree.ElementTree.parse(str(fname)).getroot())
-
-    lbks = dd['LumiRangeCollection']['NamedLumiRange']['LumiBlockCollection']
-    if not isinstance(lbks, (list, tuple)):
-        lbks = [lbks]
-    for lbk in lbks:
-        assert isinstance(lbk,dict), \
-               "expect a dict-like object (got type=%s - value=%r)" % (type(lbk), repr(lbk))
-        runnumber=lbk['Run']
-        run_ranges=lbk['LBRange']
-
-        #xml2dict return a dataset when only one lbn range per run
-        #and return a list when there are several lbn ranges per run
-        #==> need different piece of code
-        #The following lines 'convert' a dict into a list of 1 dict 
-        if isinstance(run_ranges,dict):
-            run_ranges=[run_ranges]
-            pass
-
-        #loop over run ranges
-        for lbrange in run_ranges: 
-            lbn_min=lbrange['Start']
-            lbn_max=lbrange['End']
-            # GRL schema changed from:
-            #  <LumiBlockCollection>
-            #     <Run>178044</Run>
-            #     <LBRange Start="42" End="666"/>
-            #    ...
-            # to:
-            #  <LumiBlockCollection>
-            #     <Run PrescaleRD0="8" PrescaleRD1="8">178044</Run>
-            #     <LBRange Start="42" End="666"/>
-            #    ...
-            if isinstance(runnumber, XmlDictObject):
-                runnumber = runnumber['_text']
-            #print (runnumber,"  ", lbn_min,"  ", lbn_max)
-            data.append((runnumber, lbn_min, lbn_max))
-            pass
-    return data
-
-### script entry point ###
-if __name__ == "__main__":
-    sys.exit(main())
-
-"""
-tests:
-
-xrdcp root://eosatlas.cern.ch//eos/atlas/user/b/binet/utests/utests/filter-d3pd/ntuple.0.root .
-xrdcp root://eosatlas.cern.ch//eos/atlas/user/b/binet/utests/utests/filter-d3pd/ntuple.1.root .
-cat > input.txt << EOF
-ntuple.0.root
-ntuple.1.root
-EOF
-cat > vars.txt << EOF
--*
-+el_vertx
-+el_verty
-+el_L2_errpt
-EOF
-filter-and-merge-d3pd -i input.txt -o merged.root -t egamma --var=vars.txt -s ''
-filter-and-merge-d3pd -i input.txt -o merged.root -t egamma --var=vars.txt -s 't.el_verty.size() > 0 and t.el_verty[0]>=0.'
-cat > foo.py << EOF
-def filter_fct(t):
-    return t.el_verty.size() > 0 and t.el_verty[0]>=0.
-EOF
-filter-and-merge-d3pd -i input.txt -o merged.root -t egamma --var=vars.txt -s 'file:foo.py'
-"""
-- 
GitLab