#!/usr/bin/env python
#file RunDepProfileGenerator.py
#Mu profile generator. Given a mu histogram in a ROOT file from Data-Preparation, scale it to the desired number of events and create a configLumi job option.

import sys,argparse,os,time
import ROOT
import math

from AthenaCommon.Logging import logging
log = logging.getLogger("RunDepProfileGenerator")
log.setLevel(logging.DEBUG)

parser = argparse.ArgumentParser(description='Generate configLumi option from mu profile.')
parser.add_argument('-i','--input-file-name',help='name of ROOT file containing unscaled mu histogram',dest='in_file')
parser.add_argument('-n','--histogram-name',help='optional name of histogram to generate profile for if multiple exist in IN_FILE',dest='in_hist')
parser.add_argument('-e','--external-dictionary',help='unscaled input dictionary given as {mu:events}',dest='ex_dict')
parser.add_argument('-r','--run-number',help='run number used in default output file name',dest='run_num',required=True,type=int)
parser.add_argument('-s','--start-time-stamp',help='starting timestamp',dest='start_tstamp',required=True,type=int)
parser.add_argument('-o','--output-file-name',help='custom output file name',dest='out_file')
parser.add_argument('-c','--scale',help='total number of events by which to scale the input histogram/dictionary',dest='total_events',required=True,type=int)
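#Example invocations (all values illustrative):
#  RunDepProfileGenerator.py -i muProfile.root -r 284500 -s 1446539185 -c 1000
#  RunDepProfileGenerator.py -e "{20.5: 10, 21.5: 30, 22.5: 60}" -r 284500 -s 1446539185 -c 1000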

#Following RunDepTaskMaker.py for output format
formatLine="'run':{run}, 'lb':{lb}, 'starttstamp':{starttstamp}, 'dt':{dt:.3f}, 'evts':_evts({evts:.0f}), 'mu':{mu:.3f}, 'force_new':False"
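#Example of one rendered line written by dressSkeleton (illustrative values):
#  {'run':284500, 'lb':1, 'starttstamp':1446539185, 'dt':0.000, 'evts':_evts(250), 'mu':20.500, 'force_new':False},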

scaled_integral = 0

#Check total number of events in a dictionary
def integrate(input_dict):
    total = 0
    for val in input_dict.values():
        total += val
    return total

#Scales the given data to the desired total number of events passed as argument to option --scale
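#e.g. (illustrative) scaleDict({20.5: 1.0, 21.5: 3.0}, 100) returns {20.5: 25, 21.5: 75}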
def scaleDict(input_dict,scale):
    scaled_dict = dict()
    global scaled_integral
    skipped = 0
    integral = integrate(input_dict)
    for key in input_dict.keys():
        new_val = round((input_dict[key]*scale)/integral)
        scaled_dict[key] = new_val
        #This condition checks for zero event elements getting stored in dictionary
        if new_val == 0.0:
            skipped += 1
    #Warn the user how many lumiblocks went unpopulated; these are later excluded by dressSkeleton when writing the output file
    log.warning("Skipped %i lumiblocks for containing fewer than one event.",skipped)
    scaled_integral = integrate(scaled_dict)
    if scaled_integral != scale:
        log.warning("Final distribution has {} total events! Does not match with {} events requested.".format(scaled_integral,scale))
    return scaled_dict

#Create scaled dictionary from histogram with:
# key = mu value at center of bin,
# value = number of events in that bin
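#e.g. (illustrative) a histogram with bin centers 20.5, 21.5 and contents 30., 70. yields {20.5: 30.0, 21.5: 70.0} before scaling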
def extractDict(hist,scale):
    data = dict()
    nbins = hist.GetNbinsX()
    for i in range(1,nbins+1):
        data[hist.GetBinCenter(i)] = hist.GetBinContent(i)
    return scaleDict(data,scale)

#Generates the configuration preInclude file for transform jobs
# rewritten from RunDepTaskMaker.py
#Parses skeleton file skeletonName and replaces the lines between #<!-- and #-->
#skeletonName is static, set in this script to RunDependentSimData/OverrideRunLBLumiDigitConfig.py
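#Illustrative note: in the generated output, the skeleton lines from the '#<!--' marker up to (but not including)
# the '#-->' marker are replaced by the "  {...}," lumiblock entries produced from jobIterator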
def dressSkeleton(skeletonName,outName,jobIterator):
    from AthenaCommon.Include import FindFile,optionsPath
    sname = FindFile(skeletonName, optionsPath, os.R_OK)
    if not sname: raise IOError("Input skeleton file {} cannot be read.".format(skeletonName))
    s = open(sname,'r')
    outfile = open(outName,'w')
    outfile.write('#'*20)
    outfile.write('\n## File {}: autogenerated configuration file from command\n'.format(outName))
    outfile.write('##' + ' '.join(sys.argv) + '\n')
    outfile.write('## Created on {}\n'.format(time.asctime()))
    outfile.write('#'*20+'\n')

    incomment = False
    dumpedAlready = False
    for line in s:
        if (line.find("#<!--") == 0): incomment = True
        if (line.find("#-->") == 0): incomment = False
        if not incomment:
            outfile.write(line)
        elif not dumpedAlready:
            for j in jobIterator:
                if j['evts'] != 0.0:
                    outfile.write("  {"+formatLine.format(**j)+"},\n")  #Skips all zero event lumiblocks
                pass
            dumpedAlready = True
            pass
        pass
    #Close the skeleton and output files now that writing is complete
    s.close()
    outfile.close()

def main():
    args = parser.parse_args()
    #Check that the user provided either a .root file or a dictionary
    if len(sys.argv) == 1 or (not args.in_file and not args.ex_dict):
        parser.print_help()
        exit(1)

    #If no output file name provided, generate default: "configLumi_runRUN_NUM.py"
    outFName = args.out_file
    if not args.out_file:
        outFName = 'configLumi_run{}.py'.format(args.run_num)
    #Use first histogram in file to generate dictionary, if no name provided
    hist = None
    inFile = None
    scaled_dict = dict()
    global scaled_integral
    if args.in_file:    #If file provided, use it
        inFile = ROOT.TFile(args.in_file)
    if (not args.in_hist) and inFile:    #Use first histogram 
        if inFile.GetListOfKeys().GetSize() > 1:
            log.warning('Multiple objects found in {}. Only converting first histogram.'.format(args.in_file))
        for x in range(0,inFile.GetListOfKeys().GetSize()):    #Loop works even if only one element in file
            if inFile.Get(inFile.GetListOfKeys()[x].GetName()).ClassName() == 'TH1F':
                hist = inFile.Get(inFile.GetListOfKeys()[x].GetName())
                break
            else:
                continue
        if not hist and not args.ex_dict:
            print("No histogram found in {} and no dictionary provided. Exiting!".format(args.in_file))
            exit(1)
        if hist:
            scaled_dict = extractDict(hist,args.total_events)    #This function collects data from the histogram and scales it to the requested total
    elif (args.in_hist and inFile):
        hist = inFile.Get(args.in_hist)
        if not hist:
            print("Histogram {} not found in {}. Exiting!".format(args.in_hist,args.in_file))
            exit(1)
        scaled_dict = extractDict(hist,args.total_events)

    #If a file with a histogram was not provided, then scaled_dict contains zero items at this point.
    #Requiring both that a command-line dictionary was provided and that scaled_dict is still empty
    # keeps a histogram-derived dictionary from being overwritten by EX_DICT.
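    #EX_DICT is evaluated with eval(), so it must be a python dict literal, e.g. (illustrative) -e "{20.5: 100, 21.5: 300}"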
    if args.ex_dict and (len(scaled_dict) == 0):
        scaled_dict = scaleDict(eval(str(args.ex_dict)),args.total_events)

    if scaled_integral == 0.0:
        log.warning("All lumiblocks contain zero events after scaling! No luminosity in selected range.")
    else:
        log.info('Preparing a RunDMC task configuration object for {} total events'.format(args.total_events))
        average_mu = sum([ pair[0]*pair[1] for pair in scaled_dict.items() if (pair[0] > 0) and (pair[1] > 0) ])
        average_mu /= scaled_integral
        log.info('Average pileup in this task is {}'.format(average_mu))
        #Now place mu and events into list of lumiblock info called JobMaker, to be written into the configLumi file by dressSkeleton()
        JobMaker = []
        #Iterate through the dictionary items and append them to a list in order of increasing mu, which corresponds to increasing lb
        #Each lumiblock's timestamp is offset from the starting timestamp by 60 seconds per lb
        #scaled_dict.items() are (mu,evts) tuples.
        #sorted() orders the tuples by their key (the mu value), which matches the order the histogram bins were inserted
        #    The explicit sort is redundant in python >= 3.7, where dicts are guaranteed to preserve insertion order
        #Uses a counter variable i to compute the lb number starting at 1, and the displacement from the starting timestamp
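        #e.g. (illustrative) the first entry could be {'run':284500, 'lb':1, 'starttstamp':1446539185, 'dt':0.0, 'evts':250, 'mu':20.5}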
        i = 0
        for item in sorted(scaled_dict.items()):
            JobMaker.append( {'run':args.run_num,
                              'lb':i+1,
                              'starttstamp':args.start_tstamp+(i*60),
                              'dt':0.00,
                              'evts':item[1],
                              'mu':item[0]} )
            i += 1

        l = len(JobMaker)
        log.info('There are {} lumiblocks in this task.'.format(l))
        if l > 10:
            log.info('Displaying first and last 5 lumiblocks:')
            for j in JobMaker[:5]+JobMaker[-5:]: print(" ",j)
        else:
            log.info('Displaying all lumiblocks')
            for j in JobMaker: print(" ",j)
        allLB = iter(JobMaker)
        skeletonName = "RunDependentSimData/OverrideRunLBLumiDigitConfig.py"
        dressSkeleton(skeletonName,outFName,allLB)


if __name__=="__main__":
    main()