diff --git a/plotPostProcess.py b/plotPostProcess.py index 800bedad32fd7a34a331b6951a04f2e251cc6eb9..e05a26ec7a0c12c112712c71b3b6ae8b81d66675 100644 --- a/plotPostProcess.py +++ b/plotPostProcess.py @@ -19,7 +19,7 @@ import argparse # to parse command line options import functions.RootTools as RootTools# root tool that I have taken from a program by Turra import os import collections # so we can use collections.defaultdict to more easily construct nested dicts on the fly - +import resource # print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss class DSIDHelper: @@ -53,6 +53,23 @@ class DSIDHelper: 364135, 364136, 364137, 364138, 364139, 364140, 364141] } + ###################################################### + # Define some default or hardcoded values + ###################################################### + + + # campaigns integrated luminosity, complete + partial + lumiMap = { "mc16a" : 36.21496, "mc16d" : 44.3074, "mc16e": 59.9372, "mc16ade": 140.45956, "units" : "fb-1"} + #taken by Justin from: https://twiki.cern.ch/twiki/bin/view/Atlas/LuminosityForPhysics#2018_13_TeV_proton_proton_placeh + #2015: 3.21956 fb^-1 +- 2.1% (final uncertainty) (3.9 fb^-1 recorded) + #2016: 32.9954 fb^-1 +- 2.2% (final uncertainty) (35.6 fb^-1 recorded) + #2017: 44.3074 fb^-1 +- 2.4% (preliminary uncertainty) (46.9 fb^-1 recorded) + #2018: 59.9372 fb^-1 +- 5% (uncertainty TBD, use this as a placeholder) (62.2 fb^-1 recorded) + #Total: 140.46 fb^-1 + + + sumOfEventWeightsDict = {} + def __init__(self): assert self.physicsProcess.keys() == self.physicsProcessColor.keys(), 'physicsProcess and physicsProcessColor do not have a consistent set of dict-keys' @@ -84,15 +101,37 @@ class DSIDHelper: return None - def getProduct_CrossSec_kFactor_genFiltEff(self, DSID): + + def fillSumOfEventWeightsDict(self, TDir): + + TDirKeys = TDir.GetListOfKeys() # output is a TList + + for TKey in TDirKeys: + weightHistCandidate = TDir.Get(TKey.GetName()) # this is how I access the element that belongs to the current TKey + + if isinstance(weightHistCandidate, ROOT.TDirectoryFile ): continue + + if isinstance(weightHistCandidate, ROOT.TH1 ) and "sumOfWeights" in TKey.GetName(): + + DSID = re.search("\d{6}", TKey.GetName() ).group() # if we found a regular expression + DSID = int(DSID) + + sumAODWeights = weightHistCandidate.GetBinContent(weightHistCandidate.GetXaxis().FindBin("sumOfEventWeights_xAOD")) + + if DSID in self.sumOfEventWeightsDict.keys(): self.sumOfEventWeightsDict[DSID] += sumAODWeights + else: self.sumOfEventWeightsDict[DSID] = sumAODWeights + + return None + + def getMCScale(self, DSID, mcTag): + DSID = int(DSID) - + prod = self.metaDataDict[DSID]["crossSection"] * self.metaDataDict[DSID]["kFactor"] * self.metaDataDict[DSID]["genFiltEff"] - - return prod + scale = self.lumiMap[mcTag] * 1000000. * prod / self.sumOfEventWeightsDict[int(DSID)] - + return scale def makeReverseDict(self, inputDict): # Lets say we have a dict that goes like: @@ -146,8 +185,79 @@ class DSIDHelper: # end class DSIDHelper +def generateTDirContents(TDir): + # this is a python generator + # this one allows me to loop over all of the contents in a given ROOT TDir with a for loop + + TDirKeys = TDir.GetListOfKeys() # output is a TList + + for TKey in TDirKeys: + yield TDir.Get(TKey.GetName()) # this is how I access the element that belongs to the current TKey + + +def generateTDirPathAndContentsRecursive(TDir, baseString = "" , newOwnership = None): + # for a given TDirectory (remember that a TFile is also a TDirectory) get all non-directory objects + # redturns a tuple ('rootFolderPath', TObject) and is a generator + + baseString += TDir.GetName() +"/" + + for TObject in generateTDirContents(TDir): + #import pdb; pdb.set_trace() # import the debugger and instruct it to stop here + if newOwnership is not None: ROOT.SetOwnership(TObject, newOwnership) # do this to prevent an std::bad_alloc error, setting it to to 'True' gives permission to delete it, https://root.cern.ch/root/htmldoc/guides/users-guide/ObjectOwnership.html + if isinstance(TObject, ROOT.TDirectoryFile ): + + for recursiveTObject in generateTDirPathAndContentsRecursive(TObject, baseString = baseString, newOwnership = newOwnership): + yield recursiveTObject + + else : + yield baseString + TObject.GetName() , TObject + +def generateTDirContentsRecursive(TDir): + # for a given TDirectory (remember that a TFile is also a TDirectory) get all non-directory objects + # redturns all TObject and is a generator + + for path, TObject in generateTDirPathAndContentsRecursive(TDir): + yield TObject + +def idDSID(path): + # look for the following patter: after a / , loog for 6 digits preceeded by any number of character that are not / + # return the non / strings and the 6 digits + DSIDRegExpression = re.search("(?<=/)[^/]*\d{6}", path) + + # if we found such a pattern, select the six digits, or return 0 + if DSIDRegExpression: DSID = re.search("\d{6}", DSIDRegExpression.group() ).group() # if we found a regular expression + else: DSID ="0" + + return DSID + +def idPlotTitle(path, DSID=None ): + + if DSID is None: DSID = idDSID(path) + + pathDSIDCleaned = path.replace("_"+DSID+"_","_") -def getTDirsAndContents(TDir, outputDict = {}, recursiveCounter = 0): + plotTitle = pathDSIDCleaned.split("/h_")[-1] + + return plotTitle + +def badBaseHist(path, baseHist): + + if "Nominal" not in path: return True + elif "Cutflow" in path: return True + elif "cutflow" in path: return True + elif "h_raw_" in path: return True + elif "hraw_" in path: return True + elif isinstance( baseHist, ROOT.TH2 ): return True + elif not isinstance( baseHist, ROOT.TH1 ): return True + + return False + + + + + + +def getTDirsAndContents(TDir, outputDict = {}, recursiveCounter = 0, TDirConstraint = "Nominal"): # for a given TDirectory (remember that a TFile is also a TDirectory) get all TH1 (recursively if needed) # output will be {TDirObject : names of objects that are not TDirs themselves} # we can do this recursively up to a depth of 'recursiveCounter' (if it is set to >0) @@ -155,6 +265,7 @@ def getTDirsAndContents(TDir, outputDict = {}, recursiveCounter = 0): # {TDirObject1 : names of objects in TDirObject1 that are not TDirs themselves, # TDirObject2 : names of objects in TDirObject2 that are not TDirs themselves } # The relationship between the TDirs is not being stored in this way + # TDirConstraint - consider only TH1s that are in a TDirectoru that contains the string <TDirConstraint> TDirKeys = TDir.GetListOfKeys() # output is a TList @@ -170,7 +281,8 @@ def getTDirsAndContents(TDir, outputDict = {}, recursiveCounter = 0): subdir = TDir.Get(TKey.GetName()) outputDict = getTDirsAndContents(subdir, outputDict, recursiveCounter = recursiveCounter-1 ) elif isinstance(TDir.Get(TKey.GetName()), ROOT.TH1 ) and not isinstance(TDir.Get(TKey.GetName()), ROOT.TH2 ): #check that the TKey indeed refers to a TH1 - TH1List.append(TKey.GetName()) + + if TDirConstraint in TDir.GetName() and "VR1" in TKey.GetName(): TH1List.append(TKey.GetName()) # add the histogram to our list if it is in a TDir that contains the string <TDirConstraint> outputDict[TDir] = TH1List @@ -391,8 +503,6 @@ def fillMasterHistDict( inputFileDict , aDSIDHelper, masterHistDict = collection # get the histograms in the diffrent TDirectories within the provided .root file dirsAndContents = getTDirsAndContents(postProcessedData, outputDict = {}, recursiveCounter = float("inf")) - - ######### get sumOfWeights ######### # we take the histograms that store the sumOfWeights are in the top-level of the TFile, i.e. not in a sub-TDirectory topLevelTObjects = {postProcessedData : dirsAndContents[postProcessedData]} @@ -422,13 +532,33 @@ def fillMasterHistDict( inputFileDict , aDSIDHelper, masterHistDict = collection currentTH1 = TDir.Get(histName)#.Clone() # get a clone of the histogram, so that we can scale it, without changeing the original if int(DSID) > 0: # Signal & Background have DSID > 0 - scale = lumiMap[mcTag] * 1000000. * aDSIDHelper.getProduct_CrossSec_kFactor_genFiltEff(DSID) / sumOfWeights[int(DSID)] + scale = aDSIDHelper.getMCScale(DSID, mcTag) currentTH1.Scale(scale) # scale the histogram masterHistDict[ histEnding ][ mcTag ][ DSID ] = currentTH1 return masterHistDict + +def fillMasterHistDict2( currentTH1, histEnding, mcTag, DSID, aDSIDHelper, masterHistDict = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(dict))) ): + # split the histograms in inputFileDict by HistEnding, mcCampaign, and DSID + # and put them into a collections.defaultdict to indext them by HistEnding, mcCampaign, and DSID + # + # remember that python instantiates functions only once, + # so unless we provice a masterHistDict, we will aggregate results + # in the default one over multiple function calls + # + # masterHistDict[ HistEnding ][ mcCampaign ][ DSID ][ ROOT.TH1 ] + + if int(DSID) != 0: # Signal & Background have DSID > 0 + scale = aDSIDHelper.getMCScale(DSID, mcTag) + currentTH1.Scale(scale) # scale the histogram + + masterHistDict[histEnding][mcTag][DSID]=currentTH1 + + return masterHistDict + + def mergeMultiMCTagMasterHistDict(masterHistDict, combinedMCTagHistDict = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(dict))) ): if len( masterHistDict.values()[0].keys() ) == 1: # if there's only one MCTag we do not need to combine the hists, and we can return the input dict @@ -505,16 +635,15 @@ def printSubsetOfHists(histList, searchStrings=["M12","M34","M4l"], outputDir = return None -def addRegionAndChannelToStatsText(canvasName): +def addRegionAndChannelToStatsText(shortName): outList = "" - shortName = canvasName.split("_")[0] - # fill in region - if "SR" in shortName: outList += "Signal Region" - elif "CRC" in shortName: outList += "VR1" - elif "CRD" in shortName: outList += "VR2" + if "ZXSR" in shortName: outList += "Signal Region" + elif "ZXCRC" in shortName: outList += "VR1" + elif "ZXCRD" in shortName: outList += "VR2" + else: outList += shortName outList += ", " @@ -522,25 +651,12 @@ def addRegionAndChannelToStatsText(canvasName): elif "2e2m" in shortName: outList += "2e2#mu" elif "2m2e" in shortName: outList += "2#mu2e" elif "4e" in shortName: outList += "4e" - else: outList += "4#mu, 2e2#mu, 2#mu2e, 2#mu2e" + else: outList += "4#mu, 2e2#mu, 2#mu2e, 4e" return outList if __name__ == '__main__': - ###################################################### - # Define some default or hardcoded values - ###################################################### - - - # campaigns integrated luminosity, complete + partial - lumiMap= { "mc16a" : 36.21496, "mc16d" : 44.3074, "mc16e": 59.9372, "mc16ade": 140.45956, "units" : "fb-1"} - #taken by Justin from: https://twiki.cern.ch/twiki/bin/view/Atlas/LuminosityForPhysics#2018_13_TeV_proton_proton_placeh - #2015: 3.21956 fb^-1 +- 2.1% (final uncertainty) (3.9 fb^-1 recorded) - #2016: 32.9954 fb^-1 +- 2.2% (final uncertainty) (35.6 fb^-1 recorded) - #2017: 44.3074 fb^-1 +- 2.4% (preliminary uncertainty) (46.9 fb^-1 recorded) - #2018: 59.9372 fb^-1 +- 5% (uncertainty TBD, use this as a placeholder) (62.2 fb^-1 recorded) - #Total: 140.46 fb^-1 ###################################################### @@ -549,7 +665,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("input", type=str, nargs='*', help="name or path to the input files") + parser.add_argument("input", type=str, help="name or path to the input files") parser.add_argument("-c", "--mcCampaign", nargs='*', type=str, choices=["mc16a","mc16d","mc16e","mc16ade"], required=True, help="name of the mc campaign, i.e. mc16a or mc16d, need to provide exactly 1 mc-campaign tag for each input file, \ @@ -580,9 +696,9 @@ if __name__ == '__main__': # do some checks to make sure the command line options have been provided correctly ###################################################### - assert len(args.input) == len(args.mcCampaign), "We do not have exactly one mc-campaign tag per input file" + assert 1 == len(args.mcCampaign), "We do not have exactly one mc-campaign tag per input file" + #assert len(args.input) == len(args.mcCampaign) - assert all( x==1 for x in collections.Counter( args.mcCampaign ).values() ), "\ Some mc-campaign tags have been declared more than once. \ For now we are only setup to support one file per MC-tag. Until we changed that, 'hadd' them in bash" @@ -602,8 +718,37 @@ if __name__ == '__main__': # assemble the input files, mc-campaign tags and metadata file locations into dict # well structered dict is sorted by mc-campign tag and has - inputFileDict = getWellStructedDictFromCommandLineOptions( args, inputFileDict = collections.defaultdict(dict) ) + #inputFileDict = getWellStructedDictFromCommandLineOptions( args, inputFileDict = collections.defaultdict(dict) ) + ####################### + # Test generator access to ROOT TH1s + + #postProcessedData = inputFileDict.values()[0]["TFile"] + + + postProcessedData = ROOT.TFile(args.input,"READ"); # open the file with te data from the ZdZdPostProcessing + + myDSIDHelper.fillSumOfEventWeightsDict(postProcessedData) + + histCounter = 0 # count how many relevant hists we have + + # loop over all of the TObjects in the given ROOT file + for path, baseHist in generateTDirPathAndContentsRecursive(postProcessedData, newOwnership = True): + + if badBaseHist(path, baseHist): continue # skip non-relevant histograms + + ROOT.SetOwnership(baseHist, False) # if we pass badBaseHist the histogram is relevant, so we change the ownership here to False in the attempt to prevent deletion + + # discern DSID and plotTitle to use them when sorting into a tree structure + DSID = idDSID(path) + plotTitle = idPlotTitle(path, DSID=DSID) + + # build my tree structure here to house the relevant histograms, pre-sorted for plotting + masterHistDict = fillMasterHistDict2( baseHist, plotTitle, args.mcCampaign[0], DSID, myDSIDHelper ) + + # output a running counter of processed hists and used memory + histCounter += 1 + if histCounter %1000 == 0: print str(histCounter) + " relevant hists processed. \t Memory usage: %s (kB)" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/8) @@ -613,9 +758,10 @@ if __name__ == '__main__': - masterHistDict = fillMasterHistDict( inputFileDict, myDSIDHelper ) + #masterHistDict = fillMasterHistDict( inputFileDict, myDSIDHelper ) + #combinedMCTagHistDict = mergeMultiMCTagMasterHistDict(masterHistDict) - combinedMCTagHistDict = mergeMultiMCTagMasterHistDict(masterHistDict) + combinedMCTagHistDict = masterHistDict canvasList = [] @@ -678,7 +824,7 @@ if __name__ == '__main__': statsTexts = [] statsTexts.append( "#font[72]{ATLAS} internal") - statsTexts.append( "#sqrt{s} = 13 TeV, %.1f fb^{-1}" %(lumiMap[mcTag] ) ) + statsTexts.append( "#sqrt{s} = 13 TeV, %.1f fb^{-1}" %( myDSIDHelper.lumiMap[mcTag] ) ) statsTexts.append( addRegionAndChannelToStatsText(canvas.GetName() ) ) statsTexts.append( " " ) @@ -802,8 +948,8 @@ if __name__ == '__main__': canvasList.sort( key = lambda x:x.GetTitle()) # i.e. we are sorting the list by the output of a function, where the function provides takes implicitly elements of the list, and in our case calls the .GetTitle() method of that element of the list and outputs it - - postProcessedDataFileName = os.path.basename( args.input[0] ) # split off the file name from the path+fileName string if necessary + if isinstance(args.input,list): postProcessedDataFileName = os.path.basename( args.input[0] ) # split off the file name from the path+fileName string if necessary + else: postProcessedDataFileName = os.path.basename( args.input ) # split off the file name from the path+fileName string if necessary if args.outputName is None: outputName = postProcessedDataFileName.split(".")[0] + "_" + "_".join(args.mcCampaign)+"_" else: outputName = args.outputName