-
Federica Pasquali authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
analyze.py 9.33 KiB
#!/bin/env python
from CommonAnalysisHelpers import common,analyze
from CAFExample import hwwanalyze
def main(config):
    """Execute your analysis according to the given configuration.

    The configuration can be created from a config file. The steps performed
    are, in order: load aliases and (optionally) the MVA libraries, load and
    trim the sample folder, load observables and cuts, book analysis jobs,
    run the cut-based analysis and/or the MVA training, and finally write the
    sample folder (or an alternative ``.isDone`` marker file) to disk.

    Args:
        config: the configuration object whose tags steer every step below.
            Command line options are read from its "CLI+" folder and runtime
            flags from its "runtime+" folder.

    Returns:
        None. Results are written to disk through the ``common`` helpers.
    """
    # Imported here as well so that main() also works when this module is
    # imported rather than executed. When run as a script both modules have
    # already been loaded by the __main__ section, so these are cheap lookups.
    import QFramework
    import ROOT

    # print a welcome message
    print(QFramework.TQStringUtils.makeBoldWhite("\nAnalyzing Analysis ROOT File\n"))

    # not every ROOT setup provides StatusCode - ignore it if it is missing
    try:
        ROOT.StatusCode.enableFailure()
    except AttributeError:
        pass

    CLI = config.getFolder("CLI+")
    if CLI.getTagIntegerDefault("width", 0):
        QFramework.TQLibrary.setConsoleWidth(CLI.getTagInteger("width"))

    # TODO: make a get aliases method? aliases also loaded during cut creation and analysis job booking
    # load the aliases from the config file
    aliases = QFramework.TQTaggable()
    aliases.importTagsWithoutPrefix(config, "cutParameters.")
    aliases.importTagsWithoutPrefix(config, "aliases.")
    QFramework.TQMVAObservable.globalAliases.importTags(aliases)

    # read the channel definitions
    channels = config.getTagVString("channels")

    # load MVA libraries if required
    if config.getTagBoolDefault("loadMVA", True):
        try:
            libMVA = analyze.loadLibMVA(config)
        except Exception as ex:
            libMVA = False
            template = "An exception of type '{0}' occured: {1!s}"
            # exception arguments are not guaranteed to be strings, so they
            # are converted explicitly before joining
            message = template.format(type(ex).__name__, ",".join(str(arg) for arg in ex.args))
            QFramework.ERROR(message)

    # set some global properties
    if not config.getTagBoolDefault("useTransientTree", True):
        QFramework.TQSample.gUseTransientTree = False
    if config.getTagBoolDefault("useAthenaAccessMode", False):
        QFramework.TQSample.gUseAthenaAccessMode = True
    if config.getTagBoolDefault("loadStore", True):
        # best effort: carry on without a store if it cannot be created, but
        # do not swallow KeyboardInterrupt/SystemExit
        try:
            store = ROOT.xAOD.TStore()
            store.setActive()
        except Exception:
            pass

    # load the sample folder from disk
    samples = common.loadSampleFolder(config)

    # make sure that the sample folder is writable before we go any further
    # helps to discover typos ahead of time
    common.testWriteSampleFolder(config, samples)

    # remove the data folder if not desired
    if not config.getTagBoolDefault("doData", True):
        QFramework.INFO("removing data folder")
        samples.deleteObject("data!")

    # flag indicating to run a robust analysis
    robust = CLI.getTagBoolDefault("robust", False)
    # flag indicating to run a dummy analysis
    dummy = CLI.getTagBoolDefault("dummy", False)

    if not robust and not dummy:
        # remove the channels that are not scheduled
        pattern = config.getTagStringDefault("channelFolderPattern", "?/$(channel)")
        # the return value is deliberately unused
        # NOTE(review): presumably this strips the last path element from
        # 'pattern' in place - confirm against TQFolder
        QFramework.TQFolder.getPathTail(pattern)
        for sf in samples.getListOfSampleFolders(QFramework.TQFolder.concatPaths(pattern, "?")):
            found = False
            for c in channels:
                if QFramework.TQStringUtils.equal(sf.getName(), c):
                    found = True
                    break
            if not found:
                QFramework.INFO("removing folder '{:s}' - unscheduled channel!".format(sf.getPath()))
                sf.detachFromBase()

        # check if all requested channels are present
        for c in channels:
            if not samples.getSampleFolder(QFramework.TQFolder.concatPaths(pattern, c)):
                QFramework.BREAK("channel '{:s}' was requested, but is not present in input!".format(c))

    # apply patches as given by the config
    common.patchSampleFolder(config.getTagVStandardString("patches"), samples)

    # run a reduction step if scheduled, slimming down the sample folder to reduce future memory consumption
    if config.getTagBoolDefault("purgeSamples", False):
        common.reduceSampleFolder(config, samples)

    pathselect = CLI.getTagVStandardString("pathselect")
    # TODO: put this in the reduceSampleFolder method as well?
    # if requested, purge samples (even more)
    if config.getTagBoolDefault("purgeRemainder", False) and pathselect.size() > 0:
        for path in pathselect:
            samples.setTagBool("restrict", True, path)
        samples.purgeWithoutTag("~restrict")

    if not samples.getListOfSampleFolders("?"):
        QFramework.BREAK("sample folder empty after purge - something is wrong!")

    # perform any pre-processing of the sample folder for handling of systematic uncertainties
    # this step is likely to be highly analysis-dependent, so this is just an example implementation
    hwwanalyze.prepareSystematics(config, samples)

    # load all the observables that allow access of the physics-content of your samples
    customobservables = analyze.loadObservables(config)

    # load all the cuts that are defined
    cuts = analyze.loadCuts(config)

    # load all the analysis jobs - booking histograms, event counters for cutflows, and much more
    # stores a boolean in config for determining whether or not analysis is cutbased
    # (yes if no MVA detected or at least 1 analysis job is booked)
    analyze.bookAnalysisJobs(config, cuts)

    # cutbased bool set in bookAnalysisJobs above
    runtime = config.getFolder("runtime+")
    cutbased = runtime.getTagBoolDefault("cutbased", False)

    if config.getTagBoolDefault("printCuts", cutbased):
        QFramework.INFO("cuts to visit samples with:")
        cuts.printCut()

    if customobservables or config.getTagBoolDefault("printObservables", False):
        QFramework.INFO("custom observables were defined - this is the list of known observables:")
        QFramework.TQTreeObservable.printObservables()

    # run the cutbased analysis
    if cutbased:
        # create an analysis sample visitor that will successively visit all the samples and execute the analysis when used
        visitor = analyze.createAnalysisSampleVisitor(config, cuts)

        # book algorithms that will be executed on the events before any cuts are applied or analysis jobs are executed
        analyze.bookAlgorithms(config, visitor)

        # execute analysis, visiting every sample and running over every event
        # this step might take a VERY LONG time
        nsamples = analyze.executeAnalysis(config, samples, visitor)

    # attach the cut definitions to the sample folder
    common.storeCuts(cuts, samples)

    # train any multivariate classifiers
    mvascriptnames = config.getTagVString("MVA")
    if len(mvascriptnames) > 0:
        analyze.trainMVA(config, samples, cuts)

    if config.getTagBoolDefault("printObservables", False):
        QFramework.TQObservable.printObservables()

    # retrieve variables that have been set to determine a successful analysis
    # successful by default, error if analysisError has a value or mvaOK is false
    analysisError = runtime.getTagStringDefault("analysisError", "")
    mvaOK = runtime.getTagBoolDefault("mvaOK", len(mvascriptnames))

    # apply patches as given by the config
    common.patchSampleFolder(config.getTagVStandardString("postPatches"), samples)

    # write the sample folder to disk
    # NOTE(review): the regular output is written when there is no error text
    # OR mvaOK is set, which is more permissive than the comment above
    # suggests - confirm that this is the intended condition
    if len(analysisError) == 0 or mvaOK:
        common.writeSampleFolder(config, samples)
    else:
        # write alternative output file (the analysis didn't crash but there is something the user should check!)
        # TODO: make this another method in common.py? probably
        # TODO: fix TString/std::string mess
        if CLI.getTagBoolDefault("debug", False):
            outfilename = ROOT.TString("debug.root")
        else:
            outfilename = config.getTagStringDefault("outputFile", "")
        # same object as outfilename: the in-place edits below affect both names
        altFileName = outfilename
        QFramework.TQStringUtils.removeTrailingText(altFileName, ".root")
        altFileName.Append(".isDone")
        with open(altFileName.Data(), 'w') as f:
            f.write(analysisError.Data() if isinstance(analysisError, ROOT.TString) else analysisError)
        QFramework.WARN("No regular output sample file has been written but the analysis completed without fatal errors. Please check '{:s}' for information on potential errors".format(altFileName.Data()))

    # optionally save a plot of the memory usage recorded during the job
    if config.hasTagString("memoryGraph"):
        ROOT.gROOT.SetBatch(True)
        memGraph = QFramework.TQLibrary.getMemoryGraph()
        memFileName = config.getTagStringDefault("memoryGraph", "memoryUsage.pdf")
        QFramework.TQStringUtils.ensureTrailingText(memFileName, ".pdf")
        memCanvas = QFramework.TQHistogramUtils.applyATLASstyle(memGraph, "Internal", 0.2, 0.9, 0.9, "timestamp", "rss [byte]")
        memCanvas.SaveAs(memFileName.Data())

    # TODO: this doesn't work as intended for now
    # print any keys which were not read during the job
    #common.printUnreadKeys(config)

    # write and print some performance information like memory usage and runtime
    common.printExecutionSummary(config)

    # temporary fix to prevent segfaults in AnaBase 2.3.48 and beyond
    # update: still necessary in 21.2.4
    try:
        ROOT.xAOD.ClearTransientTrees()
    except AttributeError:
        pass
if __name__ == "__main__":
    # create a pre-configured argument parser
    parser = analyze.DefaultArgumentParser()

    # QFramework and ROOT are imported only after the parser has been created;
    # main() picks them up as module-level names
    import QFramework
    import ROOT

    # 'imp' was removed from the standard library in Python 3.12 and is not
    # referenced anywhere in this script, so a failing import must not abort
    # the job; the name stays bound either way
    try:
        import imp
    except ImportError:
        imp = None

    # use the argument parser to read the command line arguments and config options from the config file
    config = common.getConfigOptions(parser.parse_args())

    # ignore command line arguments since ROOT is very greedy here (and tends to choke from it!)
    ROOT.PyConfig.IgnoreCommandLineOptions = True

    # call the main function
    main(config)