diff --git a/python/baseTtbbPlotter.py b/python/baseTtbbPlotter.py index f39cba962e4788386d4b7ddf9bf1cb7cde03a42b..70ba15a6508fdfdc5a10730a4152dc4b38bfcd63 100644 --- a/python/baseTtbbPlotter.py +++ b/python/baseTtbbPlotter.py @@ -33,6 +33,23 @@ class baseTtbbPlotter(NanoAODHistoModule): } self.doSysts = self.args.systematic + # FIXME find a better way of doing that... add an argument? + # either "separate" (muR/muF variations) + # or "combined" (7-point envelope) + self.qcdScaleVarMode = "separate" + + # used for yields tables/cutflowreports + self.yieldsPrefix = "yields" + + def getQcdScaleVariations(self, sample, sampleCfg): + """Just to know what histogram envelopes to take during postprocessing: + only for MC, if systematics are enabled, if the sample has no buggy weights, + and if we take the QCD scale envelope!""" + if not (self.doSysts and self.isMC(sample) and\ + "QCD" not in utils.getBuggySystList(sample, sampleCfg) and\ + self.qcdScaleVarMode == "combined"): + return [] + return [ (i, f"qcdScalevar{i}") for i in [0, 1, 3, 5, 7, 8] ] def addArgs(self, parser): super().addArgs(parser) @@ -81,37 +98,13 @@ class baseTtbbPlotter(NanoAODHistoModule): if self.args.test and not self.args.distributed: # only keep 1 MC (if possible, a signal) and 1 data file of any era, for testing the plotter - foundMC = None - foundData = None - chosenEra = None - for smpNm,smp in samples.items(): - if not foundData and not self.isMC(smpNm): - if chosenEra: - if smp["era"] == chosenEra: - foundData = smpNm - else: - foundData = smpNm - chosenEra = smp["era"] - if not foundMC and smp.get("is_signal", False) and self.isMC(smpNm): - if chosenEra: - if smp["era"] == chosenEra: - foundMC = smpNm - else: - foundMC = smpNm - chosenEra = smp["era"] - if foundMC and foundData: - break - # no signal found, now look for any MC - if not foundMC: - for smpNm,smp in samples.items(): - if self.isMC(smpNm): - if chosenEra: - if smp["era"] == chosenEra: - foundMC = smpNm - else: - foundMC = smpNm - chosenEra = smp["era"] - break + chosenEra = next(self.args.eras[1]) if self.args.eras[1] else None + foundMC = utils.getAnySignalSample(samples, self.isMC, era=chosenEra) + if foundMC is None: + foundMC = utils.getAnyMCSample(samples, self.isMC) + chosenEra = samples[foundMC]["era"] if foundMC else None + foundData = utils.getAnyDataSample(samples, self.isMC, era=chosenEra) + chosenEra = samples[foundData]["era"] for smpNm in list(samples.keys()): if smpNm != foundMC and smpNm != foundData: samples.pop(smpNm) @@ -152,8 +145,9 @@ class baseTtbbPlotter(NanoAODHistoModule): def postProcess(self, taskList, config=None, workdir=None, resultsdir=None, makeBU=True, removeBatch=True, createEnvelope=True, moveSystHists=True, removeFileList=True, remove4F5Foverlap=True): - if not self.plotList: - self.plotList = self.getPlotList(resultsdir=resultsdir, config=config) + self.plotList = self.getPlotList( + resultsdir=resultsdir, config=config, + sampleHint=(utils.getAnyMCSample(config["samples"], self.isMC) or utils.getAnyDataSample(config["samples"], self.isMC))) if makeBU: # create backup directory with all merged results @@ -169,18 +163,17 @@ class baseTtbbPlotter(NanoAODHistoModule): shutil.rmtree(batchOut) if createEnvelope: - # create QCD scale envelopes - if needed (only for 7-point variation, which defines self.qcdScaleVariations) + # create QCD scale envelopes - if needed (only if 7-point variations are done) for task in taskList: - if self.doSysts and self.isMC(task.outputFile): - utils.produceMEScaleEnvelopes(self.plotList, self.qcdScaleVariations, os.path.join(resultsdir, task.outputFile)) + qcdScaleVariations = self.getQcdScaleVariations(task.name, task.config) + utils.produceMEScaleEnvelopes(self.plotList, qcdScaleVariations, os.path.join(resultsdir, task.outputFile)) if moveSystHists: # renormalize, copy and rename histograms from systematic variation files into nominal files # also remove the systematic samples from the plotIt list - + if self.doSysts: - #if False: - utils.postProcSystSamples(taskList, config["samples"], resultsdir) + utils.postProcSystSamples(taskList, config["samples"], resultsdir, cutFlowPrefix=self.yieldsPrefix) if removeFileList: # remove the file lists from the yml config - not needed for plotIt diff --git a/python/controlPlotter.py b/python/controlPlotter.py index fe0735dfae8784bf893fb8fd9cf1f2f3ffecabc7..6b8aa9f14e5f26fd816101e4f1620647e53afbfe 100644 --- a/python/controlPlotter.py +++ b/python/controlPlotter.py @@ -48,7 +48,7 @@ class controlPlotter(recoBaseTtbbPlotter): ###### Cutflow report - yields = CutFlowReport("yields") + yields = CutFlowReport(s.yieldsPrefix) plots.append(yields) ###### Plots for ==1 lepton, >=4 jets (ttbar level) ###### diff --git a/python/utils.py b/python/utils.py index 2c9c66f507d8854ba4f2e2753b28d58964c0182f..69f24fb41da1ff16181dd8ea7f5668695f72867c 100644 --- a/python/utils.py +++ b/python/utils.py @@ -1,6 +1,7 @@ import os import re import json +import yaml from copy import deepcopy import logging @@ -72,6 +73,35 @@ def getRunEra(sample): raise RuntimeError("Could not find run era from sample {}".format(sample)) return result.group(1) +def getAnyDataSample(samples, mcChecker, era=None): + """Return any data from the sample list. + If an era is specified, find a dataset from that era. + mcChecker a function: mcChecker(sample) is True if sample is MC - typically pass analysisModule.isMC""" + for smpNm,smp in samples.items(): + if not mcChecker(smpNm): + if (era is None) or (era and smp["era"] == era): + return smpNm +def getAnySignalSample(samples, mcChecker, era=None): + """Return any signal from the sample list. + If an era is specified, find a signal from that era. + mcChecker a function: mcChecker(sample) is True if sample is MC - typically pass analysisModule.isMC""" + for smpNm,smp in samples.items(): + if mcChecker(smpNm): + if (era is None) or (era and smp["era"] == era): + if smp.get("is_signal", False): + return smpNm +def getAnyMCSample(samples, mcChecker, era=None, noSystSample=True): + """Return any MC from the sample list. + If an era is specified, find a sample from that era. + mcChecker a function: mcChecker(sample) is True if sample is MC - typically pass analysisModule.isMC + If noSystSample is True, systematic variation samples are not considered""" + for smpNm,smp in samples.items(): + if mcChecker(smpNm): + if (era is None) or (era and smp["era"] == era): + if (noSystSample and "syst" in smp): + continue + return smpNm + def makeMergedPlots(categDef, newCat, name, binning, var=None, **kwargs): """ Make a series of plots which will be merged. - categDef can either be e.g.: @@ -111,33 +141,34 @@ def makeMergedPlots(categDef, newCat, name, binning, var=None, **kwargs): #### Common tasks (systematics, sample splittings...) -def addTheorySystematics(plotter, sample, sampleCfg, tree, noSel, MEscale=True, PSISR=True, PSFSR=True, combinedMEscale=False): - # FIXME - import yaml +def getBuggySystList(sample, sampleCfg): with open(os.path.join(os.path.dirname(__file__), '..', 'config', 'buggyThSyst.yml')) as file_: origName = sample if not "subprocess" in sampleCfg else "".join(sample.split("_" + sampleCfg["subprocess"])) buggySamples = yaml.load(file_, Loader=yaml.FullLoader) - buggySyst = buggySamples.get(origName, "") - if buggySyst: - logger.warning(f"Will NOT apply the following uncertainties to sample {sample}: {buggySyst}") - - if MEscale: - # for muF and muR separately - if "QCD" not in buggySyst: - logger.info("Adding separate muF/muR systematics") - noSel = noSel.refine("qcdMuF", weight=op.systematic(op.c_float(1.), name="qcdMuF", up=tree.LHEScaleWeight[3], down=tree.LHEScaleWeight[5])) - noSel = noSel.refine("qcdMuR", weight=op.systematic(op.c_float(1.), name="qcdMuR", up=tree.LHEScaleWeight[1], down=tree.LHEScaleWeight[7])) - plotter.qcdScaleVariations = dict() - - if combinedMEscale: - # for taking envelope of 7-point variation - if "QCD" not in buggySyst: - logger.info("Adding 7-point muF/muR systematics") - plotter.qcdScaleVariations = { f"qcdScalevar{i}": tree.LHEScaleWeight[i] for i in [0, 1, 3, 5, 7, 8] } - qcdScaleSyst = op.systematic(op.c_float(1.), name="qcdScale", **plotter.qcdScaleVariations) - noSel = noSel.refine("qcdScale", weight=qcdScaleSyst) - else: - plotter.qcdScaleVariations = dict() + return buggySamples.get(origName, "") + +def addTheorySystematics(plotter, sample, sampleCfg, tree, noSel, qcdVar=True, PSISR=True, PSFSR=True): + # FIXME + buggySyst = getBuggySystList(sample, sampleCfg) + if buggySyst: + logger.warning(f"Will NOT apply the following uncertainties to sample {sample}: {buggySyst}") + + if qcdVar: + if plotter.qcdScaleVarMode == "separate": + # for muF and muR separately + if "QCD" not in buggySyst: + logger.info("Adding separate muF/muR systematics") + noSel = noSel.refine("qcdMuF", weight=op.systematic(op.c_float(1.), name="qcdMuF", up=tree.LHEScaleWeight[3], down=tree.LHEScaleWeight[5])) + noSel = noSel.refine("qcdMuR", weight=op.systematic(op.c_float(1.), name="qcdMuR", up=tree.LHEScaleWeight[1], down=tree.LHEScaleWeight[7])) + + elif plotter.qcdScaleVarMode == "combined": + # for taking envelope of 7-point variation + qcdScaleVariations = plotter.getQcdScaleVariations(sample, sampleCfg) + if qcdScaleVariations: + logger.info("Adding 7-point muF/muR systematics") + qcdScaleVariations = { varNm: tree.LHEScaleWeight[varIdx] for (varIdx, varNm) in qcdScaleVariations } + qcdScaleSyst = op.systematic(op.c_float(1.), name="qcdScale", **qcdScaleVariations) + noSel = noSel.refine("qcdScale", weight=qcdScaleSyst) if PSISR and "PS" not in buggySyst: logger.info("Adding PS ISR systematics") @@ -249,7 +280,7 @@ def produceMEScaleEnvelopes(plots, scaleVariations, path): tf.Close() -def postProcSystSamples(taskList, samples, resultsdir): +def postProcSystSamples(taskList, samples, resultsdir, cutFlowPrefix="yields_"): """ Rename the systematics samples so they can be interpreted by plotIt Also fix the normalization of their histograms @@ -268,6 +299,8 @@ def postProcSystSamples(taskList, samples, resultsdir): tfNom = HT.openFileAndGet(os.path.join(resultsdir, nominalSample + ".root"), "update") wgtRatio = samples[nominalSample]["generated-events"] / task.config["generated-events"] for k in tfSyst.GetListOfKeys(): + # skip cutflowreports for now, as systematics are not entirely supported yet + if k.GetName().startswith(cutFlowPrefix): continue hist = k.ReadObj() if not hist.InheritsFrom("TH1"): continue name = hist.GetName() + "__" + systVar