From 96999b1766297c956a2fe15a2dfd446747914818 Mon Sep 17 00:00:00 2001 From: Sebastien Wertz Date: Mon, 21 Jun 2021 16:59:09 +0200 Subject: [PATCH 1/5] slurm default partition name changed --- config/bamboo.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/bamboo.ini b/config/bamboo.ini index c2c4c5e..45bca79 100644 --- a/config/bamboo.ini +++ b/config/bamboo.ini @@ -4,7 +4,7 @@ update = 20 [slurm] sbatch_qos = normal -sbatch_partition = wn +sbatch_partition = standard sbatch_time = 0-6:00 sbatch_memPerCPU = 2000 ; can add below (and run with -t 4): --cpus-per-task=4 -- GitLab From a24d9a9cb35a74d9159c3f77d185d8e5682596dc Mon Sep 17 00:00:00 2001 From: Sebastien Wertz Date: Mon, 21 Jun 2021 17:00:09 +0200 Subject: [PATCH 2/5] call to prepareTree() changed --- python/baseTtbbPlotter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/baseTtbbPlotter.py b/python/baseTtbbPlotter.py index 70ba15a..bc6b8a1 100644 --- a/python/baseTtbbPlotter.py +++ b/python/baseTtbbPlotter.py @@ -118,7 +118,7 @@ class baseTtbbPlotter(NanoAODHistoModule): def prepareTree(self, tree, sample=None, sampleCfg=None): era = sampleCfg["era"] - tree,noSel,be,lumiArgs = super().prepareTree(tree, sample=sample, sampleCfg=sampleCfg, description=defs.getNanoAODDescription(era, self.isMC(sample)), lazyBackend=True) + tree,noSel,be,lumiArgs = super().prepareTree(tree, sample=sample, sampleCfg=sampleCfg, description=defs.getNanoAODDescription(era, self.isMC(sample)), backend="lazy") if self.isMC(sample): # if it's a systematics sample, turn off other systematics -- GitLab From 4c914394d7611563a39b27ac34be88ddb9929a0e Mon Sep 17 00:00:00 2001 From: Sebastien Wertz Date: Mon, 21 Jun 2021 17:12:22 +0200 Subject: [PATCH 3/5] new interface for loading C++ header files --- python/baseTtbbPlotter.py | 14 ++++++++++---- python/recoBaseTtbbPlotter.py | 3 +-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/baseTtbbPlotter.py 
b/python/baseTtbbPlotter.py index bc6b8a1..a3f867e 100644 --- a/python/baseTtbbPlotter.py +++ b/python/baseTtbbPlotter.py @@ -10,7 +10,6 @@ logger = logging.getLogger("ttbb plotter") from bamboo.analysismodules import NanoAODHistoModule from bamboo.analysisutils import parseAnalysisConfig -from bamboo.root import addIncludePath, loadHeader import definitions as defs import utils @@ -20,9 +19,6 @@ class baseTtbbPlotter(NanoAODHistoModule): def __init__(self, args): super().__init__(args) - addIncludePath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "include")) - loadHeader("HistogramEvaluator.h") - self.plotDefaults = { "y-axis" : "Events", "log-y" : "both", @@ -116,10 +112,20 @@ class baseTtbbPlotter(NanoAODHistoModule): if era != chosenEra: analysisCfg["eras"].pop(era) + print(analysisCfg) + + # newCfg = os.path.join(self.args.output, "full_analysis.yml") + # os.makedirs(self.args.output, exist_ok=True) + # with open(newCfg, "w") as f_: + # yaml.dump(analysisCfg, f_) + def prepareTree(self, tree, sample=None, sampleCfg=None): era = sampleCfg["era"] tree,noSel,be,lumiArgs = super().prepareTree(tree, sample=sample, sampleCfg=sampleCfg, description=defs.getNanoAODDescription(era, self.isMC(sample)), backend="lazy") + be.addDependency(includePath=os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "include")) + be.addDependency(headers="HistogramEvaluator.h") + if self.isMC(sample): # if it's a systematics sample, turn off other systematics sample_doSysts = self.doSysts diff --git a/python/recoBaseTtbbPlotter.py b/python/recoBaseTtbbPlotter.py index 838ff1f..2b47cd0 100644 --- a/python/recoBaseTtbbPlotter.py +++ b/python/recoBaseTtbbPlotter.py @@ -3,7 +3,6 @@ logger = logging.getLogger("ttbb plotter") from bamboo import treefunctions as op from bamboo.root import gbl as ROOT -from bamboo.root import loadHeader from bamboo.analysisutils import forceDefine import definitions as defs @@ -14,7 +13,6 @@ class 
recoBaseTtbbPlotter(baseTtbbPlotter): """""" def __init__(self, args): super().__init__(args) - loadHeader("BTagEffEvaluator.h") def addArgs(self, parser): super().addArgs(parser) @@ -29,6 +27,7 @@ class recoBaseTtbbPlotter(baseTtbbPlotter): defs.addMuonRocCor(be, tree._Muon, era, sample, self.isMC(sample)) if self.isMC(sample): + be.addDependency(headers="BTagEffEvaluator.h") # self.bTagEff = op.define("BTagEffEvaluator", 'const auto <> = BTagEffEvaluator("/t3home/swertz/swertz/bambooOutput/200113_bTagEffs/results/efficiencies.root", {0.277});') if self.args.sf_patches == "3d": -- GitLab From 0ecff1ca32bd5f430c45489a94fa718821bd7558 Mon Sep 17 00:00:00 2001 From: Sebastien Wertz Date: Mon, 21 Jun 2021 17:28:21 +0200 Subject: [PATCH 4/5] Fill in sample template in any case Instead of having the driver fill in the template (and make necessary changes), then dump the resulting config in the working directory and have the workers open that one, have all the workers open the template and fill in/change the content. This is required for the new bamboo tracking system to work: the analysis config should be tracked by git in the analysis repository. --- python/baseTtbbPlotter.py | 52 +++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/python/baseTtbbPlotter.py b/python/baseTtbbPlotter.py index a3f867e..2f0422e 100644 --- a/python/baseTtbbPlotter.py +++ b/python/baseTtbbPlotter.py @@ -3,6 +3,7 @@ import json import yaml import glob import shutil +from itertools import chain import logging logger = logging.getLogger("ttbb plotter") @@ -53,46 +54,37 @@ class baseTtbbPlotter(NanoAODHistoModule): parser.add_argument("-s", "--systematic", action="store_true", help="Produce systematic variations") parser.add_argument("-r", "--reduce-split", type=int, default=0, help="Reduce number of jobs by factor X") parser.add_argument("--samples", nargs='*', help="Sample template YML files: JSON files are inserted from env. 
variable SAMPLE_JSONS") + parser.add_argument("--sample-jsons", default=[os.getenv("SAMPLE_JSONS")], nargs='*', help="Sample template YML files: JSON files are inserted from env. variable SAMPLE_JSONS") - def initialize(self): - super().initialize() - # If required, insert list of files from runPostCrab JSONS at this stage, - # write the resulting full analysis yml into the output dir and make sure the worker jobs use that new one. - # This must be done here to change the analysis cfg that will be used later on directly in `args.inputs[0]` - if self.args.samples and (not self.args.distributed or self.args.distributed != "worker"): - analysisCfg = parseAnalysisConfig(self.args.input[0]) - json_folder = os.getenv("SAMPLE_JSONS") - if not json_folder: + def customizeAnalysisCfg(self, analysisCfg): + # fill sample template using JSON files + if self.args.samples: + json_folders = self.args.sample_jsons + if not json_folders: raise RuntimeError("Should specify a SAMPLE_JSONS environment variable when using the 'samples' argument!") - jsons = glob.glob(json_folder + "/*.json") + jsons = list(chain.from_iterable(glob.glob(folder + "/*.json") for folder in json_folders)) eras = self.args.eras[1] - sampleDict = {} + samples = {} + # make sure we use absolute paths as this argument will be used by the worker jobs + self.args.samples = [ os.path.abspath(p) for p in self.args.samples ] for tmpPath in self.args.samples: with open(tmpPath) as f_: template = yaml.load(f_, Loader=yaml.SafeLoader) - sampleDict.update(utils.insertSampleFilesIntoTemplate(template, jsons, eras)) - analysisCfg["samples"] = sampleDict - newCfg = os.path.join(self.args.output, "full_analysis.yml") - os.makedirs(self.args.output, exist_ok=True) - with open(newCfg, "w") as f_: - yaml.dump(analysisCfg, f_) - self.args.input = [newCfg] + samples.update(utils.insertSampleFilesIntoTemplate(template, jsons, eras)) + analysisCfg["samples"] = samples - def customizeAnalysisCfg(self, analysisCfg): - samples = 
analysisCfg["samples"] - # reduce job splitting if self.args.reduce_split: for smp in samples.values(): smp["split"] *= self.args.reduce_split - + # if we're not doing systematics, remove the systematics samples from the list if not self.doSysts: for smp in list(samples.keys()): if "syst" in samples[smp]: samples.pop(smp) - if self.args.test and not self.args.distributed: + if self.args.test and (not self.args.distributed or self.args.distributed != "worker"): # only keep 1 MC (if possible, a signal) and 1 data file of any era, for testing the plotter chosenEra = next(self.args.eras[1]) if self.args.eras[1] else None foundMC = utils.getAnySignalSample(samples, self.isMC, era=chosenEra) @@ -100,7 +92,8 @@ class baseTtbbPlotter(NanoAODHistoModule): foundMC = utils.getAnyMCSample(samples, self.isMC) chosenEra = samples[foundMC]["era"] if foundMC else None foundData = utils.getAnyDataSample(samples, self.isMC, era=chosenEra) - chosenEra = samples[foundData]["era"] + if foundData: + chosenEra = samples[foundData]["era"] for smpNm in list(samples.keys()): if smpNm != foundMC and smpNm != foundData: samples.pop(smpNm) @@ -112,12 +105,11 @@ class baseTtbbPlotter(NanoAODHistoModule): if era != chosenEra: analysisCfg["eras"].pop(era) - print(analysisCfg) - - # newCfg = os.path.join(self.args.output, "full_analysis.yml") - # os.makedirs(self.args.output, exist_ok=True) - # with open(newCfg, "w") as f_: - # yaml.dump(analysisCfg, f_) + # back up analysis config - not really needed since git config is stored, but JIC + newCfg = os.path.join(self.args.output, "full_analysis.yml") + os.makedirs(self.args.output, exist_ok=True) + with open(newCfg, "w") as f_: + yaml.dump(analysisCfg, f_) def prepareTree(self, tree, sample=None, sampleCfg=None): era = sampleCfg["era"] -- GitLab From 6f875a9fff1ce40cb2dc7e6cf137debac1625894 Mon Sep 17 00:00:00 2001 From: Sebastien Wertz Date: Wed, 23 Jun 2021 14:29:51 +0200 Subject: [PATCH 5/5] fix dumping of analysis config --- 
python/baseTtbbPlotter.py | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/python/baseTtbbPlotter.py b/python/baseTtbbPlotter.py index 2f0422e..52a2f12 100644 --- a/python/baseTtbbPlotter.py +++ b/python/baseTtbbPlotter.py @@ -84,26 +84,27 @@ class baseTtbbPlotter(NanoAODHistoModule): if "syst" in samples[smp]: samples.pop(smp) - if self.args.test and (not self.args.distributed or self.args.distributed != "worker"): - # only keep 1 MC (if possible, a signal) and 1 data file of any era, for testing the plotter - chosenEra = next(self.args.eras[1]) if self.args.eras[1] else None - foundMC = utils.getAnySignalSample(samples, self.isMC, era=chosenEra) - if foundMC is None: - foundMC = utils.getAnyMCSample(samples, self.isMC) - chosenEra = samples[foundMC]["era"] if foundMC else None - foundData = utils.getAnyDataSample(samples, self.isMC, era=chosenEra) - if foundData: - chosenEra = samples[foundData]["era"] - for smpNm in list(samples.keys()): - if smpNm != foundMC and smpNm != foundData: - samples.pop(smpNm) - # only keep 1 file per sample - for smpNm,smp in samples.items(): - smp["files"] = [smp["files"][0]] - # adjust the eras in the analysis config - for era in list(analysisCfg["eras"].keys()): - if era != chosenEra: - analysisCfg["eras"].pop(era) + if not self.args.distributed or self.args.distributed != "worker": + if self.args.test: + # only keep 1 MC (if possible, a signal) and 1 data file of any era, for testing the plotter + chosenEra = next(self.args.eras[1]) if self.args.eras[1] else None + foundMC = utils.getAnySignalSample(samples, self.isMC, era=chosenEra) + if foundMC is None: + foundMC = utils.getAnyMCSample(samples, self.isMC) + chosenEra = samples[foundMC]["era"] if foundMC else None + foundData = utils.getAnyDataSample(samples, self.isMC, era=chosenEra) + if foundData: + chosenEra = samples[foundData]["era"] + for smpNm in list(samples.keys()): + if smpNm != foundMC and smpNm != foundData: 
+ samples.pop(smpNm) + # only keep 1 file per sample + for smpNm,smp in samples.items(): + smp["files"] = [smp["files"][0]] + # adjust the eras in the analysis config + for era in list(analysisCfg["eras"].keys()): + if era != chosenEra: + analysisCfg["eras"].pop(era) # back up analysis config - not really needed since git config is stored, but JIC newCfg = os.path.join(self.args.output, "full_analysis.yml") -- GitLab