From a696706bdd6553c4c036bb447c342542f3fd33db Mon Sep 17 00:00:00 2001 From: Davide Valsecchi <davide.valsecchi@cern.ch> Date: Thu, 7 Apr 2022 08:23:33 +0200 Subject: [PATCH] Debugging the new cut implementation --- config/base.py | 2 +- lib/cut_functions.py | 3 ++- parameters/cuts/baseline_cuts.py | 5 ++-- parameters/cuts/cut_definition.py | 2 ++ utils/Configurator.py | 41 ++++++++++++++++--------------- workflows/base.py | 25 ++++++++++++------- 6 files changed, 44 insertions(+), 34 deletions(-) diff --git a/config/base.py b/config/base.py index 32977a1c..b6760e42 100644 --- a/config/base.py +++ b/config/base.py @@ -1,4 +1,4 @@ - from parameters.cuts.baseline_cuts import dilepton_presel, passthrough +from parameters.cuts.baseline_cuts import dilepton_presel, passthrough cfg = { # Dataset parameters diff --git a/lib/cut_functions.py b/lib/cut_functions.py index 6bf731d8..85a809f2 100644 --- a/lib/cut_functions.py +++ b/lib/cut_functions.py @@ -1,6 +1,7 @@ import awkward as ak -from parameters.selection import event_selection +def passthrough(events, **kargs): + return ak.full_like(events.event, True, dtype=bool) def dilepton(events, params, year, sample): diff --git a/parameters/cuts/baseline_cuts.py b/parameters/cuts/baseline_cuts.py index 7eeeb14e..20709445 100644 --- a/parameters/cuts/baseline_cuts.py +++ b/parameters/cuts/baseline_cuts.py @@ -3,11 +3,10 @@ import awkward as ak import lib.cut_functions as cuts_f from parameters.cuts.cut_definition import Cut - passthrough = Cut( name="passthrough", params = {}, - function : lambda (events, params, year, sample): return ak.ones_like(events) + function= cuts_f.passthrough ) dilepton_presel = Cut( @@ -25,7 +24,7 @@ dilepton_presel = Cut( "mll" : 20, "mll_SFOS" : {'low' : 76, 'high' : 106} }, - function: cuts_f.dilepton + function = cuts_f.dilepton ) diff --git a/parameters/cuts/cut_definition.py b/parameters/cuts/cut_definition.py index b86d6715..bf5d1f39 100644 --- a/parameters/cuts/cut_definition.py +++ b/parameters/cuts/cut_definition.py @@ -1,4 +1,6 @@ +from dataclasses import dataclass from collections.abc import Callable +import awkward as ak @dataclass class Cut: diff --git a/utils/Configurator.py b/utils/Configurator.py index f1020c75..5cf1215a 100644 --- a/utils/Configurator.py +++ b/utils/Configurator.py @@ -133,29 +133,30 @@ class Configurator(): def load_workflow(self): if self.workflow == "base": from workflows.base import ttHbbBaseProcessor - self.processor_instance = ttHbbBaseProcessor(cfg=self.cfg) + self.processor_instance = ttHbbBaseProcessor(cfg=self) elif self.workflow == "mem": from workflows.mem import MEMStudiesProcessor - self.processor_instance = MEMStudiesProcessor(cfg=self.cfg) + self.processor_instance = MEMStudiesProcessor(cfg=self) else: raise NotImplemented def save_config(self): - functions_to_import = [] - import_line = "from lib.cuts import " - for key in self.cfg['cuts_definition'].keys(): - functions_to_import.append(self.cfg['cuts_definition'][key]['f']) - buffer = ''.join( ("cfg = ", pprint.pformat(self.cfg, sort_dicts=False)) ) - for f in functions_to_import: - buffer = buffer.replace(str(f), f.__name__) - import_line = ''.join( (import_line, ', '.join([f.__name__ for f in functions_to_import])) ) - buffer = import_line + '\n\n' + buffer + '\n' - - if self.plot: - config_file = os.path.join(self.plots, "config.py") - else: - config_file = os.path.join(self.output, "config.py") - print("Saving config file to " + config_file) - with open(config_file, 'w') as f: - f.write(buffer) - f.close() + # functions_to_import = [] + # import_line = "from lib.cuts import " + # for key in self.cfg['cuts_definition'].keys(): + # functions_to_import.append(self.cfg['cuts_definition'][key]['f']) + # buffer = ''.join( ("cfg = ", pprint.pformat(self.cfg, sort_dicts=False)) ) + # for f in functions_to_import: + # buffer = buffer.replace(str(f), f.__name__) + # import_line = ''.join( (import_line, ', '.join([f.__name__ for f in functions_to_import])) ) + # buffer = import_line + '\n\n' + buffer + '\n' + + # if self.plot: + # config_file = os.path.join(self.plots, "config.py") + # else: + # config_file = os.path.join(self.output, "config.py") + # print("Saving config file to " + config_file) + # with open(config_file, 'w') as f: + # f.write(buf + # f.close() + pass diff --git a/workflows/base.py b/workflows/base.py index eec31857..dafd72c6 100644 --- a/workflows/base.py +++ b/workflows/base.py @@ -8,7 +8,6 @@ import numpy as np import awkward as ak from lib.objects import lepton_selection, jet_selection, get_dilepton -from lib.cuts import dilepton from lib.fill import fill_histograms_object from parameters.triggers import triggers from parameters.btag import btag @@ -48,7 +47,8 @@ class ttHbbBaseProcessor(processor.ProcessorABC): self._sumw_dict = { "sumw": processor.defaultdict_accumulator(float), - "nevts": processor.defaultdict_accumulator(int), + "nevts_initial": processor.defaultdict_accumulator(int), + "nevts_presel" : processor.defaultdict_accumulator(int), } #for var in self._vars_to_plot.keys(): @@ -80,6 +80,10 @@ class ttHbbBaseProcessor(processor.ProcessorABC): def accumulator(self): return self._accumulator + @property + def nevents(self): + return ak.count(self.events.event) + # Function to load year-dependent parameters def load_metadata(self): self._dataset = self.events.metadata["dataset"] @@ -90,7 +94,7 @@ class ttHbbBaseProcessor(processor.ProcessorABC): # Function to apply flags and lumi mask def clean_events(self): - mask_clean = np.ones(self.nEvents, dtype=np.bool) + mask_clean = np.ones(self.nEvents_initial, dtype=np.bool) flags = [ "goodVertices", "globalSuperTightHalo2016Filter", "HBHENoiseFilter", "HBHENoiseIsoFilter", "EcalDeadCellTriggerPrimitiveFilter", "BadPFMuonFilter"]#, "BadChargedCandidateFilter", "ecalBadCalibFilter"] if not self.isMC: @@ -158,13 +162,13 @@ class ttHbbBaseProcessor(processor.ProcessorABC): self.events = self.events[self._preselection_masks.all(*self._preselection_masks.names)] def define_categories(self): - for cut in self._cuts: + for cut_name, cut in self._cuts.items(): mask = cut.get_mask(self.events, year=self._year, sample=self._sample) self._cuts_masks.add(cut.name, mask) # We make sure that for each category the list of cuts is unique in the Configurator validation def compute_weights(self): - self.weights = Weights(self.nEvents) + self.weights = Weights(self.nevents) if self.isMC: self.weights.add('genWeight', self.events.genWeight) self.weights.add('lumi', ak.full_like(self.events.genWeight, lumi[self._year])) @@ -181,14 +185,15 @@ class ttHbbBaseProcessor(processor.ProcessorABC): pass def process(self, events): - self.output = self.accumulator.identity() - #if len(events)==0: return output self.events = events - self.nEvents = ak.count(self.events.event) self.load_metadata() - self.output['nevts'][self._sample] += self.nEvents + self.output = self.accumulator.identity() + #if len(events)==0: return output + self.nEvents_initial = self.nevents + self.output['nevts_initial'][self._sample] += self.nEvents_initial self.isMC = 'genWeight' in self.events.fields if self.isMC: + # This is computed before any preselection self.output['sumw'][self._sample] += sum(self.events.genWeight) # Event cleaning and PV selection @@ -200,6 +205,8 @@ class ttHbbBaseProcessor(processor.ProcessorABC): self.apply_triggers() # This will remove all the events not passing preselection from further self.apply_preselection_cuts() + self.nEvents_after_presel = self.nevents + self.output['nevts_presel'][self._sample] += self.nEvents_after_presel # This function looks at the categories in the cfg file, # apply the single cut functions and then prepares the categories -- GitLab