From a696706bdd6553c4c036bb447c342542f3fd33db Mon Sep 17 00:00:00 2001
From: Davide Valsecchi <davide.valsecchi@cern.ch>
Date: Thu, 7 Apr 2022 08:23:33 +0200
Subject: [PATCH] Debugging the new cut implementation

---
 config/base.py                    |  2 +-
 lib/cut_functions.py              |  3 ++-
 parameters/cuts/baseline_cuts.py  |  5 ++--
 parameters/cuts/cut_definition.py |  2 ++
 utils/Configurator.py             | 41 ++++++++++++++++---------------
 workflows/base.py                 | 25 ++++++++++++-------
 6 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/config/base.py b/config/base.py
index 32977a1c..b6760e42 100644
--- a/config/base.py
+++ b/config/base.py
@@ -1,4 +1,4 @@
- from parameters.cuts.baseline_cuts import dilepton_presel, passthrough
+from parameters.cuts.baseline_cuts import dilepton_presel, passthrough
 
 cfg =  {
     # Dataset parameters
diff --git a/lib/cut_functions.py b/lib/cut_functions.py
index 6bf731d8..85a809f2 100644
--- a/lib/cut_functions.py
+++ b/lib/cut_functions.py
@@ -1,6 +1,7 @@
 import awkward as ak
 
-from parameters.selection import event_selection
+def passthrough(events, **kargs):
+    return ak.full_like(events.event, True, dtype=bool)
 
 def dilepton(events, params, year, sample):
 
diff --git a/parameters/cuts/baseline_cuts.py b/parameters/cuts/baseline_cuts.py
index 7eeeb14e..20709445 100644
--- a/parameters/cuts/baseline_cuts.py
+++ b/parameters/cuts/baseline_cuts.py
@@ -3,11 +3,10 @@ import awkward as ak
 import lib.cut_functions as cuts_f
 from parameters.cuts.cut_definition import Cut
 
-
 passthrough = Cut(
     name="passthrough",
     params = {},
-    function : lambda (events, params, year, sample): return ak.ones_like(events)
+    function= cuts_f.passthrough
 )
 
 dilepton_presel = Cut(
@@ -25,7 +24,7 @@ dilepton_presel = Cut(
 		"mll" : 20,
 		"mll_SFOS" : {'low' : 76, 'high' : 106}
 	},
-    function: cuts_f.dilepton
+    function = cuts_f.dilepton
  )
 
 
diff --git a/parameters/cuts/cut_definition.py b/parameters/cuts/cut_definition.py
index b86d6715..bf5d1f39 100644
--- a/parameters/cuts/cut_definition.py
+++ b/parameters/cuts/cut_definition.py
@@ -1,4 +1,6 @@
+from dataclasses import dataclass
 from collections.abc import Callable
+import awkward as ak
 
 @dataclass
 class Cut:
diff --git a/utils/Configurator.py b/utils/Configurator.py
index f1020c75..5cf1215a 100644
--- a/utils/Configurator.py
+++ b/utils/Configurator.py
@@ -133,29 +133,30 @@ class Configurator():
     def load_workflow(self):
         if self.workflow == "base":
             from workflows.base import ttHbbBaseProcessor
-            self.processor_instance = ttHbbBaseProcessor(cfg=self.cfg)
+            self.processor_instance = ttHbbBaseProcessor(cfg=self)
         elif self.workflow == "mem":
             from workflows.mem import MEMStudiesProcessor
-            self.processor_instance = MEMStudiesProcessor(cfg=self.cfg)
+            self.processor_instance = MEMStudiesProcessor(cfg=self)
         else:
             raise NotImplemented
 
     def save_config(self):
-        functions_to_import = []
-        import_line = "from lib.cuts import "
-        for key in self.cfg['cuts_definition'].keys():
-            functions_to_import.append(self.cfg['cuts_definition'][key]['f'])
-        buffer = ''.join( ("cfg = ", pprint.pformat(self.cfg, sort_dicts=False)) )
-        for f in functions_to_import:
-            buffer = buffer.replace(str(f), f.__name__)
-        import_line = ''.join( (import_line, ', '.join([f.__name__ for f in functions_to_import])) )
-        buffer = import_line + '\n\n' + buffer + '\n'
-
-        if self.plot:
-            config_file = os.path.join(self.plots, "config.py")
-        else:
-            config_file = os.path.join(self.output, "config.py")
-        print("Saving config file to " + config_file)
-        with open(config_file, 'w') as f:
-            f.write(buffer)
-        f.close()
+        # functions_to_import = []
+        # import_line = "from lib.cuts import "
+        # for key in self.cfg['cuts_definition'].keys():
+        #     functions_to_import.append(self.cfg['cuts_definition'][key]['f'])
+        # buffer = ''.join( ("cfg = ", pprint.pformat(self.cfg, sort_dicts=False)) )
+        # for f in functions_to_import:
+        #     buffer = buffer.replace(str(f), f.__name__)
+        # import_line = ''.join( (import_line, ', '.join([f.__name__ for f in functions_to_import])) )
+        # buffer = import_line + '\n\n' + buffer + '\n'
+
+        # if self.plot:
+        #     config_file = os.path.join(self.plots, "config.py")
+        # else:
+        #     config_file = os.path.join(self.output, "config.py")
+        # print("Saving config file to " + config_file)
+        # with open(config_file, 'w') as f:
+        #     f.write(buffer)
+        # f.close()
+        pass
diff --git a/workflows/base.py b/workflows/base.py
index eec31857..dafd72c6 100644
--- a/workflows/base.py
+++ b/workflows/base.py
@@ -8,7 +8,6 @@ import numpy as np
 import awkward as ak
 
 from lib.objects import lepton_selection, jet_selection, get_dilepton
-from lib.cuts import dilepton
 from lib.fill import fill_histograms_object
 from parameters.triggers import triggers
 from parameters.btag import btag
@@ -48,7 +47,8 @@ class ttHbbBaseProcessor(processor.ProcessorABC):
 
         self._sumw_dict = {
             "sumw": processor.defaultdict_accumulator(float),
-            "nevts": processor.defaultdict_accumulator(int),
+            "nevts_initial": processor.defaultdict_accumulator(int),
+            "nevts_presel" : processor.defaultdict_accumulator(int),
         }
 
         #for var in self._vars_to_plot.keys():
@@ -80,6 +80,10 @@ class ttHbbBaseProcessor(processor.ProcessorABC):
     def accumulator(self):
         return self._accumulator
 
+    @property
+    def nevents(self):
+        return ak.count(self.events.event)
+
     # Function to load year-dependent parameters
     def load_metadata(self):
         self._dataset = self.events.metadata["dataset"]
@@ -90,7 +94,7 @@ class ttHbbBaseProcessor(processor.ProcessorABC):
 
     # Function to apply flags and lumi mask
     def clean_events(self):
-        mask_clean = np.ones(self.nEvents, dtype=np.bool)
+        mask_clean = np.ones(self.nEvents_initial, dtype=np.bool)
         flags = [
             "goodVertices", "globalSuperTightHalo2016Filter", "HBHENoiseFilter", "HBHENoiseIsoFilter", "EcalDeadCellTriggerPrimitiveFilter", "BadPFMuonFilter"]#, "BadChargedCandidateFilter", "ecalBadCalibFilter"]
         if not self.isMC:
@@ -158,13 +162,13 @@ class ttHbbBaseProcessor(processor.ProcessorABC):
         self.events = self.events[self._preselection_masks.all(*self._preselection_masks.names)]
             
     def define_categories(self):
-        for cut in self._cuts:
+        for cut_name, cut in self._cuts.items():
             mask = cut.get_mask(self.events, year=self._year, sample=self._sample)
             self._cuts_masks.add(cut.name, mask)
         # We make sure that for each category the list of cuts is unique in the Configurator validation
 
     def compute_weights(self):
-        self.weights = Weights(self.nEvents)
+        self.weights = Weights(self.nevents)
         if self.isMC:
             self.weights.add('genWeight', self.events.genWeight)
             self.weights.add('lumi', ak.full_like(self.events.genWeight, lumi[self._year]))
@@ -181,14 +185,15 @@ class ttHbbBaseProcessor(processor.ProcessorABC):
         pass
 
     def process(self, events):
-        self.output = self.accumulator.identity()
-        #if len(events)==0: return output
         self.events = events
-        self.nEvents = ak.count(self.events.event)
         self.load_metadata()
-        self.output['nevts'][self._sample] += self.nEvents
+        self.output = self.accumulator.identity()
+        #if len(events)==0: return output
+        self.nEvents_initial = self.nevents
+        self.output['nevts_initial'][self._sample] += self.nEvents_initial
         self.isMC = 'genWeight' in self.events.fields
         if self.isMC:
+            # The sum of genWeight is accumulated here, before any cleaning or preselection
             self.output['sumw'][self._sample] += sum(self.events.genWeight)
 
         # Event cleaning and  PV selection
@@ -200,6 +205,8 @@ class ttHbbBaseProcessor(processor.ProcessorABC):
         self.apply_triggers()
         # This will remove all the events not passing preselection from further 
         self.apply_preselection_cuts()
+        self.nEvents_after_presel = self.nevents
+        self.output['nevts_presel'][self._sample] += self.nEvents_after_presel
 
         # This function looks at the categories in the cfg file,
         # apply the single cut functions and then prepares the categories
-- 
GitLab