diff --git a/config/test_subsamples.py b/config/test_subsamples.py index a75afa41964768c2c88c320ca1e1ae3767e2f89b..cdc64b6f8f62857b6e991892e1fe5be04378c75b 100644 --- a/config/test_subsamples.py +++ b/config/test_subsamples.py @@ -1,12 +1,65 @@ from pocket_coffea.parameters.cuts.preselection_cuts import * from pocket_coffea.workflows.tthbb_base_processor import ttHbbBaseProcessor -from pocket_coffea.lib.cut_functions import get_nObj_min, get_nObj_eq, get_nBtag, get_HLTsel, get_nObj_less +from pocket_coffea.lib.cut_functions import get_nObj_min, get_nObj_eq, get_HLTsel, get_nObj_less from pocket_coffea.parameters.histograms import * from pocket_coffea.lib.columns_manager import ColOut from pocket_coffea.parameters.btag import btag_variations -from pocket_coffea.lib.weights_manager import WeightCustom -import numpy as np +def ptmsd(events, params, **kwargs): + # Mask to select events with a fatjet with minimum softdrop mass and maximum tau21 + #return (events.FatJetGood[:,0].pt > params["pt"]) & (events.FatJetGood[:,0].msoftdrop > params["msd"]) + mask = (events.FatJetGood.pt > params["pt"]) & (events.FatJetGood.msoftdrop > params["msd"]) + + assert not ak.any(ak.is_none(mask, axis=1)), f"None in ptmsd\n{events.FatJetGood.pt[ak.is_none(mask, axis=1)]}" + + return ak.where(~ak.is_none(mask, axis=1), mask, False) + +def get_ptmsd(pt, msd, name=None): + if name == None: + name = f"pt{pt}msd{msd}" + return Cut( + name=name, + params= {"pt" : pt, "msd" : msd}, + function=ptmsd, + collection="FatJetGood" + ) + +def flavor_mask(events, params, **kwargs): + mask = { + "l" : events.FatJetGood.hadronFlavour < 4, + "c" : events.FatJetGood.hadronFlavour == 4, + "b" : events.FatJetGood.hadronFlavour == 5, + "cc" : abs(events.FatJetGood.hadronFlavour == 4) & (events.FatJetGood.nBHadrons == 0) & (events.FatJetGood.nCHadrons >= 2), + "bb" : abs(events.FatJetGood.hadronFlavour == 5) & (events.FatJetGood.nBHadrons >= 2) + } + + if params["flavor"] in ["bb", "cc"]: + return mask[params["flavor"]] + elif params["flavor"] == "b": + return mask[params["flavor"]] & ~mask["bb"] + elif params["flavor"] == "c": + return mask[params["flavor"]] & ~mask["cc"] + elif params["flavor"] == "l": + return mask[params["flavor"]] & ~mask["bb"] & ~mask["cc"] & ~mask["b"] & ~mask["c"] + else: + raise NotImplementedError + +def get_flavor(flavor): + return Cut( + name=flavor, + params={"flavor": flavor}, + function=flavor_mask, + collection="FatJetGood" + ) + +samples = ["TTToSemiLeptonic", + "ttHTobb", + "DATA_SingleMuon" + ] + +subsamples = {} +for s in filter(lambda x: 'DATA' not in x, samples): + subsamples[s] = {f"{s}_{f}" : [get_flavor(f)] for f in ['l', 'c', 'b', 'cc', 'bb']} cfg = { "dataset" : { @@ -16,35 +69,17 @@ cfg = { "datasets/DATA_SingleMuon_local.json", "datasets/DATA_SingleEle.json"], "filter" : { - "samples": [ - # "TTToSemiLeptonic", - "TTbbSemiLeptonic", - "ttHTobb", - "DATA_SingleMu", - # "DATA_SingleEle" - ], + "samples": samples, "samples_exclude" : [], "year": ['2018'] }, - "subsamples": { - "TTbbSemiLeptonic": - { - "tt<3b": [get_nObj_less(3, coll="BJetGood")], - "tt+3b": [get_nObj_eq(3, coll="BJetGood")], - "tt+4b": [get_nObj_eq(4, coll="BJetGood")], - "tt>4b": [get_nObj_min(5, coll="BJetGood")], - }, - "ttHTobb":{ - "ttH<4b": [get_nObj_less(4, coll="BJetGood")], - "ttH+4b": [get_nObj_min(4, coll="BJetGood")], - } - } + "subsamples": subsamples }, # Input and output files "workflow" : ttHbbBaseProcessor, - "output" : "output/test_subsamples", + "output" : "output/test/test_subsamples", "worflow_options" : {}, "run_options" : { @@ -67,16 +102,15 @@ cfg = { }, # Cuts and plots settings - "finalstate" : "semileptonic", - "skim": [get_nObj_min(4, 15., "Jet"), + "finalstate" : "mutag", + "skim": [get_nObj_min(1, 400., "FatJet"), get_HLTsel("semileptonic")], - "preselections" : [semileptonic_presel], + "preselections" : [semileptonic_presel, get_nObj_min(1, 450., "FatJetGood")], "categories": { "baseline": [passthrough], + "pt450msd40" : [get_ptmsd(450., 40.)], }, - - "weights": { "common": { "inclusive": ["genWeight","lumi","XS", @@ -111,23 +145,16 @@ cfg = { "variables": { - + **fatjet_hists(coll="FatJetGood", fields=["pt", "eta", "phi",]), + **fatjet_hists(coll="FatJetGood", fields=["pt", "eta", "phi",], pos=0), **ele_hists(coll="ElectronGood", pos=0), **muon_hists(coll="MuonGood", pos=0), }, "columns": { "common": { - "inclusive": [ ColOut("JetGood", ["pt", "eta","phi"]), - ColOut("ElectronGood", ["pt","eta","phi"])] + "inclusive": [ ColOut("FatJetGood", ["pt", "eta", "phi"])] }, - "bysample":{ - "ttHTobb": { - "inclusive": [ColOut("MuonGood", ["pt", "eta", "phi"])] - }, - "ttH+4b":{ - "inclusive": [ColOut("BJetGood", ["pt", "eta", "phi"])] - } - } + "bysample" : { subs : { "inclusive" : [ColOut("FatJetGood", ["hadronFlavour", "nBHadrons", "nCHadrons"])] } for subs in subsamples } } } diff --git a/pocket_coffea/lib/hist_manager.py b/pocket_coffea/lib/hist_manager.py index 5e1dd11dd21d53140ccd13c63896b737d1671448..4f65ad1857239b0c3169d7341327fbe89da6e4a0 100644 --- a/pocket_coffea/lib/hist_manager.py +++ b/pocket_coffea/lib/hist_manager.py @@ -258,7 +258,7 @@ class HistManager: weights_manager, categories, shape_variation="nominal", - subsamples_masks=None, # This is a dictionary with name:ak.Array(bool) + subsamples=None, # This is a dictionary with name:ak.Array(bool) custom_fields=None, ): ''' @@ -367,7 +367,7 @@ class HistManager: # Mask the events, the weights and then flatten and remove the None correctly for category, cat_mask in categories.get_masks(): # loop directly on subsamples - for subsample, subs_mask in subsamples_masks.items(): + for subsample, subs_mask in subsamples.get_masks(): # logging.info(f"\t\tcategory {category}, subsample {subsample}") mask = cat_mask & subs_mask # Skip empty categories and subsamples diff --git a/pocket_coffea/lib/jets.py b/pocket_coffea/lib/jets.py index 79051fdecb899f4a3232c9053e0bd0c41b836784..07e37ceec7ea9484126572933ec9de7b43be8e8f 100644 --- a/pocket_coffea/lib/jets.py +++ b/pocket_coffea/lib/jets.py @@ -226,7 +226,7 @@ def jet_selection(events, Jet, finalstate): # Only jets that are more distant than dr to ALL leptons are tagged as good jets leptons = events["LeptonGood"] # Mask for jets not passing the preselection - presel_mask = ( + mask_presel = ( (jets.pt > cuts["pt"]) & (np.abs(jets.eta) < cuts["eta"]) & (jets.jetId >= cuts["jetId"]) @@ -234,18 +234,20 @@ def jet_selection(events, Jet, finalstate): # Lepton cleaning if "dr" in cuts.keys(): dR_jets_lep = jets.metric_table(leptons) - lepton_cleaning_mask = ak.prod(dR_jets_lep > cuts["dr"], axis=2) == 1 + mask_lepton_cleaning = ak.prod(dR_jets_lep > cuts["dr"], axis=2) == 1 if Jet == "Jet": - jetpuid_mask = (jets.puId >= cuts["puId"]["value"]) | ( + mask_jetpuid = (jets.puId >= cuts["puId"]["value"]) | ( jets.pt >= cuts["puId"]["maxpt"] ) - good_jets_mask = presel_mask & lepton_cleaning_mask & jetpuid_mask + mask_good_jets = mask_presel & mask_lepton_cleaning & mask_jetpuid elif Jet == "FatJet": - raise NotImplementedError() + # Apply the msd and preselection cuts + mask_msd = (events.FatJet.msoftdrop > cuts["msd"]) + mask_good_jets = mask_presel & mask_msd - return jets[good_jets_mask], good_jets_mask + return jets[mask_good_jets], mask_good_jets def btagging(Jet, btag): diff --git a/pocket_coffea/utils/configurator.py b/pocket_coffea/utils/configurator.py index a3c626a181a715a2a8e2f7856cfdb496e7897984..454b0741eae04e9db38107825960e13186d58a57 100644 --- a/pocket_coffea/utils/configurator.py +++ b/pocket_coffea/utils/configurator.py @@ -186,29 +186,42 @@ class Configurator: def load_subsamples(self): # subsamples configuration - # Dictionary with subsample_name:[list of Cut ids] - self.subsamples_cuts = { - key: subscfg - for key, subscfg in self.dataset.get("subsamples", {}).items() - if key in self.samples - } - # Map of subsamples names + subsamples_dict = self.dataset.get("subsamples", {}) self.subsamples = {} + self.subsamples_names = {} self.has_subsamples = {} + # Save list of subsamples for each sample for sample in self.samples: - if sample in self.subsamples_cuts: - self.subsamples[sample] = list(self.subsamples_cuts[sample].keys()) + if sample in subsamples_dict.keys(): + self.subsamples_names[sample] = list(subsamples_dict[sample].keys()) self.has_subsamples[sample] = True else: - self.subsamples[sample] = [] self.has_subsamples[sample] = False # Complete list of samples and subsamples self.subsamples_list = [] - for sam in self.subsamples.values(): + for sam in self.subsamples_names.values(): self.subsamples_list += sam self.total_samples_list = list(set(self.samples + self.subsamples_list)) + # Now saving the subsamples + for sample in self.samples: + if sample in subsamples_dict: + subscfg = subsamples_dict[sample] + if isinstance(subscfg, dict): + # Convert it to StandardSelection + self.subsamples[sample] = StandardSelection(subscfg) + elif isinstance(subscfg, StandardSelection): + self.subsamples[sample] = subscfg + elif isinstance(subscfg, CartesianSelection): + self.subsamples[sample] = subscfg + else: + # if there is no configured subsample, the full sample becomes its subsample + self.subsamples[sample] = StandardSelection({sample: [passthrough]}) + # Unique set of cuts + logging.info("Subsamples:") + logging.info(self.subsamples) + def filter_dataset(self, nfiles): filtered_dataset = {} for sample, ds in self.fileset.items(): diff --git a/pocket_coffea/workflows/base.py b/pocket_coffea/workflows/base.py index bbcbc9fd5aa07cc939553b2436cfe30103d201fe..3d6f4d363c6327a2888397ef47a080570da74587 100644 --- a/pocket_coffea/workflows/base.py +++ b/pocket_coffea/workflows/base.py @@ -55,7 +55,7 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): self._categories = self.cfg.categories # Subsamples configurations: special cuts to split a sample in subsamples - self._subsamplesCfg = self.cfg.subsamples_cuts + self._subsamples = self.cfg.subsamples # Weights configuration self.weights_config_allsamples = self.cfg.weights_config @@ -116,15 +116,7 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): self._era = self.events.metadata["era"] self._goldenJSON = goldenJSON[self._year] # Loading metadata for subsamples - if self._sample in self._subsamplesCfg: - self._subsamples = self._subsamplesCfg[self._sample] - self._subsamples_names = list(self._subsamples.keys()) - self._hasSubsamples = True - else: - # if there is no configured subsample, the full sample becomes its subsample - self._subsamples = {self._sample: [passthrough]} - self._subsamples_names = [self._sample] - self._hasSubsamples = False + self._hasSubsamples = self.cfg.has_subsamples[self._sample] def load_metadata_extra(self): ''' @@ -276,28 +268,12 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): isMC=self._isMC, ) - # Defining the subsamples cut - # saving all the cuts in a single selector - self._subsamples_masks = PackedSelection() - self._subsamples_cuts_ids = [] - # saving the map of cut ids for each subsample - self._subsamples_map = defaultdict(list) - self._subsamples_masks = PackedSelection() - - for subs, cuts in self._subsamples.items(): - for cut in cuts: - if cut.id not in self._subsamples_cuts_ids: - self._subsamples_masks.add( - cut.id, - cut.get_mask( - self.events, - year=self._year, - sample=self._sample, - isMC=self._isMC, - ), - ) - self._subsamples_cuts_ids.append(cut.id) - self._subsamples_map[subs].append(cut.id) + self._subsamples[self._sample].prepare( + events=self.events, + year=self._year, + sample=self._sample, + isMC=self._isMC, + ) def define_common_variables_before_presel(self, variation): ''' @@ -377,11 +353,7 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): # If subsamples are defined we also save their metadata if self._hasSubsamples: - for subs in self._subsamples_names: - # Get the mask - subsam_mask = self._subsamples_masks.all( - *self._subsamples_map[subs] - ) + for subs, subsam_mask in self._categories.get_masks(): mask_withsub = mask_on_events & subsam_mask self.output["cutflow"][category][subs] = ak.sum(mask_withsub) if self._isMC: @@ -410,7 +382,7 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): self.hists_managers = HistManager( self.cfg.variables, self._sample, - self._subsamples_names, + self._subsamples[self._sample].keys(), self._categories, variations_config=self.cfg.variations_config[self._sample], custom_axes=self.custom_axes, @@ -434,21 +406,17 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): throught the HistManager. ''' # Filling the autofill=True histogram automatically - subs_masks = {} - for subs in self._subsamples_names: - # Get the mask - subs_masks[subs] = self._subsamples_masks.all(*self._subsamples_map[subs]) # Calling hist manager with the subsample masks self.hists_managers.fill_histograms( self.events, self.weights_manager, self._categories, - subsamples_masks=subs_masks, + subsamples=self._subsamples[self._sample], shape_variation=variation, custom_fields=self.custom_histogram_fields, ) # Saving the output - for subs in self._subsamples_names: + for subs in self._subsamples[self._sample].keys(): # Saving the output for each subsample for var, H in self.hists_managers.get_histograms(subs).items(): self.output["variables"][var][subs] = H @@ -466,7 +434,7 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): If Subsamples are defined a columnsmager is created for each of them. ''' self.column_managers = {} - for subs in self._subsamples_names: + for subs in self._subsamples[self._sample].keys(): self.column_managers[subs] = ColumnsManager( self.cfg.columns[subs], subs, self._categories ) @@ -483,14 +451,12 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): # TODO Fill column accumulator for different variations if self._hasSubsamples: # call the filling for each - for subs in self._subsamples_names: - # Get the mask - subsam_mask = self._subsamples_masks.all(*self._subsamples_map[subs]) + for subs in self._subsamples[self._sample].keys(): # Calling hist manager with a subsample mask self.output["columns"][subs] = self.column_managers[subs].fill_columns( self.events, self._categories, - subsample_mask=subsam_mask, + subsample_mask=self._subsamples[self._sample].get_mask(subs), ) else: self.output["columns"][self._sample] = self.column_managers[ @@ -658,7 +624,7 @@ class BaseProcessorABC(processor.ProcessorABC, ABC): # This is computed before any preselection self.output['sum_genweights'][self._sample] = ak.sum(self.events.genWeight) if self._hasSubsamples: - for subs in self._subsamples_names: + for subs in self._subsamples[self._sample].keys(): self.output['sum_genweights'][subs] = self.output['sum_genweights'][ self._sample ] diff --git a/pocket_coffea/workflows/tthbb_base_processor.py b/pocket_coffea/workflows/tthbb_base_processor.py index 8602188b0f35ac23d25e1cea381f9c3911a66688..5c03538076a77fb5f7d573cf702364c1c92aafc1 100644 --- a/pocket_coffea/workflows/tthbb_base_processor.py +++ b/pocket_coffea/workflows/tthbb_base_processor.py @@ -66,6 +66,10 @@ class ttHbbBaseProcessor(BaseProcessorABC): self.events.ElectronGood, self.events.MuonGood ) + self.events["FatJetGood"], self.jetGoodMask = jet_selection( + self.events, "FatJet", self.cfg.finalstate + ) + def count_objects(self, variation): self.events["nMuonGood"] = ak.num(self.events.MuonGood) self.events["nElectronGood"] = ak.num(self.events.ElectronGood)