diff --git a/environment.yml b/environment.yml index bf4e19d17af1ee3ddf07ca85b5028e3398bd3e2b..233f0da0bccf565daa96202b6e3df02b510da249 100644 --- a/environment.yml +++ b/environment.yml @@ -4,6 +4,7 @@ channels: - pytorch dependencies: - python>=3.6 + - setuptools<71 - ipython - coffea<2023 - correctionlib<=2.5.0 diff --git a/higgs_dna/samples/fetch.py b/higgs_dna/samples/fetch.py index 267974dd49227e2d508090d717ff39f485c7c77a..b55399d74d015747c805a2988fc63621f0375384 100644 --- a/higgs_dna/samples/fetch.py +++ b/higgs_dna/samples/fetch.py @@ -98,6 +98,8 @@ if __name__ == "__main__": for i, line in enumerate(fp.readlines()): if line.strip().startswith("#"): continue + if line.strip() == "": + continue fset.append(tuple(line.strip().split())) if len(fset[-1]) != 2: raise Exception( diff --git a/higgs_dna/selections/photon_selections.py b/higgs_dna/selections/photon_selections.py index be148e6ecb9048aa1778fc4ad4a773d9369c2cc6..7081b82bb224529107df5bf21166f7810b979ddc 100644 --- a/higgs_dna/selections/photon_selections.py +++ b/higgs_dna/selections/photon_selections.py @@ -73,7 +73,7 @@ def photon_preselection( photons.pfPhoIso03 - (rho * self.EA1_EE1) - (rho * rho * self.EA2_EE1) - < self.max_pho_iso_EB_low_r9 + < self.max_pho_iso_EE_low_r9 ) ) | ( @@ -82,7 +82,7 @@ def photon_preselection( photons.pfPhoIso03 - (rho * self.EA1_EE2) - (rho * rho * self.EA2_EE2) - < self.max_pho_iso_EB_low_r9 + < self.max_pho_iso_EE_low_r9 ) ) | ( @@ -91,7 +91,7 @@ def photon_preselection( photons.pfPhoIso03 - (rho * self.EA1_EE3) - (rho * rho * self.EA2_EE3) - < self.max_pho_iso_EB_low_r9 + < self.max_pho_iso_EE_low_r9 ) ) | ( @@ -100,7 +100,7 @@ def photon_preselection( photons.pfPhoIso03 - (rho * self.EA1_EE4) - (rho * rho * self.EA2_EE4) - < self.max_pho_iso_EB_low_r9 + < self.max_pho_iso_EE_low_r9 ) ) | ( @@ -109,7 +109,7 @@ def photon_preselection( photons.pfPhoIso03 - (rho * self.EA1_EE5) - (rho * rho * self.EA2_EE5) - < self.max_pho_iso_EB_low_r9 + < self.max_pho_iso_EE_low_r9 ) ) ) diff --git a/higgs_dna/systematics/__init__.py b/higgs_dna/systematics/__init__.py index 637e0d2514cdd16943c455dec2c74daa34a87cff..b47ad5dc30a818256420154cd9c73768926958b5 100644 --- a/higgs_dna/systematics/__init__.py +++ b/higgs_dna/systematics/__init__.py @@ -227,7 +227,9 @@ weight_systematics = { "PreselSF": partial(PreselSF, is_correction=False), "TriggerSF": partial(TriggerSF, is_correction=False), "cTagSF": partial(cTagSF, is_correction=False), - "bTagShapeSF": partial(bTagShapeSF, is_correction=False), + "deepJet_bTagShapeSF": partial(bTagShapeSF, ShapeSF_name="deepJet_shape", is_correction=False), + "PNet_bTagShapeSF": partial(bTagShapeSF, ShapeSF_name="particleNet_shape", is_correction=False, ), + "ParT_bTagShapeSF": partial(bTagShapeSF, ShapeSF_name="robustParticleTransformer_shape", is_correction=False), "AlphaS": partial(AlphaS), "PartonShower": partial(PartonShower), "LHEScale": None, @@ -245,7 +247,9 @@ weight_corrections = { "PreselSF": partial(PreselSF, is_correction=True), "TriggerSF": partial(TriggerSF, is_correction=True), "cTagSF": partial(cTagSF, is_correction=True), - "bTagShapeSF": partial(bTagShapeSF, is_correction=True), + "deepJet_bTagShapeSF": partial(bTagShapeSF, ShapeSF_name="deepJet_shape", is_correction=True), + "PNet_bTagShapeSF": partial(bTagShapeSF, ShapeSF_name="particleNet_shape", is_correction=True), + "ParT_bTagShapeSF": partial(bTagShapeSF, ShapeSF_name="robustParticleTransformer_shape", is_correction=True), "NNLOPS": partial(NNLOPS, is_correction=True), "Zpt": partial(Zpt, is_correction=True), } diff --git a/higgs_dna/systematics/event_weight_systematics.py b/higgs_dna/systematics/event_weight_systematics.py index 67546aa0b1324e981e1a8ffbfba4f987aa48df0d..15a309fec66a34b3b52d07465254a37fbf0894aa 100644 --- a/higgs_dna/systematics/event_weight_systematics.py +++ b/higgs_dna/systematics/event_weight_systematics.py @@ -707,11 +707,22 @@ def PartonShower(photons, events, weights, dataset_name, **kwargs): return weights -def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): +def bTagShapeSF(events, weights, ShapeSF_name, is_correction=True, year="2017", **kwargs): avail_years = ["2016preVFP", "2016postVFP", "2017", "2018", "2022preEE", "2022postEE", "2023preBPix", "2023postBPix"] if year not in avail_years: print(f"\n WARNING: only scale corrections for the year strings {avail_years} are already implemented! \n Exiting. \n") exit() + + if (ShapeSF_name in ["particleNet_shape", "robustParticleTransformer_shape"]) and (year in ["2016preVFP", "2016postVFP", "2017", "2018"]): + print(f"\n WARNING: The ShapeSF {ShapeSF_name} is not available for the year {year}. \n Exiting. \n") + exit() + + ShapeSF_name_to_discriminant = { + "deepJet_shape": "btagDeepFlavB", + "particleNet_shape": "btagPNetB", + "robustParticleTransformer_shape": "btagRobustParTAK4B" + } + btag_systematics = [ "lf", "hf", @@ -729,56 +740,56 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): "file": os.path.join( inputFilePath , "2016preVFP_UL/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2016postVFP": { "file": os.path.join( inputFilePath , "2016postVFP_UL/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2017": { "file": os.path.join( inputFilePath , "2017_UL/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2018": { "file": os.path.join( inputFilePath , "2018_UL/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2022preEE":{ "file": os.path.join( inputFilePath , "2022_Summer22/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2022postEE":{ "file": os.path.join( inputFilePath , "2022_Summer22EE/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2023preBPix":{ "file": os.path.join( inputFilePath , "2023_Summer23/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, "2023postBPix":{ "file": os.path.join( inputFilePath , "2023_Summer23BPix/btagging.json.gz" ), - "method": "deepJet_shape", + "method": ShapeSF_name, "systs": btag_systematics, }, } @@ -791,20 +802,25 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): dummy_sf = ak.ones_like(events["event"]) + # Removing jets with eta beyond 2.5 and has negative discriminant score. (No bining exist in input JSON file for such jets) relevant_jets = events["sel_jets"][ - np.abs(events["sel_jets"].eta) < 2.5 + (np.abs(events["sel_jets"].eta) < 2.5) + & (events["sel_jets"][ShapeSF_name_to_discriminant[ShapeSF_name]] >= 0) ] + # only calculate correction to nominal weight # we will evaluate the scale factors relative to all jets to be multiplied jet_pt = relevant_jets.pt jet_eta = np.abs(relevant_jets.eta) jet_hFlav = relevant_jets.hFlav - jet_btagDeepFlavB = relevant_jets.btagDeepFlav_B + jet_discriminant = relevant_jets[ + ShapeSF_name_to_discriminant[ShapeSF_name] + ] # Convert the jets in one dimension array and store the orignal structure of the ak array in counts flat_jet_pt = ak.flatten(jet_pt) flat_jet_eta = ak.flatten(jet_eta) - flat_jet_btagDeepFlavB = ak.flatten(jet_btagDeepFlavB) + flat_jet_discriminant = ak.flatten(jet_discriminant) flat_jet_hFlav = ak.flatten(jet_hFlav) counts = ak.num(jet_hFlav) @@ -821,12 +837,12 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): flat_jet_hFlav, flat_jet_eta, flat_jet_pt, - flat_jet_btagDeepFlavB, + flat_jet_discriminant, ), counts ) # Multiply the scale factore of all jets in a even - sf = ak.prod(_sf,axis=1) + sf = ak.prod(_sf, axis=1) sfs_up = [None for _ in btag_systematics] sfs_down = [None for _ in btag_systematics] @@ -842,7 +858,7 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): flat_jet_hFlav, flat_jet_eta, flat_jet_pt, - flat_jet_btagDeepFlavB, + flat_jet_discriminant, ) # Multiply the scale factore of all jets in a even @@ -855,9 +871,9 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): variations = {} # Define a condiation based the jet flavour because the json file are defined for the 4(c),5(b),0(lf) flavour jets - flavour_condition = np.logical_or(jet_hFlav < 4,jet_hFlav > 5) + flavour_condition = np.logical_or(jet_hFlav < 4, jet_hFlav > 5) # Replace the flavour to 0 (lf) if the jet flavour is neither 4 nor 5 - jet_hFlav_JSONrestricted = ak.where(flavour_condition, 0 ,jet_hFlav) + jet_hFlav_JSONrestricted = ak.where(flavour_condition, 0, jet_hFlav) flat_jet_hFlav_JSONrestricted = ak.flatten(jet_hFlav_JSONrestricted) # We need a dmmy sf array set to one to multiply for flavour dependent systentic variation flat_dummy_sf = ak.ones_like(flat_jet_hFlav_JSONrestricted) @@ -879,7 +895,7 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): flat_jet_hFlavC_JSONrestricted, flat_jet_eta, flat_jet_pt, - flat_jet_btagDeepFlavB, + flat_jet_discriminant, ) _Csfdown = evaluator.evaluate( @@ -887,7 +903,7 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): flat_jet_hFlavC_JSONrestricted, flat_jet_eta, flat_jet_pt, - flat_jet_btagDeepFlavB, + flat_jet_discriminant, ) _Csfup = ak.where( cjet_masks, @@ -905,20 +921,20 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): _sf_central, flat_dummy_sf, ) - _sfup = ak.unflatten(np.multiply(_sfcentral_Masked_notC,_Csfup),counts) - _sfdown = ak.unflatten(np.multiply(_sfcentral_Masked_notC,_Csfdown),counts) + _sfup = ak.unflatten(np.multiply(_sfcentral_Masked_notC, _Csfup), counts) + _sfdown = ak.unflatten(np.multiply(_sfcentral_Masked_notC, _Csfdown), counts) else: # We to remember which jet is correspond to c(hadron flv 4) jets cjet_masks = flat_jet_hFlav_JSONrestricted == 4 - flat_jet_hFlavNonC_JSONrestricted = ak.where(cjet_masks, 0 ,flat_jet_hFlav_JSONrestricted) + flat_jet_hFlavNonC_JSONrestricted = ak.where(cjet_masks, 0, flat_jet_hFlav_JSONrestricted) _NonCsfup = evaluator.evaluate( "up_" + syst_name, flat_jet_hFlavNonC_JSONrestricted, flat_jet_eta, flat_jet_pt, - flat_jet_btagDeepFlavB, + flat_jet_discriminant, ) _NonCsfdown = evaluator.evaluate( @@ -926,7 +942,7 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): flat_jet_hFlavNonC_JSONrestricted, flat_jet_eta, flat_jet_pt, - flat_jet_btagDeepFlavB, + flat_jet_discriminant, ) _NonCsfup = ak.where( @@ -945,11 +961,11 @@ def bTagShapeSF(events, weights, is_correction=True, year="2017", **kwargs): _sf_central, flat_dummy_sf, ) - _sfup = ak.unflatten(np.multiply(_sfcentral_Masked_C,_NonCsfup),counts) - _sfdown = ak.unflatten(np.multiply(_sfcentral_Masked_C,_NonCsfdown),counts) + _sfup = ak.unflatten(np.multiply(_sfcentral_Masked_C, _NonCsfup), counts) + _sfdown = ak.unflatten(np.multiply(_sfcentral_Masked_C, _NonCsfdown), counts) - sf_up = ak.prod(_sfup,axis=1) - sf_down = ak.prod(_sfdown,axis=1) + sf_up = ak.prod(_sfup, axis=1) + sf_down = ak.prod(_sfdown, axis=1) variations[syst_name]["up"] = sf_up variations[syst_name]["down"] = sf_down # coffea weights.add_multivariation() wants a list of arrays for the multiple up and down variations diff --git a/higgs_dna/systematics/jet_systematics_json.py b/higgs_dna/systematics/jet_systematics_json.py index 829a85a76c77f4f977324623fc98cb1f3a7ee4ed..7382dc548579811a5d90dfd52a335e5028360b47 100644 --- a/higgs_dna/systematics/jet_systematics_json.py +++ b/higgs_dna/systematics/jet_systematics_json.py @@ -167,6 +167,14 @@ def jerc_jet( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2022_Summer22EE/jet_jerc.json.gz", ), + "2023preBPix": os.path.join( + os.path.dirname(__file__), + "../systematics/JSONs/POG/JME/2023_Summer23/jet_jerc.json.gz", + ), + "2023postBPix": os.path.join( + os.path.dirname(__file__), + "../systematics/JSONs/POG/JME/2023_Summer23BPix/jet_jerc.json.gz", + ), } jec_version = { "2016preVFP": { @@ -209,6 +217,14 @@ def jerc_jet( "RunG": "Summer22EE_22Sep2023_RunG_V2_DATA", "MC": "Summer22EE_22Sep2023_V2_MC", }, + "2023preBPix": { + "RunC": "Summer23Prompt23_RunCv123_V1_DATA", + "MC": "Summer23Prompt23_V1_MC", + }, + "2023postBPix": { + "RunD": "Summer23BPixPrompt23_RunD_V1_DATA", + "MC": "Summer23BPixPrompt23_V1_MC", + }, } jec = jec_version[year][era] tag_jec = "_".join([jec, level, algo]) @@ -253,6 +269,7 @@ def jerc_jet( eval_dict = { "JetPt": jets.pt_raw, "JetEta": jets.eta, + "JetPhi": jets.phi, "Rho": jets.rho_value, "JetA": jets.area, } @@ -269,7 +286,6 @@ def jerc_jet( f"[ jerc_jet ] No JEC correction: {tag_jec} - Year: {year} - Era: {era} - Level: {level}" ) exit(-1) - inputs = [eval_dict[input.name] for input in sf.inputs] sf_value = sf.evaluate(*inputs) jets["pt_jec"] = sf_value * jets["pt_raw"] @@ -288,6 +304,8 @@ def jerc_jet( "2018": "Summer19UL18_JRV2_MC", "2022preEE": "Summer22_22Sep2023_JRV1_MC", "2022postEE": "Summer22EE_22Sep2023_JRV1_MC", + "2023preBPix": "Summer23Prompt23_RunCv1234_JRV1_MC", + "2023postBPix": "Summer23BPixPrompt23_RunD_JRV1_MC", } jer = jer_version[year] jer_ptres_tag = f"{jer}_PtResolution_{algo}" @@ -490,6 +508,66 @@ def jerc_jet( "jec_syst_TimePtEta": "TimePtEta", "jec_syst_Total": "Total", }, + "2023preBPix": { + "jec_syst_AbsoluteMPFBias": "AbsoluteMPFBias", + "jec_syst_AbsoluteScale": "AbsoluteScale", + "jec_syst_AbsoluteStat": "AbsoluteStat", + "jec_syst_FlavorQCD": "FlavorQCD", + "jec_syst_Fragmentation": "Fragmentation", + "jec_syst_PileUpDataMC": "PileUpDataMC", + "jec_syst_PileUpPtBB": "PileUpPtBB", + "jec_syst_PileUpPtEC1": "PileUpPtEC1", + "jec_syst_PileUpPtEC2": "PileUpPtEC2", + "jec_syst_PileUpPtHF": "PileUpPtHF", + "jec_syst_PileUpPtRef": "PileUpPtRef", + "jec_syst_RelativeFSR": "RelativeFSR", + "jec_syst_RelativeJEREC1": "RelativeJEREC1", + "jec_syst_RelativeJEREC2": "RelativeJEREC2", + "jec_syst_RelativeJERHF": "RelativeJERHF", + "jec_syst_RelativePtBB": "RelativePtBB", + "jec_syst_RelativePtEC1": "RelativePtEC1", + "jec_syst_RelativePtEC2": "RelativePtEC2", + "jec_syst_RelativePtHF": "RelativePtHF", + "jec_syst_RelativeBal": "RelativeBal", + "jec_syst_RelativeSample": "RelativeSample", + "jec_syst_RelativeStatEC": "RelativeStatEC", + "jec_syst_RelativeStatFSR": "RelativeStatFSR", + "jec_syst_RelativeStatHF": "RelativeStatHF", + "jec_syst_SinglePionECAL": "SinglePionECAL", + "jec_syst_SinglePionHCAL": "SinglePionHCAL", + "jec_syst_TimePtEta": "TimePtEta", + "jec_syst_Total": "Total", + }, + "2023postBPix": { + "jec_syst_AbsoluteMPFBias": "AbsoluteMPFBias", + "jec_syst_AbsoluteScale": "AbsoluteScale", + "jec_syst_AbsoluteStat": "AbsoluteStat", + "jec_syst_FlavorQCD": "FlavorQCD", + "jec_syst_Fragmentation": "Fragmentation", + "jec_syst_PileUpDataMC": "PileUpDataMC", + "jec_syst_PileUpPtBB": "PileUpPtBB", + "jec_syst_PileUpPtEC1": "PileUpPtEC1", + "jec_syst_PileUpPtEC2": "PileUpPtEC2", + "jec_syst_PileUpPtHF": "PileUpPtHF", + "jec_syst_PileUpPtRef": "PileUpPtRef", + "jec_syst_RelativeFSR": "RelativeFSR", + "jec_syst_RelativeJEREC1": "RelativeJEREC1", + "jec_syst_RelativeJEREC2": "RelativeJEREC2", + "jec_syst_RelativeJERHF": "RelativeJERHF", + "jec_syst_RelativePtBB": "RelativePtBB", + "jec_syst_RelativePtEC1": "RelativePtEC1", + "jec_syst_RelativePtEC2": "RelativePtEC2", + "jec_syst_RelativePtHF": "RelativePtHF", + "jec_syst_RelativeBal": "RelativeBal", + "jec_syst_RelativeSample": "RelativeSample", + "jec_syst_RelativeStatEC": "RelativeStatEC", + "jec_syst_RelativeStatFSR": "RelativeStatFSR", + "jec_syst_RelativeStatHF": "RelativeStatHF", + "jec_syst_SinglePionECAL": "SinglePionECAL", + "jec_syst_SinglePionHCAL": "SinglePionHCAL", + "jec_syst_TimePtEta": "TimePtEta", + "jec_syst_Total": "Total", + }, } for i in jec_syst_regrouped[year]: # get the total uncertainty diff --git a/higgs_dna/tools/decorrelator.py b/higgs_dna/tools/decorrelator.py index 497e5436b49fb6d358f151212b8a3820ef1949dd..5b5be19e5a98a27b0ea7fbda1597e6d4d8f3450c 100644 --- a/higgs_dna/tools/decorrelator.py +++ b/higgs_dna/tools/decorrelator.py @@ -90,7 +90,7 @@ class decorrelator(object): def findGb(self): self.df['{}_bin'.format(self.dvar)] = pd.cut(self.df[self.dvar].values, bins=self.bins, labels=[str(x) for x in self.xc]) - self.gb = self.df.groupby('{}_bin'.format(self.dvar)) + self.gb = self.df.groupby('{}_bin'.format(self.dvar), observed=False) def doDecorr(self, ref): diff --git a/higgs_dna/workflows/top.py b/higgs_dna/workflows/top.py index 02d1d3e5f00532a664d9864345743be972b368f7..0a8c9641243afd7ebe9312461676f819ac1d9e03 100644 --- a/higgs_dna/workflows/top.py +++ b/higgs_dna/workflows/top.py @@ -21,6 +21,7 @@ import functools import warnings from typing import Any, Dict, List, Optional import awkward as ak +import numpy as np import vector from coffea.analysis_tools import Weights from copy import deepcopy @@ -144,6 +145,13 @@ class TopProcessor(HggBaseProcessor): # type: ignore # we need ScEta for corrections and systematics, it is present in NanoAODv13+ and can be calculated using PV for older versions events.Photon = add_photon_SC_eta(events.Photon, events.PV) + # Need to add ScEta variables to electrons for scale and smearing corrections + electrons = events.Electron + electrons["ScEta"] = electrons.eta + electrons.deltaEtaSC + electrons["isScEtaEB"] = np.abs(electrons.ScEta) < 1.4442 + electrons["isScEtaEE"] = np.abs(electrons.ScEta) > 1.566 + events.Electron = electrons + # add veto EE leak branch for photons, could also be used for electrons if self.year[dataset_name][0] == "2022postEE": events.Photon = veto_EEleak_flag(self, events.Photon) @@ -176,6 +184,24 @@ class TopProcessor(HggBaseProcessor): # type: ignore original_photons = events.Photon # NOTE: jet jerc systematics are added in the correction functions and handled later original_jets = events.Jet + original_electrons = events.Electron + + # Computing the normalizing flow correction + if self.data_kind == "mc" and self.doFlow_corrections: + + logger.info("Calculating normalizing flow corrections to photon MVA ID inputs") + counts = ak.num(original_photons) + corrected_inputs,var_list = calculate_flow_corrections(original_photons, events, self.meta["flashggPhotons"]["flow_inputs"], self.meta["flashggPhotons"]["Isolation_transform_order"], year=self.year[dataset_name][0]) + + # Store the raw nanoAOD value and update photon ID MVA value for preselection + original_photons["mvaID_nano"] = original_photons["mvaID"] + + # Store the raw values of the inputs and update the input values with the corrections since some variables used in the preselection + for i in range(len(var_list)): + original_photons["raw_" + str(var_list[i])] = original_photons[str(var_list[i])] + original_photons[str(var_list[i])] = ak.unflatten(corrected_inputs[:,i] , counts) + + original_photons["mvaID"] = ak.unflatten(self.add_photonid_mva_run3(original_photons, events), counts) # systematic object variations for systematic_name in systematic_names: @@ -186,7 +212,6 @@ class TopProcessor(HggBaseProcessor): # type: ignore f"Adding systematic {systematic_name} to photons collection of dataset {dataset_name}" ) original_photons.add_systematic( - # passing the arguments here explicitly since I want to pass the events to the varying function. If there is a more elegant / flexible way, just change it! name=systematic_name, kind=systematic_dct["args"]["kind"], what=systematic_dct["args"]["what"], @@ -195,9 +220,22 @@ class TopProcessor(HggBaseProcessor): # type: ignore events=events, year=self.year[dataset_name][0], ) - # name=systematic_name, **systematic_dct["args"] ) - # to be implemented for other objects than photons or jets here + elif systematic_dct["object"] == "Electron": + logger.info( + f"Adding systematic {systematic_name} to electrons collection of dataset {dataset_name}" + ) + original_electrons.add_systematic( + name=systematic_name, + kind=systematic_dct["args"]["kind"], + what=systematic_dct["args"]["what"], + varying_function=functools.partial( + systematic_dct["args"]["varying_function"], + events=events, + year=self.year[dataset_name][0], + ) + ) + # to be implemented for other objects here elif systematic_name in available_weight_systematics: # event weight systematics will be applied after photon preselection / application of further taggers continue @@ -218,20 +256,25 @@ class TopProcessor(HggBaseProcessor): # type: ignore photons_dct[f"{systematic}_{variation}"] = deepcopy( original_photons.systematics[systematic][variation] ) + electrons_dct = {} + electrons_dct["nominal"] = original_electrons + logger.debug(original_electrons.systematics.fields) + for systematic in original_electrons.systematics.fields: + for variation in original_electrons.systematics[systematic].fields: + # no deepcopy here unless we find a case where it's actually needed + electrons_dct[f"{systematic}_{variation}"] = original_electrons.systematics[systematic][variation] # NOTE: jet jerc systematics are added in the corrections, now extract those variations and create the dictionary jerc_syst_list, jets_dct = get_obj_syst_dict(original_jets, ["pt", "mass"]) - # print("\n", jets_dct, jets_dct.keys(), "\n") - # object systematics dictionary logger.debug(f"[ jerc systematics ] {jerc_syst_list}") # Build the flattened array of all possible variations variations_combined = [] variations_combined.append(original_photons.systematics.fields) + variations_combined.append(original_electrons.systematics.fields) # NOTE: jet jerc systematics are not added with add_systematics variations_combined.append(jerc_syst_list) - # Flatten - variations_flattened = sum(variations_combined, []) # Begin with empty list and keep concatenating + variations_flattened = sum(variations_combined, []) # Attach _down and _up variations = [item + suffix for item in variations_flattened for suffix in ['_down', '_up']] # Add nominal to the list @@ -241,12 +284,15 @@ class TopProcessor(HggBaseProcessor): # type: ignore for variation in variations: logger.info(f"Processing {variation} samples.\n") - photons, jets = photons_dct["nominal"], events.Jet + photons, electrons, jets = photons_dct["nominal"], electrons_dct["nominal"], events.Jet if variation == "nominal": pass # Do nothing since we already get the unvaried, but nominally corrected objets above elif variation in [*photons_dct]: # [*dict] gets the keys of the dict since Python >= 3.5 photons = photons_dct[variation] logger.info(f"Replacing nominal photons with variation {variation}.\n") + elif variation in [*electrons_dct]: + electrons = electrons_dct[variation] + logger.info(f"Replacing nominal electrons with variation {variation}.\n") elif variation in [*jets_dct]: jets = jets_dct[variation] logger.info(f"Replacing nominal jets with variation {variation}.\n") @@ -258,25 +304,6 @@ class TopProcessor(HggBaseProcessor): # type: ignore if self.photonid_mva_EB and self.photonid_mva_EE: photons = self.add_photonid_mva(photons, events) - # Computing the normalizing flow correction - # align this with base workflow! Move flow corrections outside of loop! - if self.data_kind == "mc" and self.doFlow_corrections: - - # Applying the Flow corrections to all photons before pre-selection - counts = ak.num(photons) - corrected_inputs,var_list = calculate_flow_corrections(photons, events, self.meta["flashggPhotons"]["flow_inputs"], self.meta["flashggPhotons"]["Isolation_transform_order"], year=self.year[dataset_name][0]) - - # Store the raw nanoAOD value and update photon ID MVA value for preselection - photons["mvaID_run3"] = ak.unflatten(self.add_photonid_mva_run3(photons, events), counts) - photons["mvaID_nano"] = photons["mvaID"] - - # Store the raw values of the inputs and update the input values with the corrections since some variables used in the preselection - for i in range(len(var_list)): - photons["raw_" + str(var_list[i])] = photons[str(var_list[i])] - photons[str(var_list[i])] = ak.unflatten(corrected_inputs[:,i] , counts) - - photons["mvaID"] = ak.unflatten(self.add_photonid_mva_run3(photons, events), counts) - # photon preselection photons = photon_preselection(self, photons, events, year=self.year[dataset_name][0]) # sort photons in each event descending in pt @@ -344,13 +371,13 @@ class TopProcessor(HggBaseProcessor): # type: ignore electrons = ak.zip( { - "pt": events.Electron.pt, - "eta": events.Electron.eta, - "phi": events.Electron.phi, - "mass": events.Electron.mass, - "charge": events.Electron.charge, - "mvaIso_WP90": events.Electron.mvaIso_WP90, - "mvaIso_WP80": events.Electron.mvaIso_WP80, + "pt": electrons.pt, + "eta": electrons.eta, + "phi": electrons.phi, + "mass": electrons.mass, + "charge": electrons.charge, + "mvaIso_WP90": electrons.mvaIso_WP90, + "mvaIso_WP80": electrons.mvaIso_WP80, } ) electrons = ak.with_name(electrons, "PtEtaPhiMCandidate") @@ -371,19 +398,20 @@ class TopProcessor(HggBaseProcessor): # type: ignore muons = ak.with_name(muons, "PtEtaPhiMCandidate") # lepton cleaning - sel_electrons = electrons[ - select_electrons(self, electrons, diphotons) - ] - sel_muons = muons[select_muons(self, muons, diphotons)] + electrons = electrons[select_electrons(self, electrons, diphotons)] + muons = muons[select_muons(self, muons, diphotons)] + jets = jets[select_jets(self, jets, diphotons, muons, electrons)] - # jet selection and pt ordering - jets = jets[ - select_jets(self, jets, diphotons, sel_muons, sel_electrons) - ] + # ordering in pt since corrections may have changed the order + electrons = electrons[ak.argsort(electrons.pt, ascending=False)] + muons = muons[ak.argsort(muons.pt, ascending=False)] jets = jets[ak.argsort(jets.pt, ascending=False)] - # adding selected jets to events to be used in ctagging SF calculation + # adding selected jets, electrons and muons of the specific variation to events to be used in SF calculations events["sel_jets"] = jets + events["sel_muons"] = muons + events["sel_electrons"] = electrons + n_jets = ak.num(jets) diphotons["JetHT"] = ak.sum(jets.pt,axis=1) @@ -398,14 +426,12 @@ class TopProcessor(HggBaseProcessor): # type: ignore diphotons["n_jets"] = n_jets # Adding a 'generation' field to electrons and muons - sel_electrons['generation'] = ak.ones_like(sel_electrons.pt) - sel_muons['generation'] = 2 * ak.ones_like(sel_muons.pt) + electrons['generation'] = ak.ones_like(electrons.pt) + muons['generation'] = 2 * ak.ones_like(muons.pt) # Combine electrons and muons into a single leptons collection - leptons = ak.concatenate([sel_electrons, sel_muons], axis=1) + leptons = ak.concatenate([electrons, muons], axis=1) leptons = ak.with_name(leptons, "PtEtaPhiMCandidate") - - # Sort leptons by pt in descending order leptons = leptons[ak.argsort(leptons.pt, ascending=False)] n_leptons = ak.num(leptons) @@ -471,9 +497,10 @@ class TopProcessor(HggBaseProcessor): # type: ignore ] event_weights = varying_function( events=events[selection_mask], - photons=events[f"diphotons_{do_variation}"][ - selection_mask - ], + photons=events[f"diphotons_{do_variation}"][selection_mask], + # adding muons and electrons because I don't want to introduce a naming obligation like e.g. "sel_muons" in the syst functions + muons=events["sel_muons"][selection_mask], + electrons=events["sel_electrons"][selection_mask], weights=event_weights, dataset_name=dataset_name, year=self.year[dataset_name][0], @@ -525,9 +552,10 @@ class TopProcessor(HggBaseProcessor): # type: ignore ] event_weights = varying_function( events=events[selection_mask], - photons=events[f"diphotons_{do_variation}"][ - selection_mask - ], + photons=events[f"diphotons_{do_variation}"][selection_mask], + # adding muons and electrons because I don't want to introduce a naming obligation like e.g. "sel_muons" in the syst functions + muons=events["sel_muons"][selection_mask], + electrons=events["sel_electrons"][selection_mask], weights=event_weights, dataset_name=dataset_name, year=self.year[dataset_name][0], diff --git a/scripts/pull_files.py b/scripts/pull_files.py index a2212138c073a8f348a55047ec0369b62fc2d410..dc2a4ddaa536fccff0ae5b6dcbbce7290d15be12 100644 --- a/scripts/pull_files.py +++ b/scripts/pull_files.py @@ -446,10 +446,6 @@ def get_btag_json(logger, target_dir): "from": "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/BTV/2018_UL/btagging.json.gz", "to": f"{to_prefix}/2018_UL/btagging.json.gz", }, - "2018": { - "from": "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/BTV/2018_UL/btagging.json.gz", - "to": f"{to_prefix}/2018_UL/btagging.json.gz", - }, "2022preEE": { "from": "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/BTV/2022_Summer22/btagging.json.gz", "to": f"{to_prefix}/2022_Summer22/btagging.json.gz", @@ -784,6 +780,20 @@ def get_jetmet_json(logger, target_dir): "../higgs_dna/systematics/JSONs/POG/JME/2022_Summer22EE", ), }, + "2023_Summer23": { + "from": os.path.join(base_path, "2023_Summer23"), + "to": os.path.join( + to_prefix, + "../higgs_dna/systematics/JSONs/POG/JME/2023_Summer23", + ), + }, + "2023_Summer23BPix": { + "from": os.path.join(base_path, "2023_Summer23BPix"), + "to": os.path.join( + to_prefix, + "../higgs_dna/systematics/JSONs/POG/JME/2023_Summer23BPix", + ), + }, } fetch_file("JetMET", logger, from_to_dict, type="copy") diff --git a/setup.cfg b/setup.cfg index d729118f7e0dbd1e8589cb08eb6514520a932675..9b951ab2040f33174c453843dcb6bfe88dd2ac7c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,6 +35,7 @@ project_urls = [options] packages = find: install_requires = + setuptools<71 coffea<2023 vector<=1.4.2 xgboost<=1.5.1