Skip to content
Snippets Groups Projects

Merging full workflow into Tanay's HiggsDNA

Open Sergi Castells requested to merge castells/higgs-dna-4-gamma-tanays-copy:master into master
Compare and Show latest version
10 files
+ 1870
52
Compare changes
  • Side-by-side
  • Inline
Files
10
+ 578
213
# flake8: noqa
# Suppresses flake8 on this file to pass pipeline
from higgs_dna.tools.chained_quantile import ChainedQuantileRegression
from higgs_dna.tools.diphoton_mva import calculate_diphoton_mva
from higgs_dna.tools.xgb_loader import load_bdt
@@ -8,7 +11,7 @@ from higgs_dna.tools.EELeak_region import veto_EEleak_flag
from higgs_dna.tools.EcalBadCalibCrystal_events import remove_EcalBadCalibCrystal_events
from higgs_dna.tools.gen_helpers import get_fiducial_flag, get_genJets, get_higgs_gen_attributes
from higgs_dna.tools.sigma_m_tools import compute_sigma_m
from higgs_dna.selections.photon_selections import photon_preselection
from higgs_dna.selections.photon_selections import photon_preselection_h4g
from higgs_dna.selections.diphoton_selections import apply_fiducial_cut_det_level
from higgs_dna.selections.lepton_selections import select_electrons, select_muons
from higgs_dna.selections.jet_selections import select_jets, jetvetomap
@@ -17,6 +20,7 @@ from higgs_dna.utils.dumping_utils import (
diphoton_ak_array,
dump_ak_array,
diphoton_list_to_pandas,
ps_list_to_pandas,
dump_pandas,
get_obj_syst_dict,
)
@@ -40,6 +44,7 @@ import warnings
from typing import Any, Dict, List, Optional
import awkward
import numpy
import pandas
import sys
import vector
from coffea import processor
@@ -89,18 +94,77 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
self.doFlow_corrections = doFlow_corrections
self.output_format = output_format
# Need these two lines to grab the correct trigger from metaconditions:
# HLT_Diphoton30_18_R9IdL_AND_HE_AND_IsoCaloId
self.trigger_group = ".*DoubleEG.*" # For Run 3. Equivalent for Run 2: ".*EGamma.*2018.*"
#self.trigger_group = ".*EGamma.*2018.*"
self.analysis = "lowMassAnalysis"
# diphoton preselection cuts
#self.min_pt_photon = 25.0 --orig
self.min_pt_photon = 15.0
self.min_pt_lead_photon = 30.0
#self.min_mvaid = -0.9
self.min_mvaid = 0.2
self.max_sc_eta = 2.5
### Preselections ###
# eta
self.gap_barrel_eta = 1.4442
self.gap_endcap_eta = 1.566
#############################
self.max_eta = 2.5
# Electron veto
#self.e_veto = 0.5
self.e_veto = False
## HLT-mimicking selections
# pT
self.min_lead_pho_pt = 30.0
self.min_sublead_pho_pt = 18.0
# H/E
self.hoe_eb = 0.08
self.hoe_ee = 0.08
#self.hoe_eb = 0.12 # Trying exactly as HLT
#self.hoe_ee = 0.10 # Trying exactly as HLT
# R9
self.hlt_r9_eb = 0.5
self.hlt_r9_ee = 0.8
# Sigma_ieie
self.sigma_ieie_eb = 0.015
self.sigma_ieie_ee = 0.035
# pfPhoIso + Rho corrections
self.pfPhoIso_eb = 4.0
self.pfPhoIso_ee = 4.0
#self.pfPhoIso_eb = 6.0 # Trying exactly as HLT
#self.pfPhoIso_ee = 6.0 # Trying exactly as HLT
# TrackerIso
self.trackerIso_eb = 6.0
self.trackerIso_ee = 6.0
# Multiple option isolation cuts
self.iso_min_full5x5_r9 = 0.8
self.iso_max_chad = 20.0
self.iso_max_chad_rel = 0.3
self.iso_chad_rel_pt = 14.0
self.iso_chad_rel_hoe = 0.15
### Additional Preselections ###
self.diphoton_min_pt_lead = 30.0
self.diphoton_min_pt_sublead = 18.0
self.diphoton_min_lead_pt_mgg = 30.55 / 65 # ~0.47
self.diphoton_min_sublead_pt_mgg = 18.20 / 65 # ~0.28
# H4g-specific selections
self.lead_pt = 30.0
self.sublead_pt = 18.0
self.min_pt = 15.0
self.eta_gap_low = 1.4442
self.eta_gap_high = 1.566
self.eta_max = 2.5
self.mass_range_low = 110.0
self.mass_range_high = 180.0
# Removing these built-in values to be consistent with the naming that I already have.
# This can be updated if things break or if I'm wrong that the naming choices don't matter
"""
self.max_hovere = 0.08
self.min_full5x5_r9 = 0.8
self.max_chad_iso = 20.0
@@ -122,6 +186,8 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
self.eta_rho_corr = 1.5
self.low_eta_rho_corr = 0.16544
self.high_eta_rho_corr = 0.13212
"""
# EA values for Run3 from Egamma
self.EA1_EB1 = 0.102056
self.EA2_EB1 = -0.000398112
@@ -137,10 +203,7 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
self.EA2_EE4 = -8.10369e-05
self.EA1_EE5 = 0.0369417
self.EA2_EE5 = -2.76885e-05
self.e_veto = 0.5
####################################################################################
#logger.debug("debug-at diphoton cuts, just to see if the logger.info works")
#logger.info("info-at diphoton cuts, just to see if the logger.info works")
logger.debug(f"Setting up processor with metaconditions: {self.meta}")
self.taggers = []
@@ -208,8 +271,17 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
warnings.warn(f"Could not instantiate diphoton MVA: {e}")
self.diphoton_mva = None
def process_extra(self, events: awkward.Array) -> awkward.Array:
raise NotImplementedError
def process_extra(self, meta: str, photons: awkward.Array, diphotons: awkward.Array, signal: bool, variation: str) -> awkward.Array:
# Run selections for pseudoscalars
pseudos, diphotons, cuts = self.produce_and_select_ps(
meta,
photons=photons,
diphotons=diphotons,
signal=signal,
variation=variation
)
return pseudos, diphotons, {}
def apply_filters_and_triggers(self, events: awkward.Array) -> awkward.Array:
# met filters
@@ -237,13 +309,21 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
def process(self, events: awkward.Array) -> Dict[Any, Any]:
dataset_name = events.metadata["dataset"]
if "2018" in dataset_name:
dataset_name = events.metadata["dataset"].replace("2018_","")
# Need Nevents for selection efficiency checks with signal samples
# Number of initial events
Nevents = {"initial_events": awkward.sum(events.genWeight) if "signal" in dataset_name.lower() else len(events)}
Nphotons = {"initial_events": numpy.unique(awkward.num(events.Photon, axis=1).to_numpy(), return_counts=True)}
Nphotons_EB = {"initial_events": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEB], axis=1).to_numpy(), return_counts=True)}
Nphotons_EE = {"initial_events": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEE], axis=1).to_numpy(), return_counts=True)}
# data or monte carlo?
self.data_kind = "mc" if hasattr(events, "GenPart") else "data"
# here we start recording possible coffea accumulators
# most likely histograms, could be counters, arrays, ...
logger.info("before lumi mask (original count): %i"%len(events)) #kt
histos_etc = {}
histos_etc[dataset_name] = {}
if self.data_kind == "mc":
@@ -268,18 +348,22 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
# lumi mask
if self.data_kind == "data":
try:
pass
#lumimask = select_lumis(self.year[dataset_name][0], events, logger)
#events = events[lumimask]
lumimask = select_lumis(self.year[dataset_name][0], events, logger)
events = events[lumimask]
except:
logger.info(
f"[ lumimask ] Skip now! Unable to find year info of {dataset_name}"
)
# No jets so we can remove this
"""
# apply jetvetomap: only retain events that without any jets in the EE leakage region
if not self.skipJetVetoMap:
events = jetvetomap(
events, logger, dataset_name, year=self.year[dataset_name][0]
)
"""
# metadata array to append to higgsdna output
metadata = {}
@@ -289,16 +373,24 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
else:
metadata["sum_genw_presel"] = "Data"
# Correct trigger has been applied so we can uncomment this AND the EcalBadCalibCrystal
# apply filters and triggers
#events = self.apply_filters_and_triggers(events)
events = self.apply_filters_and_triggers(events)
# Filters + triggers efficiency
Nevents.update({"filters_triggers": awkward.sum(events.genWeight) if "signal" in dataset_name.lower() else len(events)})
Nphotons.update({"filters_triggers": numpy.unique(awkward.num(events.Photon, axis=1).to_numpy(), return_counts=True)})
Nphotons_EB.update({"filters_triggers": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEB], axis=1).to_numpy(), return_counts=True)})
Nphotons_EE.update({"filters_triggers": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEE], axis=1).to_numpy(), return_counts=True)})
# remove events affected by EcalBadCalibCrystal
#if self.data_kind == "data":
#events = remove_EcalBadCalibCrystal_events(events)
if self.data_kind == "data":
events = remove_EcalBadCalibCrystal_events(events)
# we need ScEta for corrections and systematics, it is present in NanoAODv13+ and can be calculated using PV for older versions
events.Photon = add_photon_SC_eta(events.Photon, events.PV)
# Sometimes these can be saved to parquet as NaN entries. This can be dealt with in the to_pandas step.
# add veto EE leak branch for photons, could also be used for electrons
if (
self.year[dataset_name][0] == "2022EE"
@@ -355,7 +447,60 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
warnings.warn(f"Could not process correction {correction_name}.")
continue
original_photons = events.Photon
# Do event mixing
if "EvtMix" in dataset_name:
num_cut = awkward.num(events.Photon, axis=1) >= 4
events = events[num_cut]
events = events[~awkward.is_none(events.Photon)]
tmp_events = events
tmp_events2 = events
Nevents.update({"event-mixing-4photon-cut": len(events)})
tmp_photons = None
# Do offset first so each mixing scheme is done is one go
# Repeat this N times to artificially increase background statistics
# Can optimally do floor[(# of events - 1) / 3] cycles of mixing if we don't want to repeat photons
cycles = 10
for offset in range(cycles):
artificial_stats_photons = events.Photon
# Sort photons by pt
artificial_stats_photons = artificial_stats_photons[awkward.argsort(artificial_stats_photons.pt, ascending=False, axis=1)]
# Keep 4 with highest pt
artificial_stats_photons = artificial_stats_photons[:,:4]
# Shift photons within events and replace per field
for field in artificial_stats_photons.fields:
# Remove 1st/2nd/3rd event and append them to end of array (shifts up events.Photon array)
shift = numpy.array([1,2,3])
shift = (shift + offset).tolist()
artificial_stats_photons[field] = artificial_stats_photons[field] # no shift
artificial_stats_photons[:,1][field] = awkward.concatenate((artificial_stats_photons[shift[0]:], artificial_stats_photons[:shift[0]]))[:,1][field]
artificial_stats_photons[:,2][field] = awkward.concatenate((artificial_stats_photons[shift[1]:], artificial_stats_photons[:shift[1]]))[:,2][field]
artificial_stats_photons[:,3][field] = awkward.concatenate((artificial_stats_photons[shift[2]:], artificial_stats_photons[:shift[2]]))[:,3][field]
# Append increased statistics to original array
if offset == 0:
tmp_photons = artificial_stats_photons
tmp_events2.Photon = artificial_stats_photons
tmp_events = tmp_events2
elif offset >= 1:
tmp_photons = awkward.concatenate((tmp_photons, artificial_stats_photons), axis=0)
tmp_events2.Photon = artificial_stats_photons
tmp_events = awkward.concatenate((tmp_events, tmp_events2), axis=0)
# Add to tmp_events first so events is same size and tmp_photons. Otherwise, it is offset by N/cycles * (cycles + 1)
events = tmp_events
assert len(events) == len(events.Photon) == len(tmp_photons), f"events: {len(events)}\t events.Photon: {len(events.Photon)}\t tmp_photons: {len(tmp_photons)}\t tmp_photons num: {awkward.num(tmp_photons, axis=0)}\t Cycle: {offset}"
else:
tmp_photons = events.Photon
original_photons = tmp_photons
#logger.info("original photons length %i"%len(original_photons)) # kt
# NOTE: jet jerc systematics are added in the correction functions and handled later
original_jets = events.Jet
@@ -439,6 +584,7 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
for variation in variations:
photons, jets = photons_dct["nominal"], events.Jet
if variation == "nominal":
pass # Do nothing since we already get the unvaried, but nominally corrected objets above
elif variation in [*photons_dct]: # [*dict] gets the keys of the dict since Python >= 3.5
@@ -452,155 +598,71 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
# recompute photonid_mva on the fly
if self.photonid_mva_EB and self.photonid_mva_EE:
photons = self.add_photonid_mva(photons, events)
#logger.info("length of photons before any cut : %i"%len(events))
photons4 = photons[awkward.num(photons,axis=1)>3]
#logger.info("Testing 4")
#kt#
#logger.info("Event 1 : ")
#logger.info(photons4.pt[0])
#logger.info("Event 2 : ")
#logger.info(photons4.pt[1])
# photon preselection
photons = photon_preselection(self, photons, events, year=self.year[dataset_name][0])
#photons42 = photons[awkward.num(photons,axis=1)>3]
#logger.info("Testing 2")
#kt#
#logger.info("Event 1 : ")
#logger.info(photons42.pt[0])
#logger.info("Event 2 : ")
#kt#
#logger.info(photons.pt[0])
# sort photons in each event descending in pt
# make descending-pt combinations of photons
# Photon preselection
photons = photon_preselection_h4g(
self, photons, events, year=self.year[dataset_name][0]
)
# Create diphotons and do selections before h4g-specfic selections
photons = photons[awkward.argsort(photons.pt, ascending=False)]
photons["charge"] = awkward.zeros_like(
photons.pt
) # added this because charge is not a property of photons in nanoAOD v11. We just assume every photon has charge zero...
#kt#
fourphotonevents = events[awkward.num(photons,axis=1)>3]
#logger.info("four photon events length : %i"%len(fourphotonevents))
fourphotons = photons[awkward.num(photons,axis=1)>3]
######################################################################################################################################################################################
# need to put pt cuts earlier
#four_photon_events_length = len(fourphotons) # has atleast 4 photons
#for eve_num in range(four_photon_events_length):
# for pho_i in range(len(fourphotons[eve_num])):
# for pho_j in range(pho_i+1,len(fourphotons[eve_num])):
diphotons = awkward.combinations(
photons, 2, fields=["pho_lead", "pho_sublead"]
)
######################################################################################################################################################################################
fourphotons = fourphotons[:,:4] #selecting four highest pt photons
A = awkward.combinations(fourphotons, 4, fields=["p1", "p2", "p3", "p4"])
A.p1 = awkward.with_name(A.p1, "PtEtaPhiMCandidate")
A.p2 = awkward.with_name(A.p2, "PtEtaPhiMCandidate")
A.p3 = awkward.with_name(A.p3, "PtEtaPhiMCandidate")
A.p4 = awkward.with_name(A.p4, "PtEtaPhiMCandidate")
#logger.info("A length before pt cut:%i"%len(A))
A = A[A["p1"].pt >= 30.0]
A = A[A["p2"].pt >= 18.0]
A = A[A["p3"].pt >= 15.0]
A = A[A["p4"].pt >= 15.0]
A_mask = awkward.num(A)>0
#logger.info("A length after pt cut:%i"%len(A))
dm1 = abs((A["p1"] + A["p2"]).mass - (A["p3"] + A["p4"]).mass) #12,34 combination
dm2 = abs((A["p1"] + A["p3"]).mass - (A["p2"] + A["p4"]).mass) #13,24
dm3 = abs((A["p1"] + A["p4"]).mass - (A["p2"] + A["p3"]).mass) #14,23
dm_minimum = awkward.min(awkward.concatenate((dm1,dm2,dm3),axis=1),axis=1)
a11 = A.p1 + A.p2
a12 = A.p3 + A.p4
a21 = A.p1 + A.p3
a22 = A.p2 + A.p4
a31 = A.p1 + A.p3
a32 = A.p2 + A.p4
#logger.info("checkpoint4")
mask11 = a11.pt > a12.pt
mask12 = a11.pt <= a12.pt
mask21 = a21.pt > a22.pt
mask22 = a21.pt <= a22.pt
mask31 = a31.pt > a32.pt
mask32 = a31.pt <= a32.pt
#logger.info("checkpoint5")
lead_a1 = awkward.concatenate((a11[mask11],a12[mask12]),axis=1)
sublead_a1 = awkward.concatenate((a11[mask12],a12[mask11]),axis=1)
lead_a2 = awkward.concatenate((a21[mask21],a22[mask22]),axis=1)
sublead_a2 = awkward.concatenate((a21[mask22],a22[mask21]),axis=1)
lead_a3 = awkward.concatenate((a31[mask31],a32[mask32]),axis=1)
sublead_a3 = awkward.concatenate((a31[mask32],a32[mask31]),axis=1)
#logger.info("checkpoint6")
lead_a = awkward.concatenate((lead_a1[dm_minimum==dm1],lead_a2[dm_minimum==dm2],lead_a3[dm_minimum==dm3]),axis=1)
#logger.info("checkpoint7")
sublead_a = awkward.concatenate((sublead_a1[dm_minimum==dm1],sublead_a2[dm_minimum==dm2],sublead_a3[dm_minimum==dm3]),axis=1)
#logger.info("checkpoint8")
#logger.info(lead_a.pt[:10])
#logger.info(sublead_a.pt[:10])
#logger.info((lead_a+sublead_a).mass[:10])
#diphotons = awkward.combinations( --orig
# photons, 2, fields=["pho_lead", "pho_sublead"]
#)
#logger.info(diphotons.pho_lead.pt[:10])
#logger.info(diphotons.pho_sublead.pt[:10])
#logger.info("lead pt : %f \t sublead pt : %f "%(diphotons.pho_lead.pt[i][0],diphotons.pho_sublead.pt[i][0]))
# the remaining cut is to select the leading photons
# the previous sort assures the order
#diphotons = diphotons[
# diphotons["pho_lead"].pt > self.min_pt_lead_photon
#]
#logger.info("Diphoton : ")
#logger.info(diphotons.pho_lead.pt[:10])
# now turn the diphotons into candidates with four momenta and such
diphotons = awkward.combinations((lead_a + sublead_a).mass,1,fields=["mass_0"])
diphoton_4mom = lead_a + sublead_a
#logger.info(lead_a.layout==sublead_a.layout)
#diphotons = awkward.combinations(awkward.concatenate((lead_a ,sublead_a),axis=1),2,fields=["a_lead","a_sublead"])
#diphoton_4mom = diphotons["pho_lead"] + diphotons["pho_sublead"]
logger.info("diphoton tree length %i"%len(diphotons))
#diphoton_4mom = lead_a + sublead_a
diphoton_4mom = diphotons["pho_lead"] + diphotons["pho_sublead"]
diphotons["pt"] = diphoton_4mom.pt
diphotons["eta"] = diphoton_4mom.eta
diphotons["phi"] = diphoton_4mom.phi
diphotons["mass"] = diphoton_4mom.mass
diphotons["lead_a_mass"] = lead_a.mass
diphotons["sublead_a_mass"] = sublead_a.mass
diphotons["charge"] = diphoton_4mom.charge
#diphoton_pz = diphoton_4mom.z
#diphoton_e = diphoton_4mom.energy
diphoton_pz = diphoton_4mom.z
diphoton_e = diphoton_4mom.energy
#diphotons["rapidity"] = 0.5 * numpy.log((diphoton_e + diphoton_pz) / (diphoton_e - diphoton_pz))
diphotons["rapidity"] = 0.5 * numpy.log(
(diphoton_e + diphoton_pz) / (diphoton_e - diphoton_pz)
)
diphotons = awkward.with_name(diphotons, "PtEtaPhiMCandidate")
if (self.data_kind == "mc") :
diphotons["genWeight"] = fourphotonevents.genWeight
diphotons["dZ"] = fourphotonevents.GenVtx.z - fourphotonevents.PV.z
#diphotons["event"]=fourphotonevents
diphotons["weight_central"] = awkward.ones_like(
diphotons["genWeight"]
)
diphotons["weight"] = awkward.ones_like(diphotons["genWeight"])
# sort diphotons by pT
diphotons = diphotons[
awkward.argsort(diphotons.pt, ascending=False)
]
# sort diphotons by pT. match photons
diphotons = diphotons[awkward.argsort(diphotons.pt, ascending=False)]
diphotons = awkward.with_name(diphotons, "PtEtaPhiMCandidate")
diphotons["pT1_m_gg"] = diphotons.pho_lead.pt / diphotons.mass
diphotons["pT2_m_gg"] = diphotons.pho_sublead.pt / diphotons.mass
if (self.data_kind == "mc") :
# This is not used by FlashggFinalFits so we can set it to whatever we want, but it does need to exist!
diphotons["dZ"] = diphotons.pt
diphotons["weight_central"] = awkward.ones_like(diphotons.pt)
diphotons["weight"] = awkward.ones_like(diphotons.pt)
# Diphoton cuts
lead_pt_cut = diphotons.pho_lead.pt > self.diphoton_min_pt_lead
sublead_pt_cut = diphotons.pho_sublead.pt > self.diphoton_min_pt_sublead
lead_pt_mgg_cut = (diphotons.pho_lead.pt / diphotons.mass) > self.diphoton_min_lead_pt_mgg
sublead_pt_mgg_cut = (diphotons.pho_sublead.pt / diphotons.mass) > self.diphoton_min_sublead_pt_mgg
"""
# Check diphoton cuts and selection mask behavior (see notes Feb 24, 2025)
for idx, (d,np,dc,sm) in enumerate(zip(diphotons.to_list()[:5], awkward.num(diphotons, axis=1)[:5], diphoton_cuts.to_list()[:5], ~awkward.is_none(diphotons[diphoton_cuts]).to_list()[:5])):
print(f"diphotons {idx}: {p}")
print(f"diphoton cut {idx}: {dc}")
print(f"diphoton mask {idx}: {sm}")
print(f"# diphotons {idx}: {np}")
"""
# Apply diphoton cuts
#diphoton_cuts = lead_pt_cut & sublead_pt_cut & lead_pt_mgg_cut & sublead_pt_mgg_cut
diphoton_cuts = lead_pt_cut & sublead_pt_cut
diphotons = diphotons[diphoton_cuts]
dp_len = len(diphotons)
assert len(diphotons) == len(photons), f"{len(diphotons)} {len(photons)}"
for tagger in self.taggers:
(
@@ -635,61 +697,114 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
# lowest priority is most important (ascending sort)
# leave in order of diphoton pT in case of ties (stable sort)
sorted = awkward.argsort(diphotons.best_tag, stable=True)
diphotons = diphotons[sorted]
sorted_dipoho = awkward.argsort(diphotons.best_tag, stable=True)
diphotons = diphotons[sorted_dipho]
diphotons = awkward.firsts(diphotons)
# set diphotons as part of the event record
#logger.info("line 641 -- diphoton length before mask : %i"%len(diphotons))
diphotons = diphotons[A_mask]
#logger.info("line 643 -- diphoton length after mask : %i"%len(diphotons))
# drop events without a tag, if there are tags
if len(self.taggers):
selection_mask = ~(
awkward.is_none(diphotons)
| awkward.is_none(diphotons.best_tag)
)
diphotons = diphotons[selection_mask]
else:
selection_mask = ~awkward.is_none(diphotons)
diphotons = diphotons[selection_mask]
logger.info("line 652 -- diphoton length : %i"%len(diphotons))
# return if there is no surviving events
if len(diphotons) == 0:
logger.debug("No surviving events in this run, return now!")
return histos_etc
# Instead of the nominal sigma_m_over_m, we will use the smeared version of it -> (https://indico.cern.ch/event/1319585/#169-update-on-the-run-3-mass-r)
# else:
# warnings.warn("Smeamering need to be applied in order to decorrelate the (Smeared) mass resolution. -- Exiting!")
# sys.exit(0)
#logger.info("started")
#diphotons = awkward.combinations((lead_a + sublead_a).mass,1,fields=["random"])
#diphoton_4mom = lead_a + sublead_a
#diphotons = awkward.combinations(photons, 2, fields=["pho_lead", "pho_sublead"])
#diphoton_4mom = diphotons["pho_lead"] + diphotons["pho_sublead"]
#logger.info(lead_a.layout==sublead_a.layout)
#diphotons["pt"] = diphoton_4mom.pt
#diphotons["eta"] = diphoton_4mom.eta
#diphotons["phi"] = diphoton_4mom.phi
#diphotons["mass"] = diphoton_4mom.mass
#diphotons["charge"] = diphoton_4mom.charge
#diphoton_pz = diphoton_4mom.z
#diphoton_e = diphoton_4mom.energy
#diphotons["rapidity"] = 0.5 * numpy.log((diphoton_e + diphoton_pz) / (diphoton_e - diphoton_pz))
#diphotons = awkward.firsts(diphotons)
#logger.info("edit ended")
# Ensure at least one diphoton candidate in all arrays
"""
print("len photons before cuts, len photons after cuts and mask:", len(photons), len(photons[selection_mask]))
print("len diphotons before cuts, len diphotons after cuts and mask:", dp_len, len(diphotons[selection_mask]))
"""
diphotons = diphotons[selection_mask]
photons = photons[selection_mask]
assert len(diphotons) == len(photons), f"{len(diphotons)} {len(photons)}"
"""
# Check behavior of cuts on photons and events.Photon (see notes Feb 24, 2025). To use this, need to put back cuts on events.Photon and return it from pre-selection.
for idx, (p,e,np,ne) in enumerate(zip(photons.to_list()[:5], events.Photon.to_list()[:5], awkward.num(photons, axis=1)[:5], awkward.num(events.Photon)[:5])):
print(f"photons {idx}: {p}")
print(f"# photons (photons) {idx}: {np} {np >= 4}")
print(f"events.Photon {idx}: {e}")
print(f"# photons (events) {idx}: {ne} {ne >= 4}")
"""
# Pre-selections efficiency
Nevents.update({"pre_selections": len(photons)})
Nphotons.update({"pre_selections": numpy.unique(awkward.num(photons, axis=1).to_numpy(), return_counts=True)})
Nphotons_EB.update({"pre_selections": numpy.unique(awkward.num(photons[photons.isScEtaEB], axis=1).to_numpy(), return_counts=True)})
Nphotons_EE.update({"pre_selections": numpy.unique(awkward.num(photons[photons.isScEtaEE], axis=1).to_numpy(), return_counts=True)})
"""
for Np in [Nphotons, Nphotons_EB, Nphotons_EE]:
d = dict(zip(Np["pre_selections"][0], Np["pre_selections"][1]))
for key, val in d.items():
try:
#assert d[key] <= dict(zip(Np["filters_triggers"][0], Np["filters_triggers"][1]))[key], f"{key} {val} {dict(zip(Np['filters_triggers'][0], Np['filters_triggers'][1]))[key]}"
assert d[key] <= dict(zip(Np["filters_triggers"][0], Np["filters_triggers"][1]))[key], f"{Np['pre_selections']} {Np['filters_triggers']} {d} {dict(zip(Np['filters_triggers'][0], Np['filters_triggers'][1]))}"
except KeyError:
pass
"""
# Previous method was just grabbing the number of photons without numpy.unique
#assert numpy.sum(Nphotons["pre_selections"]) <= numpy.sum(Nphotons["filters_triggers"])
# Store # of photons into diphotons (otherwise it won't save - and dump samples here) to confirm relative efficiency of 4-photon cut.
diphotons["count4Photon"] = awkward.num(photons, axis=1)
# Pre-selections + 4 photon efficiency
num_photons = awkward.num(photons, axis=1) >= 4
photons = photons[num_photons]
diphotons = diphotons[num_photons]
assert len(photons) == len(diphotons), f"Photons: {len(photons)}\t Diphotons: {len(diphotons)}"
Nevents.update({"pre_selections+4photon": len(photons)})
Nphotons.update({"pre_selections+4photon": numpy.unique(awkward.num(photons, axis=1).to_numpy(), return_counts=True)})
Nphotons_EB.update({"pre_selections+4photon": numpy.unique(awkward.num(photons[photons.isScEtaEB], axis=1).to_numpy(), return_counts=True)})
Nphotons_EE.update({"pre_selections+4photon": numpy.unique(awkward.num(photons[photons.isScEtaEE], axis=1).to_numpy(), return_counts=True)})
# H4g-specific processing
pseudos, diphotons, process_extra = self.process_extra(events.metadata["dataset"], photons, diphotons, True if "signal" in dataset_name.lower() else False, variation)
histos_etc.update(process_extra)
assert len(pseudos) == len(diphotons), f"Pseudos: {len(pseudos)}\t Diphotons: {len(diphotons)}"
# Full selections efficiency
Nevents.update({"selections": len(pseudos)})
Nphotons.update({"selections": numpy.unique((awkward.num(pseudos, axis=1) * 4).to_numpy(), return_counts=True)})
Nphotons_EB.update({"selections": numpy.unique((awkward.num(pseudos[pseudos.pho1.isScEtaEB], axis=1) + awkward.num(pseudos[pseudos.pho2.isScEtaEB], axis=1) + awkward.num(pseudos[pseudos.pho3.isScEtaEB], axis=1) + awkward.num(pseudos[pseudos.pho4.isScEtaEB], axis=1)).to_numpy(), return_counts=True)})
Nphotons_EE.update({"selections": numpy.unique((awkward.num(pseudos[pseudos.pho1.isScEtaEE], axis=1) + awkward.num(pseudos[pseudos.pho2.isScEtaEE], axis=1) + awkward.num(pseudos[pseudos.pho3.isScEtaEE], axis=1) + awkward.num(pseudos[pseudos.pho4.isScEtaEE], axis=1)).to_numpy(), return_counts=True)})
# Print Nevents for checks
for key, val in Nevents.items():
logger.info(f"{key}: {val / Nevents['initial_events'] * 100.0}")
"""
# Look at number of photons per event
for key, val in Nphotons.items():
unique, counts = numpy.unique(val, return_counts=True)
try:
logger.info(f"{key}: (N = {unique[0]}): {counts[0]} (N = {unique[1]}): {counts[1]} (N = {unique[2]}): {counts[2]} (N = {unique[3]}): {counts[3]} (N = {unique[4]}): {counts[4]}")
except:
try:
logger.info(f"{key}: (N = {unique[0]}): {counts[0]} (N = {unique[1]}): {counts[1]} (N = {unique[2]}): {counts[2]} (N = {unique[3]}): {counts[3]}")
except:
try:
logger.info(f"{key}: (N = {unique[0]}): {counts[0]} (N = {unique[1]}): {counts[1]} (N = {unique[2]}): {counts[2]}")
except:
try:
logger.info(f"{key}: (N = {unique[0]}): {counts[0]} (N = {unique[1]}): {counts[1]}")
except:
logger.info(f"{key}: (N = {unique[0]}): {counts[0]}")
"""
# return if there is no surviving events (in this case, want at least one pseudoscalar)
if len(pseudos) == 0:
logger.info("No surviving events in this run, return now!")
return histos_etc
if self.output_location is not None:
# Need to convert to ROOT after doing plots + BDT + categories.
# Will need to sort out a middle step so the samples can be saved as ROOT directly if that is desired.
# Otherwise, use tbe conversion scripts that Sergi added. Instructions below...
# Convert to ROOT maintaining directory structure: convert_parquet_to_root_Haa.py
# Convert to ROOT as one file (specify era or merge all): convert_parquet_to_root_Haa_oneFile.py
# Merge all parquet to one file (specify era or merge all): combine_parquet_outputs_oneFile.py
"""
if self.output_format == "root":
df = diphoton_list_to_pandas(self, diphotons)
else:
@@ -703,29 +818,56 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
if "lead_fixedGridRhoAll" not in field
]
]
"""
# Handle Nevents to DataFrame
if "signal" in dataset_name.lower():
Nevents_clean = {
"initial_events": Nevents["initial_events"],
"filters_triggers": Nevents["filters_triggers"],
"pre_selections": Nevents["pre_selections"],
"pre_selections+4photon": Nevents["pre_selections+4photon"],
"selections": Nevents["selections"],
}
assert ~awkward.any(awkward.is_none(diphotons)) and ~awkward.any(awkward.is_none(pseudos)), f"{~awkward.any(awkward.is_none(diphotons))} {~awkward.any(awkward.is_none(pseudos))}"
assert len(diphotons) == len(pseudos)
df = diphoton_list_to_pandas(self, diphotons)
df_ps = ps_list_to_pandas(pseudos)
evtMix = pandas.Series([1 if "evtMix" in dataset_name else 0] * len(pseudos), name="eventMixing")
if "Signal" in dataset_name:
eff_N = pandas.Series(list(Nevents_clean.values()), name="Nevents")
photon_N = pandas.Series(list(Nphotons.values()), name="Nphotons")
photon_N_EB = pandas.Series(list(Nphotons_EB.values()), name="Nphotons_EB")
photon_N_EE = pandas.Series(list(Nphotons_EE.values()), name="Nphotons_EE")
final_df = pandas.concat([df, df_ps, evtMix, eff_N, photon_N, photon_N_EB, photon_N_EE], axis=1)
# NOTE: If there are NaN values in events then look at the assert below. len(final_df) will be a multiple of 5 (for eff_N) and the others will not be.
# When there are fewer than 5 events, eff_N is bigger than the other arrays and those events are filled with NaN. This fine/expected behavior.
# A similar effect is seen with Nphotons
assert not final_df.drop(columns=["Nevents", "Nphotons", "Nphotons_EB", "Nphotons_EE"]).isnull().values.any() or not (len(final_df) == len(df) == len(df_ps) == len(evtMix)), f"final_df: {final_df.drop(columns=['Nevents', 'Nphotons', 'Nphotons_EB', 'Nphotons_EE']).isnull().values.any()} diphoton: {df.isnull().values.any()} pseudos: {df_ps.isnull().values.any()} evtMix: {evtMix.isnull().values.any()} eff_N {eff_N.isnull().values.any()}\n {len(final_df)} {len(df)} {len(df_ps)} {len(evtMix)}"
fname = (
events.behavior[
"__events_factory__"
]._partition_key.replace("/", "_")
+ ".%s" % self.output_format
)
fname = (fname.replace("%2F","")).replace("%3B1","")
else:
final_df = pandas.concat([df, df_ps, evtMix], axis=1)
# NOTE: If there are NaN values in events then look at the assert below. len(final_df) will be a multiple of 5 (for eff_N) and the others will not be.
# When there are fewer than 5 events, eff_N is bigger than the other arrays and those events are filled with NaN. This fine/expected behavior.
# A similar effect is seen with Nphotons
assert not final_df.drop(columns=["Nevents", "Nphotons", "Nphotons_EB", "Nphotons_EE"]).isnull().values.any() or not (len(final_df) == len(df) == len(df_ps) == len(evtMix)), f"final_df: {final_df.drop(columns=['Nevents', 'Nphotons', 'Nphotons_EB', 'Nphotons_EE']).isnull().values.any()} diphoton: {df.isnull().values.any()} pseudos: {df_ps.isnull().values.any()} evtMix: {evtMix.isnull().values.any()} eff_N {eff_N.isnull().values.any()}\n {len(final_df)} {len(df)} {len(df_ps)} {len(evtMix)}"
fname = (events.behavior["__events_factory__"]._partition_key.replace("/", "_") + ".parquet")
subdirs = []
if "dataset" in events.metadata:
subdirs.append(events.metadata["dataset"])
subdirs.append(do_variation)
if self.output_format == "root":
dump_pandas(self, df, fname, self.output_location, subdirs)
else:
dump_ak_array(
self, akarr, fname, self.output_location, metadata, subdirs,
)
#logger.info("Likely works")
subdirs.append(variation)
dump_pandas(self, final_df, fname, self.output_location, subdirs)
return histos_etc
def postprocess(self, accumulant: Dict[Any, Any]) -> Any:
raise NotImplementedError
#raise NotImplementedError
pass
def add_diphoton_mva(
self, diphotons: awkward.Array, events: awkward.Array
@@ -803,3 +945,226 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
corr_mva = awkward.where(isEB, corr_mva_EB, corr_mva_EE)
return corr_mva
def produce_and_select_ps(self, meta: str, photons: awkward.Array, diphotons: awkward.Array, signal: bool, variation: str) -> awkward.Array:
# Sort photons by pt
photons = photons[awkward.argsort(photons.pt, ascending=False, axis=1)]
# Keep 4 with highest pt
photons = photons[:,:4]
# Set mass and charge fields to zeros
photons["mass"] = numpy.zeros_like(photons.pt)
photons["charge"] = numpy.zeros_like(photons.pt)
# Make fields for photons and add photon_group to pseudos
pseudos = awkward.combinations(photons, 4, fields=["pho1", "pho2", "pho3", "pho4"]) # easiest/most compact method to add 4 fields
pseudos.pho1 = awkward.with_name(pseudos.pho1, "PtEtaPhiMCandidate")
pseudos.pho2 = awkward.with_name(pseudos.pho2, "PtEtaPhiMCandidate")
pseudos.pho3 = awkward.with_name(pseudos.pho3, "PtEtaPhiMCandidate")
pseudos.pho4 = awkward.with_name(pseudos.pho4, "PtEtaPhiMCandidate")
pseudos["mass_gggg"] = (pseudos.pho1 + pseudos.pho2 + pseudos.pho3 + pseudos.pho4).mass
# Add sumPt
pseudos["sumPt_gggg"] = pseudos.pho1.pt + pseudos.pho2.pt + pseudos.pho3.pt + pseudos.pho4.pt
# Pt cuts
lead_pt = pseudos.pho1.pt >= self.lead_pt
sublead_pt = pseudos.pho2.pt >= self.sublead_pt
min_pt = (pseudos.pho3.pt >= self.min_pt) & (pseudos.pho4.pt >= self.min_pt)
pt_cuts = lead_pt & sublead_pt & min_pt
# Eta cuts
eta_1 = (abs(pseudos.pho1.eta) < self.eta_gap_low) | (abs(pseudos.pho1.eta) > self.eta_gap_high)
eta_2 = (abs(pseudos.pho2.eta) < self.eta_gap_low) | (abs(pseudos.pho2.eta) > self.eta_gap_high)
eta_3 = (abs(pseudos.pho3.eta) < self.eta_gap_low) | (abs(pseudos.pho3.eta) > self.eta_gap_high)
eta_4 = (abs(pseudos.pho4.eta) < self.eta_gap_low) | (abs(pseudos.pho4.eta) > self.eta_gap_high)
eta_gap = eta_1 & eta_2 & eta_3 & eta_4
eta_max = (abs(pseudos.pho1.eta) < self.eta_max) & (abs(pseudos.pho2.eta) < self.eta_max) & (abs(pseudos.pho3.eta) < self.eta_max) & (abs(pseudos.pho4.eta) < self.eta_max)
eta_cuts = eta_gap & eta_max
# Mass cuts
mass_low = pseudos.mass_gggg >= self.mass_range_low
mass_high = pseudos.mass_gggg <= self.mass_range_high
mass_cuts = mass_low & mass_high
# Combine all cuts
all_cuts = pt_cuts & eta_cuts & mass_cuts
assert len(all_cuts) == len(awkward.any(all_cuts, axis=1)), f"{len(all_cuts)} {len(awkward.any(all_cuts, axis=1))}"
# It looks like the 4-photon cut on photons is correct (see below)
# The cuts for pseudoscalars have been wrong in the past since it was done using flatten instead of ak.any.
# The number of flattened cuts is the same as events left with 4 photons (makes sense that # of cuts is only as many as events left)
assert len(awkward.flatten(all_cuts)) == len(photons[awkward.num(photons, axis=1) >= 4]), f"{len(awkward.flatten(all_cuts))} {len(photons[awkward.num(photons, axis=1) >= 4])}"
all_cuts = awkward.any(all_cuts, axis=1)
# Get length to compare after making pseudoscalars
len_ps = len(pseudos)
def makePsPermutation(ps_array, perm):
phoA = ps_array[f"pho{perm[0]}"]
phoB = ps_array[f"pho{perm[1]}"]
assert awkward.all(awkward.all((phoA.pt >= phoB.pt), axis=1)), "PhoA pT must be bigger than PhoB pT!"
# Ensure shape of array is (3, N) so we can keep the daughter photons AND the pseudoscalar together
ps_perm = awkward.zip({"ps": phoA + phoB, "leading_pho": phoA, "subleading_pho": phoB})
for field in ["ps", "leading_pho", "subleading_pho"]:
ps_perm[field] = ps_perm[field]
ps_perm[(field, "pt")] = ps_perm[field].pt
ps_perm[(field, "eta")] = ps_perm[field].eta
ps_perm[(field, "phi")] = ps_perm[field].phi
ps_perm[(field, "mass")] = ps_perm[field].mass
ps_perm.ps = awkward.with_name(ps_perm.ps, "PtEtaPhiMCandidate")
ps_perm["leading_pho"] = awkward.with_name(ps_perm.leading_pho, "PtEtaPhiMCandidate")
ps_perm["subleading_pho"] = awkward.with_name(ps_perm.subleading_pho, "PtEtaPhiMCandidate")
return ps_perm
# Start dM mixing procedure
# Permutation A - (1,2,3,4)
#dM1 = abs((pseudos.pho1 + pseudos.pho2).mass - (pseudos.pho3 + pseudos.pho4).mass)
Ps11 = makePsPermutation(pseudos, (1,2))
Ps12 = makePsPermutation(pseudos, (3,4))
dM1 = abs(Ps11.ps.mass - Ps12.ps.mass)
# Permutation B - (1,3,2,4)
#dM2 = abs((pseudos.pho1 + pseudos.pho3).mass - (pseudos.pho2 + pseudos.pho4).mass)
Ps21 = makePsPermutation(pseudos, (1,3))
Ps22 = makePsPermutation(pseudos, (2,4))
dM2 = abs(Ps21.ps.mass - Ps22.ps.mass)
# Permutation C - (1,4,2,3)
#dM3 = abs((pseudos.pho1 + pseudos.pho4).mass - (pseudos.pho2 + pseudos.pho3).mass)
Ps31 = makePsPermutation(pseudos, (1,4))
Ps32 = makePsPermutation(pseudos, (2,3))
dM3 = abs(Ps31.ps.mass - Ps32.ps.mass)
# Store eta information for daughter photons
Ps11[("ps", "gg_inEB")] = (Ps11.leading_pho.eta < self.eta_gap_low) & (Ps11.subleading_pho.eta < self.eta_gap_low)
Ps12[("ps", "gg_inEB")] = (Ps12.leading_pho.eta < self.eta_gap_low) & (Ps12.subleading_pho.eta < self.eta_gap_low)
Ps21[("ps", "gg_inEB")] = (Ps21.leading_pho.eta < self.eta_gap_low) & (Ps21.subleading_pho.eta < self.eta_gap_low)
Ps22[("ps", "gg_inEB")] = (Ps22.leading_pho.eta < self.eta_gap_low) & (Ps22.subleading_pho.eta < self.eta_gap_low)
Ps31[("ps", "gg_inEB")] = (Ps31.leading_pho.eta < self.eta_gap_low) & (Ps31.subleading_pho.eta < self.eta_gap_low)
Ps32[("ps", "gg_inEB")] = (Ps32.leading_pho.eta < self.eta_gap_low) & (Ps32.subleading_pho.eta < self.eta_gap_low)
# Masks to determine lead/sublead pseudoscalars
mask11 = Ps11.ps.pt >= Ps12.ps.pt
mask12 = Ps11.ps.pt < Ps12.ps.pt
mask21 = Ps21.ps.pt >= Ps22.ps.pt
mask22 = Ps21.ps.pt < Ps22.ps.pt
mask31 = Ps31.ps.pt >= Ps32.ps.pt
mask32 = Ps31.ps.pt < Ps32.ps.pt
# Minimize dM and create associated masks
dM_min = awkward.min(awkward.concatenate((dM1, dM2, dM3), axis=1), axis=1)
dM_mask1 = (dM_min == dM1)
dM_mask2 = (dM_min == dM2)
dM_mask3 = (dM_min == dM3)
# Lead/sublead pseudoscalars for each permutation
LeadPs1 = awkward.concatenate((Ps11[mask11], Ps12[mask12]), axis=1)
SubleadPs1 = awkward.concatenate((Ps11[mask12], Ps12[mask11]), axis=1)
LeadPs2 = awkward.concatenate((Ps21[mask21], Ps22[mask22]), axis=1)
SubleadPs2 = awkward.concatenate((Ps21[mask22], Ps22[mask21]), axis=1)
LeadPs3 = awkward.concatenate((Ps31[mask31], Ps32[mask32]), axis=1)
SubleadPs3 = awkward.concatenate((Ps31[mask32], Ps32[mask31]), axis=1)
# Apply masks and make final arrays
dM = awkward.concatenate((dM1[dM_mask1], dM2[dM_mask2], dM3[dM_mask3]), axis=1)
LeadPs = awkward.concatenate((LeadPs1[dM_mask1], LeadPs2[dM_mask2], LeadPs3[dM_mask3]), axis=1)
SubleadPs = awkward.concatenate((SubleadPs1[dM_mask1], SubleadPs2[dM_mask2], SubleadPs3[dM_mask3]), axis=1)
# Add dM, LeadPs, SubleadPs to events
pseudos["dM"] = dM
pseudos["LeadPs"] = LeadPs.ps
pseudos[("LeadPs", "leading_pho")] = LeadPs.leading_pho
pseudos[("LeadPs", "subleading_pho")] = LeadPs.subleading_pho
pseudos["SubleadPs"] = SubleadPs.ps
pseudos[("SubleadPs", "leading_pho")] = SubleadPs.leading_pho
pseudos[("SubleadPs", "subleading_pho")] = SubleadPs.subleading_pho
# Ensure length of pseudos stayed the same during mixing/optimization
assert len_ps == len(pseudos)
# Ensure fields in pseudos
for field in ["pho1", "pho2", "pho3", "pho4", "LeadPs", "SubleadPs"]:
pseudos[field] = pseudos[field]
pseudos[(field, "pt")] = pseudos[field].pt
pseudos[(field, "eta")] = pseudos[field].eta
pseudos[(field, "phi")] = pseudos[field].phi
pseudos[(field, "mass")] = pseudos[field].mass
# Add iso_charged_rel to photons for plotting checks
if field != "LeadPs" and field != "SubleadPs":
pseudos[(field, "pfRelIso_photon_pt")] = pseudos[field].pfRelIso03_chg_quadratic * pseudos[field].pt
# Set behavior for pseudoscalar vectors (re vector module)
pseudos["LeadPs"] = awkward.with_name(pseudos.LeadPs, "PtEtaPhiMCandidate")
pseudos["SubleadPs"] = awkward.with_name(pseudos.SubleadPs, "PtEtaPhiMCandidate")
pseudos["pT1_ma1"] = pseudos.LeadPs.leading_pho.pt / pseudos.LeadPs.mass
pseudos["pT2_ma1"] = pseudos.LeadPs.subleading_pho.pt / pseudos.LeadPs.mass
pseudos["pT1_ma2"] = pseudos.SubleadPs.leading_pho.pt / pseudos.SubleadPs.mass
pseudos["pT2_ma2"] = pseudos.SubleadPs.subleading_pho.pt / pseudos.SubleadPs.mass
# Store dR between pseudoscalars
pseudos["dR_aa"] = pseudos.LeadPs.delta_r(pseudos.SubleadPs)
pseudos["dR_aa_mass_gggg"] = pseudos.dR_aa / pseudos.mass_gggg
# Store dR between daughter photons
pseudos[("LeadPs", "dR_gg")] = pseudos.LeadPs.leading_pho.delta_r(pseudos.LeadPs.subleading_pho)
pseudos[("SubleadPs", "dR_gg")] = pseudos.SubleadPs.leading_pho.delta_r(pseudos.SubleadPs.subleading_pho)
# Make cos(theta_ag) variables
# Get leading photon in lab frame
lead_pho = awkward.concatenate((
pseudos.pho1[(pseudos.pho1.pt >= pseudos.pho2.pt) & (pseudos.pho1.pt >= pseudos.pho3.pt) & (pseudos.pho1.pt >= pseudos.pho4.pt)],
pseudos.pho2[(pseudos.pho2.pt >= pseudos.pho1.pt) & (pseudos.pho2.pt >= pseudos.pho3.pt) & (pseudos.pho2.pt >= pseudos.pho4.pt)],
pseudos.pho3[(pseudos.pho3.pt >= pseudos.pho1.pt) & (pseudos.pho3.pt >= pseudos.pho2.pt) & (pseudos.pho3.pt >= pseudos.pho4.pt)],
pseudos.pho4[(pseudos.pho4.pt >= pseudos.pho1.pt) & (pseudos.pho4.pt >= pseudos.pho2.pt) & (pseudos.pho4.pt >= pseudos.pho3.pt)]),
axis=1
)
LeadPs_Hframe = pseudos.LeadPs.boost(-(pseudos.LeadPs + pseudos.SubleadPs).boostvec)
#pho1_Aframe = lead_pho.boost(-pseudos.LeadPs.boostvec)
pho1_Aframe = pseudos.pho1.boost(-pseudos.LeadPs.boostvec) # Want leading photon in lab frame
pseudos["cos_ag"] = LeadPs_Hframe.unit.dot(pho1_Aframe.unit)
# Make hypMass for data/eventMixing
if not signal:
m_hyp = numpy.random.choice([float(x) for x in range(15, 65, 5)], len(pseudos.LeadPs.mass))
assert len(m_hyp) == len(pseudos.LeadPs.mass)
else:
m_hyp = numpy.full_like(pseudos.LeadPs.mass, float(meta.replace("2018_","")[7:9]))
assert len(m_hyp) == len(pseudos.LeadPs.mass)
# Make other BDT input variables
pseudos["m_hyp"] = m_hyp
pseudos["LeadPs_interMass"] = (pseudos.LeadPs.mass - m_hyp) / pseudos.mass_gggg
pseudos["SubleadPs_interMass"] = (pseudos.SubleadPs.mass - m_hyp) / pseudos.mass_gggg
pseudos["Ps_massDiff"] = pseudos.LeadPs.mass - pseudos.SubleadPs.mass
"""
print("pseudos pseudos[all_cuts]", len(pseudos), len(pseudos[all_cuts]))
print("pseudos pseudos[all_cuts]", len(pseudos), len(pseudos[all_cuts]))
"""
"""
# This takes 2 hours to complete with one file per mass point so don't run this. Nothing is printed, so there are no Nones in the array!
# Check behavior of cuts on photons and events.Photon (see notes Feb 24, 2025). To use this, need to put back cuts on events.Photon and return it from pre-selection.
for idx, (ps,nps,psc,sm) in enumerate(zip(pseudos.to_list()[:5], awkward.num(pseudos, axis=1)[:5], all_cuts.to_list()[:5], (~awkward.is_none(pseudos[all_cuts])).to_list()[:5])):
if sm == True:
continue
print(f"pseudos {idx}: {ps}")
print(f"pseudos cut {idx}: {psc}")
print(f"pseudos mask {idx}: {sm}")
print(f"# pseudos {idx}: {nps}")
"""
# Ensure at least no events without pseudoscalars
assert len(pseudos) == len(diphotons), f"{len(pseudos)}\t {len(diphotons)}"
pseudos = pseudos[all_cuts]
diphotons = diphotons[~awkward.is_none(pseudos)]
psuedos = pseudos[~awkward.is_none(pseudos)]
return pseudos, diphotons, all_cuts
Loading