Sergi Castells · b9a640c6 · a41f89c1 · 9d143c64 · 74848f99 · ad714f93
--- a/higgs_dna/workflows/base.py

+ 578

− 213
+++ b/higgs_dna/workflows/base.py

+ 578

− 213
+# flake8: noqa
+# Suppresses flake8 on this file to pass pipeline
+
 from higgs_dna.tools.chained_quantile import ChainedQuantileRegression
 from higgs_dna.tools.diphoton_mva import calculate_diphoton_mva
 from higgs_dna.tools.xgb_loader import load_bdt
 @@ -8,7 +11,7 @@ from higgs_dna.tools.EELeak_region import veto_EEleak_flag
 from higgs_dna.tools.EcalBadCalibCrystal_events import remove_EcalBadCalibCrystal_events
 from higgs_dna.tools.gen_helpers import get_fiducial_flag, get_genJets, get_higgs_gen_attributes
 from higgs_dna.tools.sigma_m_tools import compute_sigma_m
-from higgs_dna.selections.photon_selections import photon_preselection
+from higgs_dna.selections.photon_selections import photon_preselection_h4g
 from higgs_dna.selections.diphoton_selections import apply_fiducial_cut_det_level
 from higgs_dna.selections.lepton_selections import select_electrons, select_muons
 from higgs_dna.selections.jet_selections import select_jets, jetvetomap
 @@ -17,6 +20,7 @@ from higgs_dna.utils.dumping_utils import (
    diphoton_ak_array,
    dump_ak_array,
    diphoton_list_to_pandas,
+    ps_list_to_pandas,
    dump_pandas,
    get_obj_syst_dict,
 )
 @@ -40,6 +44,7 @@ import warnings
 from typing import Any, Dict, List, Optional
 import awkward
 import numpy
+import pandas
 import sys
 import vector
 from coffea import processor
 @@ -89,18 +94,77 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
        self.doFlow_corrections = doFlow_corrections
        self.output_format = output_format

+        # Need these two lines to grab the correct trigger from metaconditions:
+        # HLT_Diphoton30_18_R9IdL_AND_HE_AND_IsoCaloId
+        self.trigger_group = ".*DoubleEG.*"  # For Run 3. Equivalent for Run 2: ".*EGamma.*2018.*"
+        #self.trigger_group = ".*EGamma.*2018.*"
+        self.analysis = "lowMassAnalysis"

-
-        # diphoton preselection cuts
-       #self.min_pt_photon = 25.0 --orig
-        self.min_pt_photon = 15.0
-        self.min_pt_lead_photon = 30.0
-        #self.min_mvaid = -0.9
-        self.min_mvaid = 0.2
-        self.max_sc_eta = 2.5
+        ###   Preselections   ###
+        # eta
        self.gap_barrel_eta = 1.4442
        self.gap_endcap_eta = 1.566
-#############################
+        self.max_eta = 2.5
+
+        # Electron veto
+        #self.e_veto = 0.5
+        self.e_veto = False
+
+        ## HLT-mimicking selections
+        # pT
+        self.min_lead_pho_pt = 30.0
+        self.min_sublead_pho_pt = 18.0
+
+        # H/E
+        self.hoe_eb = 0.08
+        self.hoe_ee = 0.08
+        #self.hoe_eb = 0.12  # Trying exactly as HLT
+        #self.hoe_ee = 0.10  # Trying exactly as HLT
+
+        # R9
+        self.hlt_r9_eb = 0.5
+        self.hlt_r9_ee = 0.8
+
+        # Sigma_ieie
+        self.sigma_ieie_eb = 0.015
+        self.sigma_ieie_ee = 0.035
+
+        # pfPhoIso + Rho corrections
+        self.pfPhoIso_eb = 4.0
+        self.pfPhoIso_ee = 4.0
+        #self.pfPhoIso_eb = 6.0  # Trying exactly as HLT
+        #self.pfPhoIso_ee = 6.0  # Trying exactly as HLT
+
+        # TrackerIso
+        self.trackerIso_eb = 6.0
+        self.trackerIso_ee = 6.0
+
+        # Multiple option isolation cuts
+        self.iso_min_full5x5_r9 = 0.8
+        self.iso_max_chad = 20.0
+        self.iso_max_chad_rel = 0.3
+        self.iso_chad_rel_pt = 14.0
+        self.iso_chad_rel_hoe = 0.15
+
+        ###   Additional Preselections   ###
+        self.diphoton_min_pt_lead = 30.0
+        self.diphoton_min_pt_sublead = 18.0
+        self.diphoton_min_lead_pt_mgg = 30.55 / 65  # ~0.47
+        self.diphoton_min_sublead_pt_mgg = 18.20 / 65  # ~0.28
+
+        # H4g-specific selections
+        self.lead_pt = 30.0
+        self.sublead_pt = 18.0
+        self.min_pt = 15.0
+        self.eta_gap_low = 1.4442
+        self.eta_gap_high = 1.566
+        self.eta_max = 2.5
+        self.mass_range_low = 110.0
+        self.mass_range_high = 180.0
+
+        # Removing these built-in values to be consistent with the naming that I already have.
+        # This can be updated if things break or if I'm wrong that the naming choices don't matter
+        """
        self.max_hovere = 0.08
        self.min_full5x5_r9 = 0.8
        self.max_chad_iso = 20.0
 @@ -122,6 +186,8 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
        self.eta_rho_corr = 1.5
        self.low_eta_rho_corr = 0.16544
        self.high_eta_rho_corr = 0.13212
+        """
+
        # EA values for Run3 from Egamma
        self.EA1_EB1 = 0.102056
        self.EA2_EB1 = -0.000398112
 @@ -137,10 +203,7 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
        self.EA2_EE4 = -8.10369e-05
        self.EA1_EE5 = 0.0369417
        self.EA2_EE5 = -2.76885e-05
-        self.e_veto = 0.5
-####################################################################################
-        #logger.debug("debug-at diphoton cuts, just to see if the logger.info works")
-        #logger.info("info-at diphoton cuts, just to see if the logger.info works")
+
        logger.debug(f"Setting up processor with metaconditions: {self.meta}")

        self.taggers = []
 @@ -208,8 +271,17 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
            warnings.warn(f"Could not instantiate diphoton MVA: {e}")
            self.diphoton_mva = None

-    def process_extra(self, events: awkward.Array) -> awkward.Array:
-        raise NotImplementedError
+    def process_extra(self, meta: str, photons: awkward.Array, diphotons: awkward.Array, signal: bool, variation: str) -> awkward.Array:
+        # Run selections for pseudoscalars
+        pseudos, diphotons, cuts = self.produce_and_select_ps(
+            meta,
+            photons=photons,
+            diphotons=diphotons,
+            signal=signal,
+            variation=variation
+        )
+
+        return pseudos, diphotons, {}

    def apply_filters_and_triggers(self, events: awkward.Array) -> awkward.Array:
        # met filters
 @@ -237,13 +309,21 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore

    def process(self, events: awkward.Array) -> Dict[Any, Any]:
        dataset_name = events.metadata["dataset"]
+        if "2018" in dataset_name:
+            dataset_name = events.metadata["dataset"].replace("2018_","")
+
+        # Need Nevents for selection efficiency checks with signal samples
+        # Number of initial events
+        Nevents = {"initial_events": awkward.sum(events.genWeight) if "signal" in dataset_name.lower() else len(events)}
+        Nphotons = {"initial_events": numpy.unique(awkward.num(events.Photon, axis=1).to_numpy(), return_counts=True)}
+        Nphotons_EB = {"initial_events": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEB], axis=1).to_numpy(), return_counts=True)}
+        Nphotons_EE = {"initial_events": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEE], axis=1).to_numpy(), return_counts=True)}

        # data or monte carlo?
        self.data_kind = "mc" if hasattr(events, "GenPart") else "data"

        # here we start recording possible coffea accumulators
        # most likely histograms, could be counters, arrays, ...
-        logger.info("before lumi mask (original count): %i"%len(events)) #kt
        histos_etc = {}
        histos_etc[dataset_name] = {}
        if self.data_kind == "mc":
 @@ -268,18 +348,22 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
        # lumi mask
        if self.data_kind == "data":
            try:
-                pass
-                #lumimask = select_lumis(self.year[dataset_name][0], events, logger)
-                #events = events[lumimask]
+                lumimask = select_lumis(self.year[dataset_name][0], events, logger)
+                events = events[lumimask]
            except:
                logger.info(
                    f"[ lumimask ] Skip now! Unable to find year info of {dataset_name}"
                )
+
+        # No jets so we can remove this
+        """
        # apply jetvetomap: only retain events that without any jets in the EE leakage region
        if not self.skipJetVetoMap:
            events = jetvetomap(
                events, logger, dataset_name, year=self.year[dataset_name][0]
            )
+        """
+
        # metadata array to append to higgsdna output
        metadata = {}

 @@ -289,16 +373,24 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
        else:
            metadata["sum_genw_presel"] = "Data"

+        # Correct trigger has been applied so we can uncomment this AND the EcalBadCalibCrystal
        # apply filters and triggers
-        #events = self.apply_filters_and_triggers(events)
+        events = self.apply_filters_and_triggers(events)
+        
+        # Filters + triggers efficiency
+        Nevents.update({"filters_triggers": awkward.sum(events.genWeight) if "signal" in dataset_name.lower() else len(events)})
+        Nphotons.update({"filters_triggers": numpy.unique(awkward.num(events.Photon, axis=1).to_numpy(), return_counts=True)})
+        Nphotons_EB.update({"filters_triggers": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEB], axis=1).to_numpy(), return_counts=True)})
+        Nphotons_EE.update({"filters_triggers": numpy.unique(awkward.num(events.Photon[events.Photon.isScEtaEE], axis=1).to_numpy(), return_counts=True)})

        # remove events affected by EcalBadCalibCrystal
-        #if self.data_kind == "data":
-            #events = remove_EcalBadCalibCrystal_events(events)
+        if self.data_kind == "data":
+            events = remove_EcalBadCalibCrystal_events(events)

        # we need ScEta for corrections and systematics, it is present in NanoAODv13+ and can be calculated using PV for older versions
        events.Photon = add_photon_SC_eta(events.Photon, events.PV)

+        # Sometimes these can be saved to parquet as NaN entries. This can be dealt with in the to_pandas step.
        # add veto EE leak branch for photons, could also be used for electrons
        if (
            self.year[dataset_name][0] == "2022EE"
 @@ -355,7 +447,60 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
                warnings.warn(f"Could not process correction {correction_name}.")
                continue

-        original_photons = events.Photon
+        # Do event mixing
+        if "EvtMix" in dataset_name:
+            num_cut = awkward.num(events.Photon, axis=1) >= 4
+            events = events[num_cut]
+            events = events[~awkward.is_none(events.Photon)]
+            tmp_events = events
+            tmp_events2 = events
+            Nevents.update({"event-mixing-4photon-cut": len(events)})
+
+            tmp_photons = None
+
+            # Do offset first so each mixing scheme is done is one go
+            # Repeat this N times to artificially increase background statistics
+            # Can optimally do floor[(# of events - 1) / 3] cycles of mixing if we don't want to repeat photons
+            cycles = 10
+            for offset in range(cycles):
+                artificial_stats_photons = events.Photon
+
+                # Sort photons by pt
+                artificial_stats_photons = artificial_stats_photons[awkward.argsort(artificial_stats_photons.pt, ascending=False, axis=1)]
+
+                # Keep 4 with highest pt
+                artificial_stats_photons = artificial_stats_photons[:,:4]
+
+                # Shift photons within events and replace per field
+                for field in artificial_stats_photons.fields:
+                    # Remove 1st/2nd/3rd event and append them to end of array (shifts up events.Photon array)
+
+                    shift = numpy.array([1,2,3])
+                    shift = (shift + offset).tolist()
+                    
+                    artificial_stats_photons[field] = artificial_stats_photons[field]  # no shift
+                    artificial_stats_photons[:,1][field] = awkward.concatenate((artificial_stats_photons[shift[0]:], artificial_stats_photons[:shift[0]]))[:,1][field]
+                    artificial_stats_photons[:,2][field] = awkward.concatenate((artificial_stats_photons[shift[1]:], artificial_stats_photons[:shift[1]]))[:,2][field]
+                    artificial_stats_photons[:,3][field] = awkward.concatenate((artificial_stats_photons[shift[2]:], artificial_stats_photons[:shift[2]]))[:,3][field]
+
+                # Append increased statistics to original array
+                if offset == 0:
+                    tmp_photons = artificial_stats_photons
+                    tmp_events2.Photon = artificial_stats_photons
+                    tmp_events = tmp_events2
+                elif offset >= 1:
+                    tmp_photons = awkward.concatenate((tmp_photons, artificial_stats_photons), axis=0)
+                    tmp_events2.Photon = artificial_stats_photons
+                    tmp_events = awkward.concatenate((tmp_events, tmp_events2), axis=0)
+
+            # Add to tmp_events first so events is same size and tmp_photons. Otherwise, it is offset by N/cycles * (cycles + 1)
+            events = tmp_events
+            assert len(events) == len(events.Photon) == len(tmp_photons), f"events: {len(events)}\t events.Photon: {len(events.Photon)}\t tmp_photons: {len(tmp_photons)}\t tmp_photons num: {awkward.num(tmp_photons, axis=0)}\t Cycle: {offset}"
+        
+        else:
+            tmp_photons = events.Photon
+
+        original_photons = tmp_photons
        #logger.info("original photons length %i"%len(original_photons)) # kt
        # NOTE: jet jerc systematics are added in the correction functions and handled later
        original_jets = events.Jet
 @@ -439,6 +584,7 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore

        for variation in variations:
            photons, jets = photons_dct["nominal"], events.Jet
+
            if variation == "nominal":
                pass  # Do nothing since we already get the unvaried, but nominally corrected objets above
            elif variation in [*photons_dct]:  # [*dict] gets the keys of the dict since Python >= 3.5
 @@ -452,155 +598,71 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
            # recompute photonid_mva on the fly
            if self.photonid_mva_EB and self.photonid_mva_EE:
                photons = self.add_photonid_mva(photons, events)
-            #logger.info("length of photons before any cut : %i"%len(events))
-            photons4 = photons[awkward.num(photons,axis=1)>3]
-            #logger.info("Testing 4")
-            #kt#
-            #logger.info("Event 1 : ")
-            #logger.info(photons4.pt[0])
-            #logger.info("Event 2 : ")
-            #logger.info(photons4.pt[1])
-            # photon preselection
-            photons = photon_preselection(self, photons, events, year=self.year[dataset_name][0])
-            #photons42 = photons[awkward.num(photons,axis=1)>3]
-            #logger.info("Testing 2")
-            #kt#
-            #logger.info("Event 1 : ")
-            #logger.info(photons42.pt[0])
-            #logger.info("Event 2 : ")
-            #kt#
-            #logger.info(photons.pt[0])
-            # sort photons in each event descending in pt
-            # make descending-pt combinations of photons
+
+            # Photon preselection
+            photons = photon_preselection_h4g(
+                self, photons, events, year=self.year[dataset_name][0]
+            )
+
+            # Create diphotons and do selections before h4g-specfic selections
            photons = photons[awkward.argsort(photons.pt, ascending=False)]
            photons["charge"] = awkward.zeros_like(
                photons.pt
            )  # added this because charge is not a property of photons in nanoAOD v11. We just assume every photon has charge zero...
-#kt#
-            fourphotonevents = events[awkward.num(photons,axis=1)>3]
-            #logger.info("four photon events length : %i"%len(fourphotonevents))
-            fourphotons = photons[awkward.num(photons,axis=1)>3]
-
-######################################################################################################################################################################################
-            # need to put pt cuts earlier
-            #four_photon_events_length = len(fourphotons) # has atleast 4 photons
-            #for eve_num in range(four_photon_events_length):
-            #    for pho_i in range(len(fourphotons[eve_num])):
-            #        for pho_j in range(pho_i+1,len(fourphotons[eve_num])):
-                        
-            
-
-
-
-
-
+            diphotons = awkward.combinations(
+                photons, 2, fields=["pho_lead", "pho_sublead"]
+            )

-            
-            ######################################################################################################################################################################################
-            fourphotons = fourphotons[:,:4] #selecting four highest pt photons
-            A = awkward.combinations(fourphotons, 4, fields=["p1", "p2", "p3", "p4"]) 
-            A.p1 = awkward.with_name(A.p1, "PtEtaPhiMCandidate")
-            A.p2 = awkward.with_name(A.p2, "PtEtaPhiMCandidate")
-            A.p3 = awkward.with_name(A.p3, "PtEtaPhiMCandidate")
-            A.p4 = awkward.with_name(A.p4, "PtEtaPhiMCandidate")
-            #logger.info("A length before pt cut:%i"%len(A))
-            A = A[A["p1"].pt >= 30.0]
-            A = A[A["p2"].pt >= 18.0]
-            A = A[A["p3"].pt >= 15.0]
-            A = A[A["p4"].pt >= 15.0]
-            A_mask = awkward.num(A)>0
-            #logger.info("A length after pt cut:%i"%len(A))
-            
-            dm1 = abs((A["p1"] + A["p2"]).mass - (A["p3"] + A["p4"]).mass) #12,34 combination
-            dm2 = abs((A["p1"] + A["p3"]).mass - (A["p2"] + A["p4"]).mass) #13,24
-            dm3 = abs((A["p1"] + A["p4"]).mass - (A["p2"] + A["p3"]).mass) #14,23
-
-            dm_minimum = awkward.min(awkward.concatenate((dm1,dm2,dm3),axis=1),axis=1)
-
-            a11 = A.p1 + A.p2
-            a12 = A.p3 + A.p4
-
-            a21 = A.p1 + A.p3
-            a22 = A.p2 + A.p4
-
-            a31 = A.p1 + A.p3
-            a32 = A.p2 + A.p4
-            #logger.info("checkpoint4")
-            mask11 = a11.pt > a12.pt
-            mask12 = a11.pt <= a12.pt
-            mask21 = a21.pt > a22.pt
-            mask22 = a21.pt <= a22.pt
-            mask31 = a31.pt > a32.pt
-            mask32 = a31.pt <= a32.pt
-            #logger.info("checkpoint5")
-            lead_a1 = awkward.concatenate((a11[mask11],a12[mask12]),axis=1)
-            sublead_a1 = awkward.concatenate((a11[mask12],a12[mask11]),axis=1)
-            lead_a2 = awkward.concatenate((a21[mask21],a22[mask22]),axis=1)
-            sublead_a2 = awkward.concatenate((a21[mask22],a22[mask21]),axis=1)
-            lead_a3 = awkward.concatenate((a31[mask31],a32[mask32]),axis=1)
-            sublead_a3 = awkward.concatenate((a31[mask32],a32[mask31]),axis=1)
-            #logger.info("checkpoint6")
-            lead_a = awkward.concatenate((lead_a1[dm_minimum==dm1],lead_a2[dm_minimum==dm2],lead_a3[dm_minimum==dm3]),axis=1)
-            #logger.info("checkpoint7")
-            sublead_a = awkward.concatenate((sublead_a1[dm_minimum==dm1],sublead_a2[dm_minimum==dm2],sublead_a3[dm_minimum==dm3]),axis=1)
-            #logger.info("checkpoint8")
-            #logger.info(lead_a.pt[:10])
-            #logger.info(sublead_a.pt[:10])
-            #logger.info((lead_a+sublead_a).mass[:10])
-
-            #diphotons = awkward.combinations( --orig
-            #    photons, 2, fields=["pho_lead", "pho_sublead"]
-            #)
-            
-            #logger.info(diphotons.pho_lead.pt[:10])
-            #logger.info(diphotons.pho_sublead.pt[:10])
-            #logger.info("lead pt : %f \t sublead pt : %f "%(diphotons.pho_lead.pt[i][0],diphotons.pho_sublead.pt[i][0]))
-            
-            # the remaining cut is to select the leading photons
-            # the previous sort assures the order
-            #diphotons = diphotons[
-            #    diphotons["pho_lead"].pt > self.min_pt_lead_photon
-            #]
-            #logger.info("Diphoton : ")
-            #logger.info(diphotons.pho_lead.pt[:10])
            # now turn the diphotons into candidates with four momenta and such
-            diphotons = awkward.combinations((lead_a + sublead_a).mass,1,fields=["mass_0"])
-            diphoton_4mom = lead_a + sublead_a
-            #logger.info(lead_a.layout==sublead_a.layout)
-            #diphotons = awkward.combinations(awkward.concatenate((lead_a ,sublead_a),axis=1),2,fields=["a_lead","a_sublead"])
-            #diphoton_4mom = diphotons["pho_lead"] + diphotons["pho_sublead"]
-
-            logger.info("diphoton tree length %i"%len(diphotons))
-            #diphoton_4mom = lead_a + sublead_a
+            diphoton_4mom = diphotons["pho_lead"] + diphotons["pho_sublead"]
            diphotons["pt"] = diphoton_4mom.pt
            diphotons["eta"] = diphoton_4mom.eta
            diphotons["phi"] = diphoton_4mom.phi
            diphotons["mass"] = diphoton_4mom.mass
-            diphotons["lead_a_mass"] = lead_a.mass
-            diphotons["sublead_a_mass"] = sublead_a.mass
            diphotons["charge"] = diphoton_4mom.charge

-            #diphoton_pz = diphoton_4mom.z
-            #diphoton_e = diphoton_4mom.energy
+            diphoton_pz = diphoton_4mom.z
+            diphoton_e = diphoton_4mom.energy

-            #diphotons["rapidity"] = 0.5 * numpy.log((diphoton_e + diphoton_pz) / (diphoton_e - diphoton_pz))
+            diphotons["rapidity"] = 0.5 * numpy.log(
+                (diphoton_e + diphoton_pz) / (diphoton_e - diphoton_pz)
+            )

            diphotons = awkward.with_name(diphotons, "PtEtaPhiMCandidate")
-            if (self.data_kind == "mc") :
-                diphotons["genWeight"] = fourphotonevents.genWeight
-                diphotons["dZ"] = fourphotonevents.GenVtx.z - fourphotonevents.PV.z
-                #diphotons["event"]=fourphotonevents
-                diphotons["weight_central"] = awkward.ones_like(
-                        diphotons["genWeight"]
-                    )
-                diphotons["weight"] = awkward.ones_like(diphotons["genWeight"])
-            # sort diphotons by pT
-            diphotons = diphotons[  
-                awkward.argsort(diphotons.pt, ascending=False)
-            ]
-

+            # sort diphotons by pT. match photons
+            diphotons = diphotons[awkward.argsort(diphotons.pt, ascending=False)]

+            diphotons = awkward.with_name(diphotons, "PtEtaPhiMCandidate")
+            diphotons["pT1_m_gg"] = diphotons.pho_lead.pt / diphotons.mass 
+            diphotons["pT2_m_gg"] = diphotons.pho_sublead.pt / diphotons.mass 
+            if (self.data_kind == "mc") :
+                # This is not used by FlashggFinalFits so we can set it to whatever we want, but it does need to exist!
+                diphotons["dZ"] = diphotons.pt
+                diphotons["weight_central"] = awkward.ones_like(diphotons.pt)
+                diphotons["weight"] = awkward.ones_like(diphotons.pt)
+
+            # Diphoton cuts
+            lead_pt_cut = diphotons.pho_lead.pt > self.diphoton_min_pt_lead
+            sublead_pt_cut = diphotons.pho_sublead.pt > self.diphoton_min_pt_sublead
+            lead_pt_mgg_cut = (diphotons.pho_lead.pt / diphotons.mass) > self.diphoton_min_lead_pt_mgg
+            sublead_pt_mgg_cut = (diphotons.pho_sublead.pt / diphotons.mass) > self.diphoton_min_sublead_pt_mgg
+
+            """
+            # Check diphoton cuts and selection mask behavior (see notes Feb 24, 2025)
+            for idx, (d,np,dc,sm) in enumerate(zip(diphotons.to_list()[:5], awkward.num(diphotons, axis=1)[:5], diphoton_cuts.to_list()[:5], ~awkward.is_none(diphotons[diphoton_cuts]).to_list()[:5])):
+                print(f"diphotons {idx}: {p}")
+                print(f"diphoton cut {idx}: {dc}")
+                print(f"diphoton mask {idx}: {sm}")
+                print(f"# diphotons {idx}: {np}")
+            """
+
+            # Apply diphoton cuts
+            #diphoton_cuts = lead_pt_cut & sublead_pt_cut & lead_pt_mgg_cut & sublead_pt_mgg_cut
+            diphoton_cuts = lead_pt_cut & sublead_pt_cut
+            diphotons = diphotons[diphoton_cuts]
+            dp_len = len(diphotons)
+            assert len(diphotons) == len(photons), f"{len(diphotons)} {len(photons)}"
            
            for tagger in self.taggers:
                (
 @@ -635,61 +697,114 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore

                # lowest priority is most important (ascending sort)
                # leave in order of diphoton pT in case of ties (stable sort)
-                sorted = awkward.argsort(diphotons.best_tag, stable=True)
-                diphotons = diphotons[sorted]
+                sorted_dipoho = awkward.argsort(diphotons.best_tag, stable=True)
+                diphotons = diphotons[sorted_dipho]

            diphotons = awkward.firsts(diphotons)
            # set diphotons as part of the event record
-            #logger.info("line 641 -- diphoton length before mask :  %i"%len(diphotons))            
-            diphotons = diphotons[A_mask]
-            #logger.info("line 643 -- diphoton length after mask :  %i"%len(diphotons))
            # drop events without a tag, if there are tags
            if len(self.taggers):
                selection_mask = ~(
                    awkward.is_none(diphotons)
                    | awkward.is_none(diphotons.best_tag)
                )
-                diphotons = diphotons[selection_mask]
            else:
                selection_mask = ~awkward.is_none(diphotons)
-                diphotons = diphotons[selection_mask]
-            logger.info("line 652 -- diphoton length :  %i"%len(diphotons))
-            # return if there is no surviving events
-            if len(diphotons) == 0:
-                logger.debug("No surviving events in this run, return now!")
-                return histos_etc
-
-
-
-
-
-                # Instead of the nominal sigma_m_over_m, we will use the smeared version of it -> (https://indico.cern.ch/event/1319585/#169-update-on-the-run-3-mass-r)
-                # else:
-                #    warnings.warn("Smeamering need to be applied in order to decorrelate the (Smeared) mass resolution. -- Exiting!")
-                #    sys.exit(0)
-            #logger.info("started")
-            #diphotons = awkward.combinations((lead_a + sublead_a).mass,1,fields=["random"])
-            #diphoton_4mom = lead_a + sublead_a
-            
-            #diphotons = awkward.combinations(photons, 2, fields=["pho_lead", "pho_sublead"])
-            #diphoton_4mom = diphotons["pho_lead"] + diphotons["pho_sublead"]
-            
-            #logger.info(lead_a.layout==sublead_a.layout)
-            #diphotons["pt"] = diphoton_4mom.pt
-            #diphotons["eta"] = diphoton_4mom.eta
-            #diphotons["phi"] = diphoton_4mom.phi
-            #diphotons["mass"] = diphoton_4mom.mass
-            #diphotons["charge"] = diphoton_4mom.charge
-
-            #diphoton_pz = diphoton_4mom.z
-            #diphoton_e = diphoton_4mom.energy
-            
-            #diphotons["rapidity"] = 0.5 * numpy.log((diphoton_e + diphoton_pz) / (diphoton_e - diphoton_pz))
-            #diphotons = awkward.firsts(diphotons)
-            #logger.info("edit ended")

+            # Ensure at least one diphoton candidate in all arrays
+            """
+            print("len photons before cuts, len photons after cuts and mask:", len(photons), len(photons[selection_mask]))
+            print("len diphotons before cuts, len diphotons after cuts and mask:", dp_len, len(diphotons[selection_mask]))
+            """
+            diphotons = diphotons[selection_mask]
+            photons = photons[selection_mask]
+            assert len(diphotons) == len(photons), f"{len(diphotons)} {len(photons)}"
+            """
+            # Check behavior of cuts on photons and events.Photon (see notes Feb 24, 2025). To use this, need to put back cuts on events.Photon and return it from pre-selection.
+            for idx, (p,e,np,ne) in enumerate(zip(photons.to_list()[:5], events.Photon.to_list()[:5], awkward.num(photons, axis=1)[:5], awkward.num(events.Photon)[:5])):
+                print(f"photons {idx}: {p}")
+                print(f"# photons (photons) {idx}: {np} {np >= 4}")
+                print(f"events.Photon {idx}: {e}")
+                print(f"# photons (events) {idx}: {ne} {ne >= 4}")
+            """
+
+            # Pre-selections efficiency
+            Nevents.update({"pre_selections": len(photons)})
+            Nphotons.update({"pre_selections": numpy.unique(awkward.num(photons, axis=1).to_numpy(), return_counts=True)})
+            Nphotons_EB.update({"pre_selections": numpy.unique(awkward.num(photons[photons.isScEtaEB], axis=1).to_numpy(), return_counts=True)})
+            Nphotons_EE.update({"pre_selections": numpy.unique(awkward.num(photons[photons.isScEtaEE], axis=1).to_numpy(), return_counts=True)})
+            """
+            for Np in [Nphotons, Nphotons_EB, Nphotons_EE]:
+                d = dict(zip(Np["pre_selections"][0], Np["pre_selections"][1]))
+                for key, val in d.items():
+                    try:
+                        #assert d[key] <= dict(zip(Np["filters_triggers"][0], Np["filters_triggers"][1]))[key], f"{key} {val} {dict(zip(Np['filters_triggers'][0], Np['filters_triggers'][1]))[key]}"
+                        assert d[key] <= dict(zip(Np["filters_triggers"][0], Np["filters_triggers"][1]))[key], f"{Np['pre_selections']} {Np['filters_triggers']} {d} {dict(zip(Np['filters_triggers'][0], Np['filters_triggers'][1]))}"
+                    except KeyError:
+                        pass
+            """
+            # Previous method was just grabbing the number of photons without numpy.unique
+            #assert numpy.sum(Nphotons["pre_selections"]) <= numpy.sum(Nphotons["filters_triggers"])
+
+            # Store # of photons into diphotons (otherwise it won't save - and dump samples here) to confirm relative efficiency of 4-photon cut.
+            diphotons["count4Photon"] = awkward.num(photons, axis=1)
+
+            # Pre-selections + 4 photon efficiency
+            num_photons = awkward.num(photons, axis=1) >= 4
+            photons = photons[num_photons]
+            diphotons = diphotons[num_photons]
+            assert len(photons) == len(diphotons), f"Photons: {len(photons)}\t Diphotons: {len(diphotons)}"
+            Nevents.update({"pre_selections+4photon": len(photons)})
+            Nphotons.update({"pre_selections+4photon": numpy.unique(awkward.num(photons, axis=1).to_numpy(), return_counts=True)})
+            Nphotons_EB.update({"pre_selections+4photon": numpy.unique(awkward.num(photons[photons.isScEtaEB], axis=1).to_numpy(), return_counts=True)})
+            Nphotons_EE.update({"pre_selections+4photon": numpy.unique(awkward.num(photons[photons.isScEtaEE], axis=1).to_numpy(), return_counts=True)})
+
+            # H4g-specific processing
+            pseudos, diphotons, process_extra = self.process_extra(events.metadata["dataset"], photons, diphotons, True if "signal" in dataset_name.lower() else False, variation)
+            histos_etc.update(process_extra)
+            assert len(pseudos) == len(diphotons), f"Pseudos: {len(pseudos)}\t Diphotons: {len(diphotons)}"
+
+            # Full selections efficiency
+            Nevents.update({"selections": len(pseudos)})
+            Nphotons.update({"selections": numpy.unique((awkward.num(pseudos, axis=1) * 4).to_numpy(), return_counts=True)})
+            Nphotons_EB.update({"selections": numpy.unique((awkward.num(pseudos[pseudos.pho1.isScEtaEB], axis=1) + awkward.num(pseudos[pseudos.pho2.isScEtaEB], axis=1) + awkward.num(pseudos[pseudos.pho3.isScEtaEB], axis=1) + awkward.num(pseudos[pseudos.pho4.isScEtaEB], axis=1)).to_numpy(), return_counts=True)})
+            Nphotons_EE.update({"selections": numpy.unique((awkward.num(pseudos[pseudos.pho1.isScEtaEE], axis=1) + awkward.num(pseudos[pseudos.pho2.isScEtaEE], axis=1) + awkward.num(pseudos[pseudos.pho3.isScEtaEE], axis=1) + awkward.num(pseudos[pseudos.pho4.isScEtaEE], axis=1)).to_numpy(), return_counts=True)})
+
+            # Print Nevents for checks
+            for key, val in Nevents.items():
+                logger.info(f"{key}: {val / Nevents['initial_events'] * 100.0}")
+            """
+            # Look at number of photons per event
+            for key, val in Nphotons.items():
+                unique, counts = numpy.unique(val, return_counts=True)
+                try:
+                    logger.info(f"{key}: (N = {unique[0]}): {counts[0]}  (N = {unique[1]}): {counts[1]}  (N = {unique[2]}): {counts[2]} (N = {unique[3]}): {counts[3]} (N = {unique[4]}): {counts[4]}")
+                except:
+                    try:
+                        logger.info(f"{key}: (N = {unique[0]}): {counts[0]}  (N = {unique[1]}): {counts[1]}  (N = {unique[2]}): {counts[2]} (N = {unique[3]}): {counts[3]}")
+                    except:
+                        try:
+                            logger.info(f"{key}: (N = {unique[0]}): {counts[0]}  (N = {unique[1]}): {counts[1]}  (N = {unique[2]}): {counts[2]}")
+                        except:
+                            try:
+                                logger.info(f"{key}: (N = {unique[0]}): {counts[0]}  (N = {unique[1]}): {counts[1]}")
+                            except:
+                                logger.info(f"{key}: (N = {unique[0]}): {counts[0]}")
+            """
+
+            # return if there is no surviving events (in this case, want at least one pseudoscalar)
+            if len(pseudos) == 0:
+                logger.info("No surviving events in this run, return now!")
+                return histos_etc

            if self.output_location is not None:
+                # Need to convert to ROOT after doing plots + BDT + categories.
+                # Will need to sort out a middle step so the samples can be saved as ROOT directly if that is desired.
+                # Otherwise, use tbe conversion scripts that Sergi added. Instructions below...
+                # Convert to ROOT maintaining directory structure: convert_parquet_to_root_Haa.py
+                # Convert to ROOT as one file (specify era or merge all): convert_parquet_to_root_Haa_oneFile.py
+                # Merge all parquet to one file (specify era or merge all): combine_parquet_outputs_oneFile.py
+                """
                if self.output_format == "root":
                    df = diphoton_list_to_pandas(self, diphotons)
                else:
 @@ -703,29 +818,56 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
                            if "lead_fixedGridRhoAll" not in field
                        ]
                    ]
+                """
+
+                # Handle Nevents to DataFrame
+                if "signal" in dataset_name.lower():
+                    Nevents_clean = {
+                        "initial_events": Nevents["initial_events"],
+                        "filters_triggers": Nevents["filters_triggers"],
+                        "pre_selections": Nevents["pre_selections"],
+                        "pre_selections+4photon": Nevents["pre_selections+4photon"],
+                        "selections": Nevents["selections"],
+                    }
+
+                assert ~awkward.any(awkward.is_none(diphotons)) and ~awkward.any(awkward.is_none(pseudos)), f"{~awkward.any(awkward.is_none(diphotons))} {~awkward.any(awkward.is_none(pseudos))}"
+                assert len(diphotons) == len(pseudos)
+                df = diphoton_list_to_pandas(self, diphotons)
+                df_ps = ps_list_to_pandas(pseudos)
+                evtMix = pandas.Series([1 if "evtMix" in dataset_name else 0] * len(pseudos), name="eventMixing")
+
+                if "Signal" in dataset_name:
+                    eff_N = pandas.Series(list(Nevents_clean.values()), name="Nevents")
+                    photon_N = pandas.Series(list(Nphotons.values()), name="Nphotons")
+                    photon_N_EB = pandas.Series(list(Nphotons_EB.values()), name="Nphotons_EB")
+                    photon_N_EE = pandas.Series(list(Nphotons_EE.values()), name="Nphotons_EE")
+                    final_df = pandas.concat([df, df_ps, evtMix, eff_N, photon_N, photon_N_EB, photon_N_EE], axis=1)
+
+                    # NOTE: If there are NaN values in events then look at the assert below. len(final_df) will be a multiple of 5 (for eff_N) and the others will not be.
+                    # When there are fewer than 5 events, eff_N is bigger than the other arrays and those events are filled with NaN. This fine/expected behavior.
+                    # A similar effect is seen with Nphotons
+                    assert not final_df.drop(columns=["Nevents", "Nphotons", "Nphotons_EB", "Nphotons_EE"]).isnull().values.any() or not (len(final_df) == len(df) == len(df_ps) == len(evtMix)), f"final_df: {final_df.drop(columns=['Nevents', 'Nphotons', 'Nphotons_EB', 'Nphotons_EE']).isnull().values.any()}  diphoton: {df.isnull().values.any()}  pseudos: {df_ps.isnull().values.any()}  evtMix: {evtMix.isnull().values.any()}  eff_N {eff_N.isnull().values.any()}\n {len(final_df)} {len(df)} {len(df_ps)} {len(evtMix)}"

-                fname = (
-                    events.behavior[
-                        "__events_factory__"
-                    ]._partition_key.replace("/", "_")
-                    + ".%s" % self.output_format
-                )
-                fname = (fname.replace("%2F","")).replace("%3B1","")
+                else:
+                    final_df = pandas.concat([df, df_ps, evtMix], axis=1)
+
+                    # NOTE: If there are NaN values in events then look at the assert below. len(final_df) will be a multiple of 5 (for eff_N) and the others will not be.
+                    # When there are fewer than 5 events, eff_N is bigger than the other arrays and those events are filled with NaN. This fine/expected behavior.
+                    # A similar effect is seen with Nphotons
+                    assert not final_df.drop(columns=["Nevents", "Nphotons", "Nphotons_EB", "Nphotons_EE"]).isnull().values.any() or not (len(final_df) == len(df) == len(df_ps) == len(evtMix)), f"final_df: {final_df.drop(columns=['Nevents', 'Nphotons', 'Nphotons_EB', 'Nphotons_EE']).isnull().values.any()}  diphoton: {df.isnull().values.any()}  pseudos: {df_ps.isnull().values.any()}  evtMix: {evtMix.isnull().values.any()}  eff_N {eff_N.isnull().values.any()}\n {len(final_df)} {len(df)} {len(df_ps)} {len(evtMix)}"
+
+                fname = (events.behavior["__events_factory__"]._partition_key.replace("/", "_") + ".parquet")
                subdirs = []
                if "dataset" in events.metadata:
                    subdirs.append(events.metadata["dataset"])
-                subdirs.append(do_variation)
-                if self.output_format == "root":
-                    dump_pandas(self, df, fname, self.output_location, subdirs)
-                else:
-                    dump_ak_array(
-                        self, akarr, fname, self.output_location, metadata, subdirs,
-                    )
-            #logger.info("Likely works")
+                subdirs.append(variation)
+                dump_pandas(self, final_df, fname, self.output_location, subdirs)
+
        return histos_etc

    def postprocess(self, accumulant: Dict[Any, Any]) -> Any:
-        raise NotImplementedError
+        #raise NotImplementedError
+        pass

    def add_diphoton_mva(
        self, diphotons: awkward.Array, events: awkward.Array
 @@ -803,3 +945,226 @@ class HggBaseProcessor(processor.ProcessorABC):  # type: ignore
        corr_mva = awkward.where(isEB, corr_mva_EB, corr_mva_EE)

        return corr_mva
+
+
+    def produce_and_select_ps(self, meta: str, photons: awkward.Array, diphotons: awkward.Array, signal: bool, variation: str) -> awkward.Array:
+            # Sort photons by pt
+            photons = photons[awkward.argsort(photons.pt, ascending=False, axis=1)]
+
+            # Keep 4 with highest pt
+            photons = photons[:,:4]
+
+            # Set mass and charge fields to zeros
+            photons["mass"] = numpy.zeros_like(photons.pt)
+            photons["charge"] = numpy.zeros_like(photons.pt)
+
+            # Make fields for photons and add photon_group to pseudos
+            pseudos = awkward.combinations(photons, 4, fields=["pho1", "pho2", "pho3", "pho4"])  # easiest/most compact method to add 4 fields
+            pseudos.pho1 = awkward.with_name(pseudos.pho1, "PtEtaPhiMCandidate")
+            pseudos.pho2 = awkward.with_name(pseudos.pho2, "PtEtaPhiMCandidate")
+            pseudos.pho3 = awkward.with_name(pseudos.pho3, "PtEtaPhiMCandidate")
+            pseudos.pho4 = awkward.with_name(pseudos.pho4, "PtEtaPhiMCandidate")
+            pseudos["mass_gggg"] = (pseudos.pho1 + pseudos.pho2 + pseudos.pho3 + pseudos.pho4).mass
+
+            # Add sumPt
+            pseudos["sumPt_gggg"] = pseudos.pho1.pt + pseudos.pho2.pt + pseudos.pho3.pt + pseudos.pho4.pt
+
+            # Pt cuts
+            lead_pt = pseudos.pho1.pt >= self.lead_pt
+            sublead_pt = pseudos.pho2.pt >= self.sublead_pt
+            min_pt = (pseudos.pho3.pt >= self.min_pt) & (pseudos.pho4.pt >= self.min_pt)
+            pt_cuts = lead_pt & sublead_pt & min_pt
+
+            # Eta cuts
+            eta_1 = (abs(pseudos.pho1.eta) < self.eta_gap_low) | (abs(pseudos.pho1.eta) > self.eta_gap_high)
+            eta_2 = (abs(pseudos.pho2.eta) < self.eta_gap_low) | (abs(pseudos.pho2.eta) > self.eta_gap_high)
+            eta_3 = (abs(pseudos.pho3.eta) < self.eta_gap_low) | (abs(pseudos.pho3.eta) > self.eta_gap_high)
+            eta_4 = (abs(pseudos.pho4.eta) < self.eta_gap_low) | (abs(pseudos.pho4.eta) > self.eta_gap_high)
+            eta_gap = eta_1 & eta_2 & eta_3 & eta_4
+            eta_max = (abs(pseudos.pho1.eta) < self.eta_max) & (abs(pseudos.pho2.eta) < self.eta_max) & (abs(pseudos.pho3.eta) < self.eta_max) & (abs(pseudos.pho4.eta) < self.eta_max)
+            eta_cuts = eta_gap & eta_max
+
+            # Mass cuts
+            mass_low = pseudos.mass_gggg >= self.mass_range_low
+            mass_high = pseudos.mass_gggg <= self.mass_range_high
+            mass_cuts = mass_low & mass_high
+
+            # Combine all cuts
+            all_cuts = pt_cuts & eta_cuts & mass_cuts
+            assert len(all_cuts) == len(awkward.any(all_cuts, axis=1)), f"{len(all_cuts)} {len(awkward.any(all_cuts, axis=1))}"
+            # It looks like the 4-photon cut on photons is correct (see below)
+            # The cuts for pseudoscalars have been wrong in the past since it was done using flatten instead of ak.any.
+            # The number of flattened cuts is the same as events left with 4 photons (makes sense that # of cuts is only as many as events left)
+            assert len(awkward.flatten(all_cuts)) == len(photons[awkward.num(photons, axis=1) >= 4]), f"{len(awkward.flatten(all_cuts))} {len(photons[awkward.num(photons, axis=1) >= 4])}"
+            all_cuts = awkward.any(all_cuts, axis=1)
+
+            # Get length to compare after making pseudoscalars
+            len_ps = len(pseudos)
+
+            def makePsPermutation(ps_array, perm):
+                phoA = ps_array[f"pho{perm[0]}"]
+                phoB = ps_array[f"pho{perm[1]}"]
+                assert awkward.all(awkward.all((phoA.pt >= phoB.pt), axis=1)), "PhoA pT must be bigger than PhoB pT!"
+
+                # Ensure shape of array is (3, N) so we can keep the daughter photons AND the pseudoscalar together
+                ps_perm = awkward.zip({"ps": phoA + phoB, "leading_pho": phoA, "subleading_pho": phoB})
+
+                for field in ["ps", "leading_pho", "subleading_pho"]:
+                    ps_perm[field] = ps_perm[field]
+                    ps_perm[(field, "pt")] = ps_perm[field].pt
+                    ps_perm[(field, "eta")] = ps_perm[field].eta
+                    ps_perm[(field, "phi")] = ps_perm[field].phi
+                    ps_perm[(field, "mass")] = ps_perm[field].mass
+                    
+                ps_perm.ps = awkward.with_name(ps_perm.ps, "PtEtaPhiMCandidate")
+                ps_perm["leading_pho"] = awkward.with_name(ps_perm.leading_pho, "PtEtaPhiMCandidate")
+                ps_perm["subleading_pho"] = awkward.with_name(ps_perm.subleading_pho, "PtEtaPhiMCandidate")
+
+                return ps_perm
+
+            # Start dM mixing procedure
+            # Permutation A - (1,2,3,4)
+            #dM1 = abs((pseudos.pho1 + pseudos.pho2).mass - (pseudos.pho3 + pseudos.pho4).mass)
+            Ps11 = makePsPermutation(pseudos, (1,2))
+            Ps12 = makePsPermutation(pseudos, (3,4))
+            dM1 = abs(Ps11.ps.mass - Ps12.ps.mass)
+
+            # Permutation B - (1,3,2,4)
+            #dM2 = abs((pseudos.pho1 + pseudos.pho3).mass - (pseudos.pho2 + pseudos.pho4).mass)
+            Ps21 = makePsPermutation(pseudos, (1,3))
+            Ps22 = makePsPermutation(pseudos, (2,4))
+            dM2 = abs(Ps21.ps.mass - Ps22.ps.mass)
+
+            # Permutation C - (1,4,2,3)
+            #dM3 = abs((pseudos.pho1 + pseudos.pho4).mass - (pseudos.pho2 + pseudos.pho3).mass)
+            Ps31 = makePsPermutation(pseudos, (1,4))
+            Ps32 = makePsPermutation(pseudos, (2,3))
+            dM3 = abs(Ps31.ps.mass - Ps32.ps.mass)
+
+            # Store eta information for daughter photons
+            Ps11[("ps", "gg_inEB")] = (Ps11.leading_pho.eta < self.eta_gap_low) & (Ps11.subleading_pho.eta < self.eta_gap_low)
+            Ps12[("ps", "gg_inEB")] = (Ps12.leading_pho.eta < self.eta_gap_low) & (Ps12.subleading_pho.eta < self.eta_gap_low)
+            Ps21[("ps", "gg_inEB")] = (Ps21.leading_pho.eta < self.eta_gap_low) & (Ps21.subleading_pho.eta < self.eta_gap_low)
+            Ps22[("ps", "gg_inEB")] = (Ps22.leading_pho.eta < self.eta_gap_low) & (Ps22.subleading_pho.eta < self.eta_gap_low)
+            Ps31[("ps", "gg_inEB")] = (Ps31.leading_pho.eta < self.eta_gap_low) & (Ps31.subleading_pho.eta < self.eta_gap_low)
+            Ps32[("ps", "gg_inEB")] = (Ps32.leading_pho.eta < self.eta_gap_low) & (Ps32.subleading_pho.eta < self.eta_gap_low)
+
+            # Masks to determine lead/sublead pseudoscalars
+            mask11 = Ps11.ps.pt >= Ps12.ps.pt
+            mask12 = Ps11.ps.pt < Ps12.ps.pt
+            mask21 = Ps21.ps.pt >= Ps22.ps.pt
+            mask22 = Ps21.ps.pt < Ps22.ps.pt
+            mask31 = Ps31.ps.pt >= Ps32.ps.pt
+            mask32 = Ps31.ps.pt < Ps32.ps.pt
+
+            # Minimize dM and create associated masks
+            dM_min = awkward.min(awkward.concatenate((dM1, dM2, dM3), axis=1), axis=1)
+            dM_mask1 = (dM_min == dM1)
+            dM_mask2 = (dM_min == dM2)
+            dM_mask3 = (dM_min == dM3)
+
+            # Lead/sublead pseudoscalars for each permutation
+            LeadPs1 = awkward.concatenate((Ps11[mask11], Ps12[mask12]), axis=1)
+            SubleadPs1 = awkward.concatenate((Ps11[mask12], Ps12[mask11]), axis=1)
+            LeadPs2 = awkward.concatenate((Ps21[mask21], Ps22[mask22]), axis=1)
+            SubleadPs2 = awkward.concatenate((Ps21[mask22], Ps22[mask21]), axis=1)
+            LeadPs3 = awkward.concatenate((Ps31[mask31], Ps32[mask32]), axis=1)
+            SubleadPs3 = awkward.concatenate((Ps31[mask32], Ps32[mask31]), axis=1)
+
+            # Apply masks and make final arrays
+            dM = awkward.concatenate((dM1[dM_mask1], dM2[dM_mask2], dM3[dM_mask3]), axis=1)
+            LeadPs = awkward.concatenate((LeadPs1[dM_mask1], LeadPs2[dM_mask2], LeadPs3[dM_mask3]), axis=1)
+            SubleadPs = awkward.concatenate((SubleadPs1[dM_mask1], SubleadPs2[dM_mask2], SubleadPs3[dM_mask3]), axis=1)
+
+            # Add dM, LeadPs, SubleadPs to events
+            pseudos["dM"] = dM
+            pseudos["LeadPs"] = LeadPs.ps
+            pseudos[("LeadPs", "leading_pho")] = LeadPs.leading_pho
+            pseudos[("LeadPs", "subleading_pho")] = LeadPs.subleading_pho
+            pseudos["SubleadPs"] = SubleadPs.ps
+            pseudos[("SubleadPs", "leading_pho")] = SubleadPs.leading_pho
+            pseudos[("SubleadPs", "subleading_pho")] = SubleadPs.subleading_pho
+
+            # Ensure length of pseudos stayed the same during mixing/optimization
+            assert len_ps == len(pseudos)
+
+            # Ensure fields in pseudos
+            for field in ["pho1", "pho2", "pho3", "pho4", "LeadPs", "SubleadPs"]:
+                pseudos[field] = pseudos[field]
+                pseudos[(field, "pt")] = pseudos[field].pt
+                pseudos[(field, "eta")] = pseudos[field].eta
+                pseudos[(field, "phi")] = pseudos[field].phi
+                pseudos[(field, "mass")] = pseudos[field].mass
+
+                # Add iso_charged_rel to photons for plotting checks
+                if field != "LeadPs" and field != "SubleadPs":
+                    pseudos[(field, "pfRelIso_photon_pt")] = pseudos[field].pfRelIso03_chg_quadratic * pseudos[field].pt
+
+            # Set behavior for pseudoscalar vectors (re vector module)
+            pseudos["LeadPs"] = awkward.with_name(pseudos.LeadPs, "PtEtaPhiMCandidate")
+            pseudos["SubleadPs"] = awkward.with_name(pseudos.SubleadPs, "PtEtaPhiMCandidate")
+            pseudos["pT1_ma1"] = pseudos.LeadPs.leading_pho.pt / pseudos.LeadPs.mass
+            pseudos["pT2_ma1"] = pseudos.LeadPs.subleading_pho.pt / pseudos.LeadPs.mass
+            pseudos["pT1_ma2"] = pseudos.SubleadPs.leading_pho.pt / pseudos.SubleadPs.mass
+            pseudos["pT2_ma2"] = pseudos.SubleadPs.subleading_pho.pt / pseudos.SubleadPs.mass
+
+            # Store dR between pseudoscalars
+            pseudos["dR_aa"] = pseudos.LeadPs.delta_r(pseudos.SubleadPs)
+            pseudos["dR_aa_mass_gggg"] = pseudos.dR_aa / pseudos.mass_gggg
+            
+            # Store dR between daughter photons
+            pseudos[("LeadPs", "dR_gg")] = pseudos.LeadPs.leading_pho.delta_r(pseudos.LeadPs.subleading_pho)
+            pseudos[("SubleadPs", "dR_gg")] = pseudos.SubleadPs.leading_pho.delta_r(pseudos.SubleadPs.subleading_pho)
+
+            # Make cos(theta_ag) variables
+            # Get leading photon in lab frame
+            lead_pho = awkward.concatenate((
+                pseudos.pho1[(pseudos.pho1.pt >= pseudos.pho2.pt) & (pseudos.pho1.pt >= pseudos.pho3.pt) & (pseudos.pho1.pt >= pseudos.pho4.pt)],
+                pseudos.pho2[(pseudos.pho2.pt >= pseudos.pho1.pt) & (pseudos.pho2.pt >= pseudos.pho3.pt) & (pseudos.pho2.pt >= pseudos.pho4.pt)],
+                pseudos.pho3[(pseudos.pho3.pt >= pseudos.pho1.pt) & (pseudos.pho3.pt >= pseudos.pho2.pt) & (pseudos.pho3.pt >= pseudos.pho4.pt)],
+                pseudos.pho4[(pseudos.pho4.pt >= pseudos.pho1.pt) & (pseudos.pho4.pt >= pseudos.pho2.pt) & (pseudos.pho4.pt >= pseudos.pho3.pt)]),
+                axis=1
+            )
+            LeadPs_Hframe = pseudos.LeadPs.boost(-(pseudos.LeadPs + pseudos.SubleadPs).boostvec)
+            #pho1_Aframe = lead_pho.boost(-pseudos.LeadPs.boostvec)
+            pho1_Aframe = pseudos.pho1.boost(-pseudos.LeadPs.boostvec)  # Want leading photon in lab frame
+            pseudos["cos_ag"] = LeadPs_Hframe.unit.dot(pho1_Aframe.unit)
+
+            # Make hypMass for data/eventMixing
+            if not signal:
+                m_hyp = numpy.random.choice([float(x) for x in range(15, 65, 5)], len(pseudos.LeadPs.mass))
+                assert len(m_hyp) == len(pseudos.LeadPs.mass)
+            else:
+                m_hyp = numpy.full_like(pseudos.LeadPs.mass, float(meta.replace("2018_","")[7:9]))
+                assert len(m_hyp) == len(pseudos.LeadPs.mass)
+
+            # Make other BDT input variables
+            pseudos["m_hyp"] = m_hyp
+            pseudos["LeadPs_interMass"] = (pseudos.LeadPs.mass - m_hyp) / pseudos.mass_gggg
+            pseudos["SubleadPs_interMass"] = (pseudos.SubleadPs.mass - m_hyp) / pseudos.mass_gggg
+            pseudos["Ps_massDiff"] = pseudos.LeadPs.mass - pseudos.SubleadPs.mass
+
+            """
+            print("pseudos pseudos[all_cuts]", len(pseudos), len(pseudos[all_cuts]))
+            print("pseudos pseudos[all_cuts]", len(pseudos), len(pseudos[all_cuts]))
+            """
+
+            """
+            # This takes 2 hours to complete with one file per mass point so don't run this. Nothing is printed, so there are no Nones in the array!
+            # Check behavior of cuts on photons and events.Photon (see notes Feb 24, 2025). To use this, need to put back cuts on events.Photon and return it from pre-selection.
+            for idx, (ps,nps,psc,sm) in enumerate(zip(pseudos.to_list()[:5], awkward.num(pseudos, axis=1)[:5], all_cuts.to_list()[:5], (~awkward.is_none(pseudos[all_cuts])).to_list()[:5])):
+                if sm == True:
+                    continue
+                print(f"pseudos {idx}: {ps}")
+                print(f"pseudos cut {idx}: {psc}")
+                print(f"pseudos mask {idx}: {sm}")
+                print(f"# pseudos {idx}: {nps}")
+            """
+
+            # Ensure at least no events without pseudoscalars
+            assert len(pseudos) == len(diphotons), f"{len(pseudos)}\t {len(diphotons)}"
+            pseudos = pseudos[all_cuts]
+            diphotons = diphotons[~awkward.is_none(pseudos)]
+            psuedos = pseudos[~awkward.is_none(pseudos)]
+
+            return pseudos, diphotons, all_cuts