Skip to content
Snippets Groups Projects

Merging full workflow into Tanay's HiggsDNA

Open Sergi Castells requested to merge castells/higgs-dna-4-gamma-tanays-copy:master into master
Compare and Show latest version
1 file
+ 59
3
Compare changes
  • Side-by-side
  • Inline
@@ -449,6 +449,57 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
warnings.warn(f"Could not process correction {correction_name}.")
continue
# Do event mixing
if "EvtMix" in dataset_name:
num_cut = awkward.num(events.Photon, axis=1) >= 4
events = events[num_cut]
events = events[~awkward.is_none(events.Photon)]
tmp_events = events
tmp_events2 = events
Nevents.update({"event-mixing-4photon-cut": len(events)})
tmp_photons = None
# Do offset first so each mixing scheme is done in one go
# Repeat this N times to artificially increase background statistics
# Can optimally do floor[(# of events - 1) / 3] cycles of mixing if we don't want to repeat photons
cycles = 10
for offset in range(cycles):
artificial_stats_photons = events.Photon
# Sort photons by pt
artificial_stats_photons = artificial_stats_photons[awkward.argsort(artificial_stats_photons.pt, ascending=False, axis=1)]
# Keep 4 with highest pt
artificial_stats_photons = artificial_stats_photons[:,:4]
# Shift photons within events and replace per field
for field in artificial_stats_photons.fields:
# Remove 1st/2nd/3rd event and append them to end of array (shifts up events.Photon array)
shift = numpy.array([1,2,3])
shift = (shift + offset).tolist()
artificial_stats_photons[:,0][field] = artificial_stats_photons[:,0][field] # no shift
artificial_stats_photons[:,1][field] = awkward.concatenate((artificial_stats_photons[shift[0]:], artificial_stats_photons[:shift[0]]))[:,1][field]
artificial_stats_photons[:,2][field] = awkward.concatenate((artificial_stats_photons[shift[1]:], artificial_stats_photons[:shift[1]]))[:,2][field]
artificial_stats_photons[:,3][field] = awkward.concatenate((artificial_stats_photons[shift[2]:], artificial_stats_photons[:shift[2]]))[:,3][field]
# Append increased statistics to original array
if offset == 0:
tmp_photons = artificial_stats_photons
tmp_events2.Photon = artificial_stats_photons
tmp_events = tmp_events2
elif offset >= 1:
tmp_photons = awkward.concatenate((tmp_photons, artificial_stats_photons), axis=0)
tmp_events2.Photon = artificial_stats_photons
tmp_events = awkward.concatenate((tmp_events, tmp_events2), axis=0)
# Add to tmp_events first so events is the same size as tmp_photons. Otherwise, it is offset by N/cycles * (cycles + 1)
events = tmp_events
assert len(events) == len(events.Photon) == len(tmp_photons), f"events: {len(events)}\t events.Photon: {len(events.Photon)}\t tmp_photons: {len(tmp_photons)}\t tmp_photons num: {awkward.num(tmp_photons, axis=0)}\t Cycle: {offset}"
"""
# Do event mixing - activated if "EvtMix" is in the dataset name in the samples JSON
if "EvtMix" in dataset_name:
num_cut = awkward.num(events.Photon, axis=1) >= 4
@@ -480,11 +531,12 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
artificial_stats_photons[:,2][field] = awkward.concatenate((artificial_stats_photons[shift[1]:], artificial_stats_photons[:shift[1]]))[:,2][field]
artificial_stats_photons[:,3][field] = awkward.concatenate((artificial_stats_photons[shift[2]:], artificial_stats_photons[:shift[2]]))[:,3][field]
# Append increased statistics to original array
tmp_photons = awkward.concatenate((tmp_photons, artificial_stats_photons))
# Append increased statistics to original array
tmp_photons = awkward.concatenate((tmp_photons, artificial_stats_photons))
else:
tmp_photons = events.Photon
"""
original_photons = tmp_photons
#logger.info("original photons length %i"%len(original_photons)) # kt
@@ -568,7 +620,11 @@ class HggBaseProcessor(processor.ProcessorABC): # type: ignore
variations.append('nominal')
logger.debug(f"[systematics variations] {variations}")
assert len(events.Photon[awkward.num(events.Photon, axis=1) >= 2]) == len(photons_dct["nominal"][awkward.num(photons_dct["nominal"], axis=1) >= 2])
if "EvtMix" in dataset_name:
assert len(events.Photon[awkward.num(events.Photon, axis=1) >= 2]) * 10 == len(photons_dct["nominal"][awkward.num(photons_dct["nominal"], axis=1) >= 2])
elif "EvtMix" not in dataset_name:
assert len(events.Photon[awkward.num(events.Photon, axis=1) >= 2]) == len(photons_dct["nominal"][awkward.num(photons_dct["nominal"], axis=1) >= 2])
for variation in variations:
photons, jets = photons_dct["nominal"], events.Jet
if variation == "nominal":
Loading