From efda02fa8e97d1e99f3781106585c41fc003e182 Mon Sep 17 00:00:00 2001
From: Zach Marshall <ZLMarshall@lbl.gov>
Date: Mon, 17 Mar 2025 22:59:48 +0100
Subject: [PATCH] Updating expected TestHepMC minimum efficiency for low
 Nevents jobs

When jobs generate fewer than 100 events (50 or fewer), then a single
event can cause them to fall below the required TestHepMC efficiency
threshold of 98%. This MR updates the default TestHepMC efficiency in
such cases to N/(N+1) to allow jobs that have one failing event to
complete successfully.

There is some concern that if this happens for *many* jobs with low
numbers of events, the effective TestHepMC efficiency will be low enough
that it should fail (e.g. if all jobs fail one event, and each job
generates 25 events, the total efficiency will be 96% and the jobs
should be failing). Perhaps this is something that can be improved in the
future by tracking a global TestHepMC efficiency for the tasks via AMI
or ProdSys.
---
 Generators/EvgenJobTransforms/share/skel.ABtoEVGEN.py  | 6 ++++++
 Generators/EvgenJobTransforms/share/skel.GENtoEVGEN.py | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/Generators/EvgenJobTransforms/share/skel.ABtoEVGEN.py b/Generators/EvgenJobTransforms/share/skel.ABtoEVGEN.py
index 3a5ea571681c..e87359112229 100644
--- a/Generators/EvgenJobTransforms/share/skel.ABtoEVGEN.py
+++ b/Generators/EvgenJobTransforms/share/skel.ABtoEVGEN.py
@@ -325,6 +325,12 @@ else:
     postSeq.CountHepMC.RequestedOutput = evgenConfig.nEventsPerJob if runArgs.maxEvents == -1  else runArgs.maxEvents
     evgenLog.info('Requested output events = '+str(postSeq.CountHepMC.RequestedOutput))
 
+    # Special case of N<100: adjust TestHepMC. We will allow _one_ event to fail the checks.
+    # This means the minimum efficiency is N/(N+1) for N generated events; the threshold set
+    # below is that value minus 0.01. Note that if N<100, each failed event costs us more than 1% of efficiency.
+    if hasattr(testSeq, "TestHepMC") and postSeq.CountHepMC.RequestedOutput<100:
+        testSeq.TestHepMC.EffFailThreshold = postSeq.CountHepMC.RequestedOutput/(postSeq.CountHepMC.RequestedOutput+1) - 0.01
+
 ## Check that the keywords are in the list of allowed words (and exit if processing an official JO)
 if evgenConfig.keywords:
     from GeneratorConfig.GenConfigHelpers import checkKeywords
diff --git a/Generators/EvgenJobTransforms/share/skel.GENtoEVGEN.py b/Generators/EvgenJobTransforms/share/skel.GENtoEVGEN.py
index f58a9f231a77..a3f8687cf110 100644
--- a/Generators/EvgenJobTransforms/share/skel.GENtoEVGEN.py
+++ b/Generators/EvgenJobTransforms/share/skel.GENtoEVGEN.py
@@ -372,6 +372,12 @@ else:
     postSeq.CountHepMC.RequestedOutput = evgenConfig.nEventsPerJob if runArgs.maxEvents == -1  else runArgs.maxEvents
     evgenLog.info('Requested output events = '+str(postSeq.CountHepMC.RequestedOutput))
 
+    # Special case of N<100: adjust TestHepMC. We will allow _one_ event to fail the checks.
+    # This means the minimum efficiency is N/(N+1) for N generated events; the threshold set
+    # below is that value minus 0.01. Note that if N<100, each failed event costs us more than 1% of efficiency.
+    if hasattr(testSeq, "TestHepMC") and postSeq.CountHepMC.RequestedOutput<100:
+        testSeq.TestHepMC.EffFailThreshold = postSeq.CountHepMC.RequestedOutput/(postSeq.CountHepMC.RequestedOutput+1) - 0.01
+
 ## Check that the keywords are in the list of allowed words (and exit if processing an official JO)
 if evgenConfig.keywords:
     from GeneratorConfig.GenConfigHelpers import checkKeywords
-- 
GitLab