From 727a812a76fa2fb59a4781b9084f3b9e556f6f04 Mon Sep 17 00:00:00 2001
From: Nicole Skidmore <nicola.skidmore@cern.ch>
Date: Thu, 20 Mar 2025 04:39:24 +0000
Subject: [PATCH] turbospruce persistence expansion

---
 .../options/sprucing/hlt2_foroverlapcheck.py  |  7 ++-
 .../sprucing/lbexec_yamls/spruce_overlap.yaml |  4 +-
 .../sprucing/lbexec_yamls/turbo_overlap.yaml  |  6 +--
 .../python/Hlt2Conf/lines/test/spruce_test.py | 24 +++++++++
 .../spruce_turbopass_overlap_check.py         | 17 ++++++-
 Hlt/Moore/python/Moore/lines.py               | 50 ++++++++++++++++++-
 .../Moore/persistence/particle_moving.py      | 16 +++++-
 Hlt/Moore/python/Moore/production.py          | 28 +++++++++--
 8 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/Hlt/Hlt2Conf/options/sprucing/hlt2_foroverlapcheck.py b/Hlt/Hlt2Conf/options/sprucing/hlt2_foroverlapcheck.py
index 2e49a65fc33..9e7729308f8 100644
--- a/Hlt/Hlt2Conf/options/sprucing/hlt2_foroverlapcheck.py
+++ b/Hlt/Hlt2Conf/options/sprucing/hlt2_foroverlapcheck.py
@@ -14,8 +14,7 @@ HLT2 options to test overlap of HLT2 lines and the solution to this using Spruci
 
 """
 
-from Hlt2Conf.lines.test.spruce_test import Test_extraoutputs_hlt2_line
-from Hlt2Conf.lines.topological_b import threebody_line
+from Hlt2Conf.lines.test.spruce_test import Test_extraoutputs_hlt2_line, threebody_line
 from Moore import options, run_moore
 from Moore.lines import Hlt2Line
 from RecoConf.global_tools import stateProvider_with_simplified_geom
@@ -36,8 +35,8 @@ def make_lines():
     return [
         # Bespoke version of `b2kstgamma_line` that has extra_outputs and all the other persistency options set to true
         Test_extraoutputs_hlt2_line("Hlt2Lineone_extraoutputs"),
-        # Normal 3body TOPO line that should have good efficiency on BdToKstgamma
-        threebody_line("Hlt2Linetwo", persistreco=False),
+        # Bespoke version of 3 body TOPO line (good efficiency on BdToKstgamma), with NeutralProtos SP persistency
+        threebody_line("Hlt2Linetwo"),
         # Line that does not make any output at `/Particles`
         Hlt2Line(name="Hlt2Linethree", persistreco=True, algs=[], raw_banks=["Muon"]),
     ]
diff --git a/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/spruce_overlap.yaml b/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/spruce_overlap.yaml
index db386a2b248..f76fae06a58 100644
--- a/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/spruce_overlap.yaml
+++ b/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/spruce_overlap.yaml
@@ -1,14 +1,12 @@
 input_files: ['hlt2_foroverlapcheck.mdf']
 input_type: 'RAW'
-simulation: True
+simulation: False
 output_file : 'spruce_overlaptest.{stream}.dst'
 output_type : 'ROOT'
 output_manifest_file : "spruce_overlaptest.tck.json"
 input_streams_attributes_file: "line_attribute_dict.json"
 input_process: Hlt2
 evt_max: -1
-geometry_version: run3/trunk
-conditions_version: jonrob/all-pmts-active
 
 process: TurboSpruce
 input_raw_format : 0.5
diff --git a/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/turbo_overlap.yaml b/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/turbo_overlap.yaml
index 984d63c2d0a..89754fbadd8 100644
--- a/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/turbo_overlap.yaml
+++ b/Hlt/Hlt2Conf/options/sprucing/lbexec_yamls/turbo_overlap.yaml
@@ -1,19 +1,15 @@
 
 input_files: ['hlt2_2or3bodytopo_realtime.mdf']
 input_type: 'RAW'
-simulation: True
+simulation: False
 output_file : 'turbo_overlaptest.{stream}.dst'
 output_type : 'ROOT'
 output_manifest_file : "turbo_overlaptest.tck.json"
 input_process: Hlt2
 evt_max: -1
-geometry_version: run3/trunk
-conditions_version: jonrob/all-pmts-active
-
 
 process: TurboPass
 input_raw_format : 0.5
 
-
 dddb_tag: dddb-20231017
 conddb_tag: sim-20231017-vc-mu100
diff --git a/Hlt/Hlt2Conf/python/Hlt2Conf/lines/test/spruce_test.py b/Hlt/Hlt2Conf/python/Hlt2Conf/lines/test/spruce_test.py
index 23a4a1c81fe..e0d2a431c0a 100644
--- a/Hlt/Hlt2Conf/python/Hlt2Conf/lines/test/spruce_test.py
+++ b/Hlt/Hlt2Conf/python/Hlt2Conf/lines/test/spruce_test.py
@@ -338,6 +338,30 @@ def Test_extraoutputs_hlt2_line(name="Hlt2Test_ExtraOutputs", prescale=1):
     )
 
 
+# This lives here as it is used to test sprucing
+# Bespoke version of topo that has selective persistreco
+from Moore.persistence.persistreco import persistreco_line_outputs
+from RecoConf.reconstruction_objects import reconstruction
+
+from Hlt2Conf.lines.topological_b import make_filtered_topo_threebody
+
+
+def threebody_line(name="Hlt2Topo3Body", prescale=1):
+    """Bespoke 3-body topo HLT2 line with selective persistreco.
+
+    Only the "NeutralProtos" reco output is requested, via `extra_outputs`.
+    """
+    # Reco objects are passed to `extra_outputs` with an empty prefix
+    neutral_protos = persistreco_line_outputs(reconstruction())["NeutralProtos"]
+    topo_candidates = make_filtered_topo_threebody(MVACut=0.998)
+    return Hlt2Line(
+        name=name,
+        algs=[topo_candidates],
+        prescale=prescale,
+        extra_outputs=[("", neutral_protos)],
+    )
+
+
 def Test_persistreco_sprucing_line(name="SpruceTest_PersistReco", prescale=1):
     kst, b0 = b2kstgamma_line()
 
diff --git a/Hlt/Hlt2Conf/tests/options/sprucing/spruce_turbopass_overlap_check.py b/Hlt/Hlt2Conf/tests/options/sprucing/spruce_turbopass_overlap_check.py
index dbe1e1c90c2..5d8dc294194 100644
--- a/Hlt/Hlt2Conf/tests/options/sprucing/spruce_turbopass_overlap_check.py
+++ b/Hlt/Hlt2Conf/tests/options/sprucing/spruce_turbopass_overlap_check.py
@@ -29,7 +29,7 @@ args = parser.parse_args()
 
 ##Prepare application
 options.data_type = "Upgrade"
-options.simulation = True
+options.simulation = False
 options.geometry_version = "run3/trunk"
 options.conditions_version = "jonrob/all-pmts-active"
 options.input_files = [args.i]
@@ -77,6 +77,7 @@ else:
 
 lineone_extraoutputs = ["LongTracks", "LongerTracks"]
 reco_locs = ["Calo/Electrons", "ProtoP/Long"]
+reco_locs_forlinetwo = ["ProtoP/Neutrals"]
 
 
 def check_particles(TES, prefix):
@@ -186,7 +187,7 @@ for ii in range(nevents):
                 check_particles(TES, f"/Event/Turbo/{linetwo}")
                 check_not_banks(TES, options.input_stream, [9])
 
-                # Check persistreco
+                # Check that the full persistreco is not present
                 for reco in reco_locs:
                     persistreco = TES[f"/Event/Turbo/HLT2/Rec/{reco}"]
                     if not persistreco or persistreco.size() <= 0:
@@ -195,6 +196,18 @@ for ii in range(nevents):
                         raise RuntimeError(
                             f"Check ERROR - reco locations propagated. There are {persistreco.size()} {reco} - this is wrong"
                         )
+                # Check that the selective persistreco is present
+                for reco in reco_locs_forlinetwo:
+                    persistreco = TES[f"/Event/Turbo/HLT2/Rec/{reco}"]
+                    if not persistreco or persistreco.size() <= 0:
+                        # No print of .size() here: persistreco may be missing (null)
+                        raise RuntimeError(
+                            "Check ERROR - Reco locations not propagated"
+                        )
+                    else:
+                        print(
+                            f"Selective reco locations propagated. There are {persistreco.size()} {reco}"
+                        )
 
                 # Cannot check absense of CALO objs with linetwo
 
diff --git a/Hlt/Moore/python/Moore/lines.py b/Hlt/Moore/python/Moore/lines.py
index a3338ac2e24..05e5440fcc2 100644
--- a/Hlt/Moore/python/Moore/lines.py
+++ b/Hlt/Moore/python/Moore/lines.py
@@ -22,6 +22,7 @@ from PyConf.dataflow import DataHandle
 from PyConf.reading import get_decreports
 from RecoConf.reconstruction_objects import reconstruction
 
+from Moore.persistence.particle_moving import get_final_location
 from Moore.persistence.persistreco import persistreco_line_outputs
 from Moore.selreports import (
     UnconvertableAlgorithmError,
@@ -39,6 +40,17 @@ DECISION_SUFFIX = "Decision"
 log = logging.getLogger(__name__)
 
 
+from GaudiConf.reading import type_map
+
+
+def _get_type(dh):
+    """Return the type_map() name for ``dh.type`` minus "Selection", or None."""
+    known = type_map()
+    if dh.type not in known:
+        return None
+    return known[dh.type].replace("Selection", "")
+
+
 def _producer(datahandle_or_producer):
     try:
         return datahandle_or_producer.producer
@@ -499,7 +511,42 @@ class Hlt2Line(DecisionLine):
 
     @property
     def __dict__(self):
-        extra_outputs = [loc[0] for loc in self.extra_outputs]
+        reco_objs = reconstruction()
+        prdict = persistreco_line_outputs(reco_objs)
+
+        # Extra outputs support particles, reco and relations
+        extra_outputs = []
+        for entry in self.extra_outputs:
+            prefix, output = entry
+            output_type = _get_type(output)
+            # _get_type strips "Selection"; particle containers need the full name
+            output_type = (
+                "ParticlesSelection" if output_type == "Particles" else output_type
+            )
+
+            if prefix == "" and output in prdict.values():  # for selective reco case
+                entry_new = [
+                    prefix,
+                    [k for k, v in prdict.items() if output == v][0],
+                    output_type,
+                    "reco",
+                ]
+            elif prefix != "" and output_type in [
+                "ParticlesSelection",
+                "P2InfoRelations",
+            ]:  # for selective particle and additional info case
+                entry_new = [
+                    prefix,
+                    get_final_location(output, (self.name, prefix))[0],
+                    output_type,
+                    "",
+                ]
+            else:
+                raise ValueError(
+                    f"extra_output {entry} for line {self.name} is not supported in current persistency framework and will not be persisted"
+                )
+            extra_outputs.append(tuple(entry_new))
+
         is_output_producer = True if self.output_producer else False
         return {
             "name": self.name,
@@ -542,6 +589,7 @@ class Hlt2Line(DecisionLine):
         prdict = persistreco_line_outputs(reco_objs)
 
         for prefix, output in extra_outputs:
+            # !!!! ANY additions to what type of objects `extra_outputs` supports will need to be implemented for turbospruce persistency as well - contact DPA WP1
             if not isinstance(output, DataHandle):
                 raise ValueError(
                     "extra_outputs for lines have to be DataHandles. {!r} for line {!r} is not a DataHandle. ".format(
diff --git a/Hlt/Moore/python/Moore/persistence/particle_moving.py b/Hlt/Moore/python/Moore/persistence/particle_moving.py
index be55886d527..791bc57de6b 100644
--- a/Hlt/Moore/python/Moore/persistence/particle_moving.py
+++ b/Hlt/Moore/python/Moore/persistence/particle_moving.py
@@ -71,8 +71,9 @@ def cloning_map():
     }
 
 
-def copy_to_final_location(input, path_components):
-    """Return an algorithm that copies Particle objects.
+def get_final_location(input, path_components):
+    """
+    Determine the final output location from a DataHandle.
 
     Args:
         input (DataHandle): input container to be copied.
@@ -92,7 +93,18 @@ def copy_to_final_location(input, path_components):
     # We will add the suffix in this method to ensure consistency
     base = os.path.join("/Event", *path_components)
     output_loc = os.path.join(base, input_type)
+    return output_loc, input_type
+
+
+def copy_to_final_location(input, path_components):
+    """Return an algorithm that copies Particle objects.
 
+    Args:
+        input (DataHandle): input container to be copied.
+        path_components (list of str): Definition of TES path, relative to
+        '/Event', under which to store copied objects.
+    """
+    output_loc, input_type = get_final_location(input, path_components)
     # Keep track of forced output locations to make sure we don't write to the
     # same place twice
     assert output_loc not in _FORCED_LOCATIONS, (
diff --git a/Hlt/Moore/python/Moore/production.py b/Hlt/Moore/python/Moore/production.py
index bf43c08ec37..b2afe531c50 100644
--- a/Hlt/Moore/python/Moore/production.py
+++ b/Hlt/Moore/python/Moore/production.py
@@ -14,6 +14,7 @@ import json
 import re
 from contextlib import contextmanager
 
+from PyConf.reading import _get_unpacked
 from PyConf.utilities import ConfigurationError
 
 from Moore import Options
@@ -622,6 +623,9 @@ def _make_pass_spruceline(line_attributes, custom_prescales={}):
     # )
 
     line_attributes["prescale"] = custom_prescales.get(linename, 1.0)
+    from RecoConf.reconstruction_objects import reconstruction
+
+    from Moore.persistence.persistreco import persistreco_line_outputs
 
     with upfront_decoder.bind(source="Hlt2"):
         hlt2_particles = get_particles(location)
@@ -633,15 +637,29 @@ def _make_pass_spruceline(line_attributes, custom_prescales={}):
             print(
                 f"Line {linename} did not produce location `/Event/HLT2/{linename}/Particles`, skipping this location"
             )
+        reco = reconstruction()
 
         extra_outputs_tuplelist = []
         # get extra_outputs - note might be several of them
         if line_attributes["extra_outputs"]:
-            for loc in line_attributes["extra_outputs"]:
-                # print(f"Adding extra_output {loc} for line {spruce_linename}")
-                extra_outputs_tuplelist.append(
-                    (loc, get_particles(f"/Event/HLT2/{linename}/{loc}/Particles"))
-                )
+            for extra_output in line_attributes["extra_outputs"]:
+                prefix, handle, input_type, description = extra_output
+                if description == "reco":
+                    extra_outputs_tuplelist.append(("", reco[handle]))
+                elif input_type in ["ParticlesSelection", "P2InfoRelations"]:
+                    extra_outputs_tuplelist.append(
+                        (
+                            prefix,
+                            _get_unpacked(
+                                input_type, handle.replace("/Event/", "/Event/HLT2/")
+                            ),
+                        )
+                    )
+                else:
+                    raise ValueError(
+                        f"extra_output {extra_output} for line {linename} is not supported in current persistency framework and will not be persisted"
+                    )
+
         pass_spruceline = SpruceLine(
             name=linename,
             hlt2_filter_code=filter,
-- 
GitLab