Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (18)
Showing with 674 additions and 210 deletions
@@ -3,7 +3,7 @@
# Maintainer : Ben Couturier
#============================================================================
package PRConfig
version v1r63
version v1r65
#============================================================================
# Structure, i.e. directories to process.
......
@@ -4,6 +4,125 @@
! Purpose : App Configuration for performance and regression tests
!-----------------------------------------------------------------------------
========================= PRConfig v1r65 2023-12-07 =========================
! 2023-12-05 - commit 2fd9c22
- Merge branch 'lugrazet-BW-hlt1-fix-loggingerror' into 'master'
[RTA BW Tests] quick fix for Hlt1 BW test. len(inputs) < 2 breaks logging string
See merge request lhcb-datapkg/PRConfig!363
! 2023-12-05 - commit ee5e840
- Merge branch 'spruce-bw-input-Dec23' into 'master'
Add new Dec2023 samples for Sprucing PR tests
See merge request lhcb-datapkg/PRConfig!364
! 2023-11-29 - commit f4c873b
- Merge branch 'rjhunter-bwtest-cleanup-copies' into 'master'
[RTA BW tests] Small cleanups after !359
See merge request lhcb-datapkg/PRConfig!362
! 2023-11-28 - commit 6f6c963
- Merge branch 'rjhunter-reduce-moore-threads-in-bw-test' into 'master'
[Bandwidth tests] Use LBN_BUILD_JOBS to properly set n_threads on Moore in BW test
See merge request lhcb-datapkg/PRConfig!356
! 2023-11-16 - commit 11208b4
- Merge branch 'rjhunter-chained-test-feasibility' into 'master'
[RTA BW tests] Test feasibility of copying HLT2 output to read into sprucing test
See merge request lhcb-datapkg/PRConfig!359
! 2023-11-08 - commit ac9460c
- Merge branch 'lugrazet-BW-hlt1testpage-cleanup' into 'master'
[RTA BW Tests] BW test page clean-ups
See merge request lhcb-datapkg/PRConfig!355
========================= PRConfig v1r64 2023-11-01 =========================
! 2023-11-01 - commit f7f0f10
- Merge branch 'lugrazet-BW-initialhlt1test' into 'master'
[RTA BW Tests] Introducing an Hlt1-bandwidth test via Moore_in_Allen
See merge request lhcb-datapkg/PRConfig!330
! 2023-10-26 - commit e4b282f
- Merge branch 'bw-test-minor-update' into 'master'
Minor update to BW test page
See merge request lhcb-datapkg/PRConfig!353
! 2023-10-17 - commit 2a05e36
- Merge branch 'audurier-ift-run3' into 'master'
Update Ion sequence and MC datasets
See merge request lhcb-datapkg/PRConfig!328
! 2023-10-17 - commit 9749563
- Merge branch 'rm-2023_raw_hlt1_269939' into 'master'
Add 2023_raw_hlt1_269939 TestFileDB sample
See merge request lhcb-datapkg/PRConfig!352
! 2023-10-11 - commit 81970da
- Merge branch 'sponce_newFileUT' into 'master'
Added new file to be used for Boole tests of the UT
See merge request lhcb-datapkg/PRConfig!335
! 2023-10-09 - commit 51ab80c
- Merge branch 'xueting_addSMOG2jobs_update' into 'master'
Add_2_SMOG_jobs
See merge request lhcb-datapkg/PRConfig!351
! 2023-10-03 - commit dfd6b02
- Merge branch 'dd4hep_future_upgrades_refactor' into 'master'
Fix errors caused by DD4Hep future upgrades refactor
See merge request lhcb-datapkg/PRConfig!350
! 2023-09-28 - commit aab79d9
- Merge branch 'rjhunter-trim-fat-from-BW-tests' into 'master'
Refactor and speed-up the periodic BW tests
See merge request lhcb-datapkg/PRConfig!349
========================= PRConfig v1r63 2023-09-25 =========================
! 2023-09-13 - commit 1fd8aa7
......
@@ -30,7 +30,7 @@ def get_all_event_numbers(args):
return ret
def calculate_similarity_matrix(event_numbers_by_stream):
def get_event_number_matrix(event_numbers_by_stream):
all_event_numbers = set([
evt_no for evt_no_list in event_numbers_by_stream.values()
@@ -48,6 +48,11 @@ def calculate_similarity_matrix(event_numbers_by_stream):
for evt_no in evt_no_list:
df[stream][evt_no] = True
return df
def calculate_similarity_matrix(df):
jaccard = 1 - pairwise_distances(
df.T.to_numpy(), metric='jaccard'
) # .T because pairwise_distances expects the fields we take similarities between to be rows rather than columns
@@ -57,6 +62,18 @@ def calculate_similarity_matrix(event_numbers_by_stream):
return jaccard_sim_matrix_df
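For illustration, a minimal standalone sketch of the Jaccard computation above, on hypothetical two-stream data (assumes pandas and scikit-learn, as imported in the real script):

    import pandas as pd
    from sklearn.metrics import pairwise_distances

    # Boolean event-number-by-stream matrix, as built by get_event_number_matrix
    df = pd.DataFrame({'streamA': [True, True, False],
                       'streamB': [True, False, True]})
    # The two streams share 1 of the 3 fired events -> off-diagonal similarity = 1/3
    jaccard = 1 - pairwise_distances(df.T.to_numpy(), metric='jaccard')
    print(pd.DataFrame(jaccard, index=df.columns, columns=df.columns))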
def calculate_overlap_matrix(df):
cond_prob_per_stream = {stream: [] for stream in df.columns}
for target_stream in df.columns:
for comparison_stream in df.columns:
cond_prob_per_stream[target_stream].append(
sum(df[comparison_stream] * df[target_stream]) / sum(
df[target_stream]))
overlap_matrix_df = pd.DataFrame(
cond_prob_per_stream, columns=df.columns, index=df.columns)
return overlap_matrix_df
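Similarly, a small sketch (hypothetical data) of the conditional overlap defined above: each entry is the fraction of the target stream's events that also fired the comparison stream:

    import pandas as pd

    df = pd.DataFrame({'A': [True, True, False],
                       'B': [True, False, True]})
    # Of the 2 events in A, 1 also fired B -> P(B | A) = 0.5
    print(sum(df['B'] * df['A']) / sum(df['A']))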
def save(df, htmlpath):
# Generate HTML table for similarity matrix
html = df.to_html(float_format=lambda x: f"{x:.1%}")
@@ -68,12 +85,18 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'-p', '--process', type=str, required=True, choices=['hlt2', 'spruce'])
'-p',
'--process',
type=str,
help='Compute for Hlt2 or Sprucing lines',
choices=['hlt2', 'spruce'],
required=True)
parser.add_argument(
'--stream-config',
type=str,
required=True,
choices=["wg", "production"])
help='Choose production or per-WG stream configuration',
choices=['production', 'wg'],
required=True)
parser.add_argument('--streams', nargs='+', type=str, required=True)
args = parser.parse_args()
fname_helper = FileNameHelper(args.process)
@@ -83,14 +106,23 @@ def main():
print(
f"Found {len(event_numbers[stream])} events for {stream} stream.")
df = get_event_number_matrix(event_numbers)
ofile = fname_helper.jaccard_similarities_path(args.stream_config)
sim_matrix = calculate_similarity_matrix(event_numbers)
sim_matrix = calculate_similarity_matrix(df)
print(
f"Calculated similarity matrix. Printing and saving to html at {ofile}."
)
print(sim_matrix)
save(sim_matrix, ofile)
ofile = fname_helper.overlap_matrix_path(args.stream_config)
overlap_matrix = calculate_overlap_matrix(df)
print(
f"Calculated overlap matrix. Printing and saving to html at {ofile}.")
print(overlap_matrix)
save(overlap_matrix, ofile)
if __name__ == "__main__":
main()
@@ -25,19 +25,24 @@ COLUMNS_PER_STREAM = [
]
def _columns_per_line():
# Possibility is here (add an arg) to make the thresholds change based on hlt2/spruce
def _columns_per_line(process):
tols = {
# Tolerances per process.
'hlt1': (1e3, None, 150, 0, 0),
'hlt2': (1, 1e3, 0.2, 1e3, 0.2),
'spruce': (1, 1e3, 0.2, 1e3, 0.2),
}[process]
return {
# col_name, threshold for turning it red to catch the reader's eye
'Line': None,
'Total Retention (%)': None,
'Rate (kHz)': 1,
'Rate (kHz)': tols[0],
'Exclusive Retention(%)': None,
'Exclusive Rate (kHz)': None,
'Avg Total Event Size (kB)': 1e3,
'Total Bandwidth (GB/s)': 0.2,
'Avg DstData Size (kB)': 1e3,
'DstData Bandwidth (GB/s)': 0.2
'Avg Total Event Size (kB)': tols[1],
'Total Bandwidth (GB/s)': tols[2],
'Avg DstData Size (kB)': tols[3],
'DstData Bandwidth (GB/s)': tols[4]
}
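For illustration (hypothetical call, following the tuple order above): with process='hlt1' only 'Rate (kHz)' (tols[0] = 1e3) and 'Total Bandwidth (GB/s)' (tols[2] = 150) carry usable thresholds; the remaining entries are None or 0, and a falsy threshold disables highlighting in the code below:

    cols = _columns_per_line('hlt1')
    assert cols['Rate (kHz)'] == 1e3
    assert cols['Total Bandwidth (GB/s)'] == 150
    assert cols['Avg DstData Size (kB)'] == 0  # falsy -> never highlighted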
@@ -46,7 +51,7 @@ def _sorted_df_by_retention(df):
by=['Total Retention (%)'], ascending=False).reset_index(drop=True)
def rates_all_lines(stream_config, fname_helper):
def rates_all_lines(stream_config, fname_helper, process):
"""Make 1 enormous table with rate/bw info per line for all lines in all streams (i.e. n_rows = n_lines).
Saves to .csv and .html.
stream_config is either "production" or "wg"
@@ -60,7 +65,7 @@ def rates_all_lines(stream_config, fname_helper):
frames.append(df)
df = pd.concat(frames)
df.columns = _columns_per_line().keys()
df.columns = _columns_per_line(process).keys()
df = _sorted_df_by_retention(df)
df.to_csv(fname_helper.final_rate_table_all_lines_path("csv"))
@@ -69,7 +74,7 @@ def rates_all_lines(stream_config, fname_helper):
return f'background-color: {color}' if val > threshold else ''
styler = None
for column, threshold in _columns_per_line().items():
for column, threshold in _columns_per_line(process).items():
# Make cell red if column value greater than threshold
if threshold:
if styler:
@@ -79,14 +84,16 @@ def rates_all_lines(stream_config, fname_helper):
styler = df.style.applymap(
highlight_vals, subset=[column], threshold=threshold)
html = styler.set_table_attributes("border=1").to_html()
html = styler.format(
'{:.3g}', subset=df.columns[
df.columns != 'Line']).set_table_attributes("border=1").to_html()
with open(fname_helper.final_rate_table_all_lines_path("html"), 'w') as f:
f.write(html)
return
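The highlight-then-format Styler pattern used above can be sketched in isolation (hypothetical data; a highlight_vals along the lines of the body shown in this hunk):

    import pandas as pd

    def highlight_vals(val, threshold, color='red'):
        return f'background-color: {color}' if val > threshold else ''

    df = pd.DataFrame({'Line': ['Hlt2Lumi', 'Hlt2Full'],
                       'Rate (kHz)': [0.5, 1500.0]})
    styler = df.style.applymap(
        highlight_vals, subset=['Rate (kHz)'], threshold=1e3)
    # 1500.0 is rendered red and, via format(), as 1.5e+03
    html = styler.format(
        '{:.3g}', subset=df.columns[df.columns != 'Line']).set_table_attributes(
            "border=1").to_html()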
def make_rate_table_row_per_line(stream_config, fname_helper):
def make_rate_table_row_per_line(stream_config, fname_helper, process):
""" Makes (1 table with rate/bw info per line in the streamed mdf) for all <stream_config> streams (i.e. n_tables = n_streams).
Puts them all on 1 html page, adds hyperlinks to jump to the different streams on the page.
Saves to .html page only.
@@ -113,23 +120,25 @@ def make_rate_table_row_per_line(stream_config, fname_helper):
f.write(f'<head>{stream.upper()}</head>')
f.write(f'<a id="{stream}_label">')
df = pd.read_csv(file, header=None)
df.columns = _columns_per_line().keys()
df.columns = _columns_per_line(process).keys()
df = _sorted_df_by_retention(df)
f.write(df.to_html())
f.write(
df.style.format(
'{:.3g}', subset=df.columns[df.columns != 'Line']).
set_table_attributes("border=1").to_html())
f.write('</a>')
f.write('<br/><br/>')
return
def make_rate_table_row_per_stream(stream_config, fname_helper):
def make_rate_table_row_per_stream(stream_config, fname_helper, process):
""" Makes 1 table with rate/bw info integrated over the whole streamed mdf for all <stream_config> streams (i.e. a table with n_rows = n_streams).
Saves to .html and .csv.
stream_config is either "production" or "wg"
"""
frames = []
for file in glob.glob(
fname_helper.tmp_rate_table_per_stream_path(stream_config, "*")):
df = pd.read_csv(file, header=None)
@@ -143,7 +152,10 @@ def make_rate_table_row_per_stream(stream_config, fname_helper):
fname_helper.final_rate_table_all_streams_path(
stream_config, ext="csv"))
html = df.to_html()
html = df.style.format(
'{:.3g}',
subset=df.columns[df.columns != 'Stream']).set_table_attributes(
"border=1").to_html()
with open(
fname_helper.final_rate_table_all_streams_path(
stream_config, ext="html"), 'w') as f:
@@ -155,14 +167,23 @@ def make_rate_table_row_per_stream(stream_config, fname_helper):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--process', type=str, required=True, choices=["hlt2", "spruce"])
'-p',
'--process',
type=str,
help='Compute for Hlt1, Hlt2 or Sprucing lines',
choices=['hlt1', 'hlt2', 'spruce'],
required=True)
args = parser.parse_args()
fname_helper = FileNameHelper(args.process)
stream_configs = ["production", "wg"] if args.process == "hlt2" else ["wg"]
stream_configs, main_stream_config = {
"hlt1": (["streamless"], "streamless"),
"hlt2": (["production", "wg"], "production"),
"spruce": (["wg"], "wg")
}[args.process]
rates_all_lines("production" if args.process == "hlt2" else "wg",
fname_helper)
rates_all_lines(main_stream_config, fname_helper, args.process)
for stream_config in stream_configs:
make_rate_table_row_per_stream(stream_config, fname_helper)
make_rate_table_row_per_line(stream_config, fname_helper)
make_rate_table_row_per_stream(stream_config, fname_helper,
args.process)
make_rate_table_row_per_line(stream_config, fname_helper, args.process)
#!/usr/bin/env python
###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import socket
import os
import atexit
import tempfile
import logging
from datetime import datetime
import shutil
from Moore.qmtest.context import download_mdf_inputs_locally
# Default cache dir is the current working directory as this is most convenient for the machine
# that the test runs on periodically. It assumes the working directory is not cleaned up often,
# and so the files remain available for subsequent jobs.
DEFAULT_CACHE_DIRS = {'default': '.'}
# prefer XDG_RUNTIME_DIR which should be on tmpfs
FALLBACK_CACHE_DIR = os.getenv('XDG_RUNTIME_DIR', tempfile.gettempdir())
FILE_TO_COPY = "mdf:root://eoslhcb.cern.ch//eos/lhcb/storage/lhcbpr/www/UpgradeRateTest/current_hlt2_output/hlt2_bw_testing__production__full.mdf"
def default_cache_dirs():
hostname = socket.getfqdn()
dirs = DEFAULT_CACHE_DIRS.get(hostname, DEFAULT_CACHE_DIRS['default'])
return dirs
def main():
logging.basicConfig(
format='%(levelname)-7s %(message)s', level=logging.INFO)
cache_dir = default_cache_dirs()
if not os.path.isdir(cache_dir):
fallback_dir = tempfile.mkdtemp(
prefix='bandwidth-', dir=FALLBACK_CACHE_DIR)
logging.warning("default cache dir {!r} doesn't exist, using {}".format(
cache_dir, fallback_dir))
cache_dir = fallback_dir
# if we use the fallback directory, clean up after ourselves
atexit.register(shutil.rmtree, fallback_dir)
# Now download file
logging.info(f'Downloading input file {FILE_TO_COPY}')
# download_mdf_inputs_locally only downloads if files
# are not already available locally on the machine
logging.info(f'Downloading inputs for bandwidth job to {cache_dir}')
before_copy = datetime.now()
kB_to_GB = 1e3
downloaded_path = download_mdf_inputs_locally(
[FILE_TO_COPY], cache_dir, max_size=300 * kB_to_GB * 2e4
) # Guesses as to output size and n_events in the FULL stream TODO improve
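# arithmetic of the guess above: 300 (kB/event) * 1e3 * 2e4 (events) = 6e9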
logging.info(
f"Downloaded {downloaded_path}. This took: {datetime.now() - before_copy}"
)
if __name__ == "__main__":
main()
@@ -12,7 +12,8 @@
import GaudiPython as GP
from GaudiConf.reading import decoder, unpack_rawevent, hlt_decisions
from Configurables import (ApplicationMgr, LHCbApp, IODataManager,
EventSelector, createODIN)
EventSelector, createODIN, LHCb__UnpackRawEvent,
HltDecReportsDecoder)
from GaudiConf import IOHelper
from PyConf.application import configured_ann_svc
import operator
@@ -46,6 +47,7 @@ from PRConfig.bandwidth_helpers import FileNameHelper
6. Bandwidth
When running wg-stream config, returns same figures as above (both per line and per stream)
When running streamless-stream config, returns just the per-line information.
'''
@@ -247,14 +249,14 @@ if __name__ == '__main__':
'-p',
'--process',
type=str,
help='Compute for Hlt2 or Sprucing lines',
choices=['hlt2', 'spruce'],
help='Compute for Hlt1, Hlt2 or Sprucing lines',
choices=['hlt1', 'hlt2', 'spruce'],
required=True)
parser.add_argument(
'--stream-config',
type=str,
help='Choose production or per-WG stream configuration',
choices=['production', 'wg'],
help='Choose production, per-WG or streamless stream configuration',
choices=['streamless', 'production', 'wg'],
required=True)
args = parser.parse_args()
@@ -264,17 +266,15 @@ if __name__ == '__main__':
input_config = parse_yaml(args.config)
if args.process == "spruce" and args.stream_config == "production":
if args.process == "spruce" and args.stream_config != "wg":
raise RuntimeError(
'"production" stream config not defined for sprucing. Please use "wg".'
'"production" and "streamless" stream configs are not defined for sprucing. Please use "wg".'
)
LHCbApp(
DataType="Upgrade",
Simulation=True,
DDDBtag="dddb-20171126",
CondDBtag="sim-20171127-vc-md100",
EvtMax=n_events)
if args.process == "hlt1" and args.stream_config != "streamless":
raise RuntimeError(
'"production" and "wg" stream configs are not defined for hlt1. Please use "streamless".'
)
LHCbApp(DataType="Upgrade", Simulation=True, EvtMax=n_events)
EventSelector().PrintFreq = 10000
IODataManager(DisablePFNWarning=True)
@@ -282,43 +282,61 @@ if __name__ == '__main__':
# because we need to set `input_process='Hlt2'` in `unpack_rawevent`
# to read MDF output from Sprucing
algs = []
unpack = unpack_rawevent(
bank_types=['ODIN', 'HltDecReports', 'DstData', 'HltRoutingBits'],
configurables=True)
hlt2 = [hlt_decisions(source="Hlt2", output_loc="/Event/Hlt2/DecReports")]
if args.process == 'spruce':
spruce = [
hlt_decisions(
source="Spruce", output_loc="/Event/Spruce/DecReports")
]
else:
spruce = []
decoder = decoder(input_process=args.process.capitalize())
algs = [unpack] + hlt2 + spruce + [decoder] + [createODIN(ODIN='myODIN')]
appMgr = ApplicationMgr(TopAlg=algs)
appMgr.ExtSvc += [
configured_ann_svc(json_file=fname_helper.tck(args.stream_config))
]
with open(fname_helper.stream_config_json_path(args.stream_config)) as f:
lines = json.load(f)[args.stream]
IOHelper("MDF").inputFiles(
[fname_helper.mdf_fname_for_reading(args.stream_config, args.stream)])
with open(fname_helper.stream_config_json_path(args.stream_config)) as f:
lines = json.load(f)[args.stream]
# Hlt1 requires different unpacking than hlt2/sprucing.
if args.process == "hlt1":
unpacker = LHCb__UnpackRawEvent(
"UnpackRawEvent",
RawBankLocations=["DAQ/RawBanks/HltDecReports"],
BankTypes=["HltDecReports"])
decDec = HltDecReportsDecoder(
"HltDecReportsDecoder/Hlt1DecReportsDecoder",
OutputHltDecReportsLocation="/Event/Hlt1/DecReports",
SourceID="Hlt1",
DecoderMapping="TCKANNSvc",
RawBanks=unpacker.RawBankLocations[0])
appMgr = ApplicationMgr(TopAlg=[unpacker, decDec])
appMgr.ExtSvc += [configured_ann_svc(name='TCKANNSvc')]
else:
unpack = unpack_rawevent(
bank_types=['ODIN', 'HltDecReports', 'DstData', 'HltRoutingBits'],
configurables=True)
hlt2 = [
hlt_decisions(source="Hlt2", output_loc="/Event/Hlt2/DecReports")
]
if args.process == 'spruce':
spruce = [
hlt_decisions(
source="Spruce", output_loc="/Event/Spruce/DecReports")
]
else:
spruce = []
decoder = decoder(input_process=args.process.capitalize())
algs = [unpack] + hlt2 + spruce + [decoder
] + [createODIN(ODIN='myODIN')]
appMgr = ApplicationMgr(TopAlg=algs)
appMgr.ExtSvc += [
configured_ann_svc(json_file=fname_helper.tck(args.stream_config))
]
appMgr = GP.AppMgr()
evt = appMgr.evtsvc()
# Calculates retention, rate and bandwidth per line and stream (file)
evts_all, rawbanks_all, dst_all, event_stats, exclusive, raw, dst = processing_events_per_line_and_stream(
LHCbApp().EvtMax, lines, args.process)
rates_per_line(
event_stats, exclusive, raw, dst, input_config['input_rate'],
fname_helper.tmp_rate_table_per_line_path(args.stream_config,
args.stream))
# Calculate key quantities per stream
rates_per_stream(
evts_all, rawbanks_all, dst_all, args.stream,
input_config['input_rate'],
fname_helper.tmp_rate_table_per_stream_path(args.stream_config,
args.stream))
# Calculate key quantities per line
rates_per_line(
event_stats, exclusive, raw, dst, input_config['input_rate'],
fname_helper.tmp_rate_table_per_line_path(args.stream_config,
args.stream))
@@ -38,6 +38,11 @@ def _descriptives(lines, process):
return
if options.input_process == "Hlt1":
raise RuntimeError(
"line-descriptives only makes sense for options.input_process = Hlt2/Sprucing"
)
options.input_type = 'MDF'
options.simulation = True
options.dddb_tag = 'dddb-20171010'
......
@@ -27,6 +27,7 @@ import tempfile
import atexit
import shutil
import yaml
from datetime import datetime
# Default cache dir is the current working directory as this is most convenient for the machine
# that the test runs on periodically. It assumes the working directory is not cleaned up often,
@@ -36,9 +37,6 @@ DEFAULT_CACHE_DIRS = {'default': ['.']}
# prefer XDG_RUNTIME_DIR which should be on tmpfs
FALLBACK_CACHE_DIR = os.getenv('XDG_RUNTIME_DIR', tempfile.gettempdir())
# Limit size of output log if many options files
MAX_NFILES_TO_PRINT_TO_LOG = 10
def default_cache_dirs():
hostname = socket.getfqdn()
@@ -170,7 +168,6 @@ if __name__ == '__main__':
job_inputs = [
inputs_fns
] # This is a list to allow for possible NUMA extension: see discussion on !316.
logging.info(inputs_fns[:MAX_NFILES_TO_PRINT_TO_LOG])
# Set up local directories where inputs are cached
if args.download_input_files:
@@ -190,22 +187,25 @@
# Now download files
for i, inputs in enumerate(job_inputs):
logging.info(
f'Downloading input files {inputs[:MAX_NFILES_TO_PRINT_TO_LOG]}'
)
if all(is_remote(url) for url in inputs):
from Moore.qmtest.context import download_mdf_inputs_locally
# download_mdf_inputs_locally only downloads if files
# are not already available locally on the machine
before_copy = datetime.now()
logging.info(
f'Downloading inputs for bandwidth job to {args.cache_dirs[i]}'
)
logging.info(
'Downloading inputs for bandwidth job to {}'.format(
args.cache_dirs[i]))
f'There are {len(inputs)} input files: [{inputs[0]} ' +
(']' if len(inputs) < 2 else f'{inputs[1]}, ... ]'))
kB_to_GB = 1e3
job_inputs[i] = download_mdf_inputs_locally(
inputs,
args.cache_dirs[i],
max_size=args.avg_evt_size * kB_to_GB * args.events)
logging.info(inputs)
logging.info(
f"Finished file downloads. This took: {datetime.now() - before_copy}"
)
elif any(is_remote(url) for url in inputs_fns):
parser.error('inputs must either be all xrootd or all local')
else:
......
@@ -13483,3 +13483,27 @@ testfiles(
},
comment='Real data selected by HLT1 from run 269939.',
test_file_db=test_file_db)
testfiles(
myname='upgrade-minbias-hlt2-full-output-Dec2023',
filenames=[
"mdf:root://eoslhcb.cern.ch//eos/lhcb/wg/dpa/wp1/hlt2_full_stream_Dec2023/MagDown/hlt2_full_stream_{0}.mdf"
.format(i) for i in list(range(35)) + list(range(36, 240))
] + [
"mdf:root://eoslhcb.cern.ch//eos/lhcb/wg/dpa/wp1/hlt2_full_stream_Dec2023/MagUp/hlt2_full_stream_{0}.mdf"
.format(i) for i in list(range(117)) + list(range(118, 135))
],
qualifiers={
'Author': 'Shunan Zhang',
'Format': 'MDF',
'DataType': 'Upgrade',
'Date': '2023-12-01',
'Simulation': True,
"CondDB": "sim-20171127-vc-md100",
"GeometryVersion": "run3/trunk",
"ConditionsVersion": "master",
"DDDB": "dddb-20171126",
},
comment=
'Hlt2 Full stream output as of December 2023, used as inputs for Sprucing tests, HLT2 output rate around 128 kHz',
test_file_db=test_file_db)
@@ -76,6 +76,12 @@ class FileNameHelper(object):
self._join(self.process, stream_config,
"jaccard_similarity_matrix") + ".html")
def overlap_matrix_path(self, stream_config):
return os.path.join(
self.base_dir, self.output_subdir,
self._join(self.process, stream_config, "overlap_matrix") +
".html")
def event_no_fname(self, stream_config, stream):
return os.path.join(
self.base_dir, self.output_subdir,
......
@@ -21,9 +21,10 @@ Usage: Moore/run /path/to/Moore_bandwidth_test.sh [options] 2>&1 | tee <path-to-
Expected to be called by e.g. Moore_hlt2_bandwidth.sh for the periodic LHCbPR tests.
--process: "hlt2" or "spruce"
--input-data: "nominal" or "2023". "2023" not currently available for process == spruce
-h|--help: print this message and exit
--process: "hlt1", "hlt2" or "spruce".
--input-data: "nominal" or "2023".
"2023" not currently available for process == spruce or hlt1.
-h|--help: print this message and exit.
EOF
)
@@ -96,11 +97,30 @@ mkdir -p tmp/Output
mkdir -p tmp/Output/Inter
# Set configuration variables and check configuration makes sense
MOORE_THREADS=$(nproc)
N_EVTS=1e5
TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
# TODO: Remove Process Dependence on N_EVTS, see: https://gitlab.cern.ch/lhcb-datapkg/PRConfig/-/issues/12
case $PROCESS in
hlt1)
MOORE_THREADS=1
TEST_PATH_PREFIX='$HLT1CONFROOT/tests/options/bandwidth/'
EVENT_SIZE_UPPER_LIMIT=200
GAUDIRUN_INPUT_PROCESS="Hlt1"
STREAM_CONFIGS=( "streamless" )
case $INPUTDATA in
nominal)
N_EVTS=1e4
CONFIG_FILE="${TEST_PATH_PREFIX}hlt1_bandwidth_input.yaml"
EXTRA_OPTS="-e 1 $MOOREROOT/options/muon_geometry_v2.py" # Requires #EvtSlots==1 due to single-threading on DIGIs.
;;
*)
echo "ERROR: --input-data must be \"nominal\" for process \"$PROCESS\""
exit 1
;;
esac
;;
hlt2)
N_EVTS=1e5
MOORE_THREADS=${LBN_BUILD_JOBS:-1} # Default to single-threaded
TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
EVENT_SIZE_UPPER_LIMIT=200
GAUDIRUN_INPUT_PROCESS="Hlt2"
STREAM_CONFIGS=( "wg" "production" )
@@ -120,6 +140,9 @@ case $PROCESS in
esac
;;
spruce)
N_EVTS=1e5
MOORE_THREADS=${LBN_BUILD_JOBS:-1} # Default to single-threaded
TEST_PATH_PREFIX='$HLT2CONFROOT/tests/options/bandwidth/'
EVENT_SIZE_UPPER_LIMIT=300
GAUDIRUN_INPUT_PROCESS="Spruce"
STREAM_CONFIGS=( "wg" )
@@ -135,7 +158,7 @@ case $PROCESS in
esac
;;
*)
echo "Unrecognised process \"$PROCESS\". It must be \"hlt2\" or \"spruce\"."
echo "Unrecognised process \"$PROCESS\". It must be \"hlt1\" or \"hlt2\" or \"spruce\"."
exit 1
;;
esac
@@ -151,9 +174,14 @@ for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do
done
# 2. Compute line descriptives: persist reco, extra output
echo 'Obtaining line descriptives'
time gaudirun.py --option "from Moore import options;options.input_process=\"${GAUDIRUN_INPUT_PROCESS}\"" $PRCONFIGROOT/python/MooreTests/line-descriptives.py
STORE_ERR_CODE
if [ $PROCESS = "hlt1" ]
then
echo 'Skipping line descriptives as $PROCESS = "hlt1"'
else
echo 'Obtaining line descriptives'
time gaudirun.py --option "from Moore import options;options.input_process=\"${GAUDIRUN_INPUT_PROCESS}\"" $PRCONFIGROOT/python/MooreTests/line-descriptives.py
STORE_ERR_CODE
fi
for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do
# 3. Work out what the streams are from the config JSON; needed for later steps
@@ -163,14 +191,19 @@ for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do
echo "Found ${STREAM_CONFIG} streams: ${STREAMS[@]}"
# 4. Compute similarity matrices between streams by comparing event numbers
echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
for stream in "${STREAMS[@]}"; do
echo "Stream name: ${stream}"
time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS -n $N_EVTS --stream-config $STREAM_CONFIG --stream $stream
if [ $PROCESS = "hlt1" ]
then
echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"'
else
echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
for stream in "${STREAMS[@]}"; do
echo "Stream name: ${stream}"
time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS -n $N_EVTS --stream-config $STREAM_CONFIG --stream $stream
STORE_ERR_CODE
done
time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]}
STORE_ERR_CODE
done
time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]}
STORE_ERR_CODE
fi
# 5. Computing rates per stream as well as per line (tables split by stream)
echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
@@ -186,6 +219,14 @@ echo 'Combining all rate and bandwidth tables'
time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py --process $PROCESS
STORE_ERR_CODE
# 7. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
# 7. Test on the feasibility of a chained HLT2->Sprucing test
if [ $PROCESS = "spruce" ] && [ $INPUTDATA = "nominal" ]
then
echo 'Testing downloads of Hlt2 output for the future'
time python -m MooreTests.download_hlt2_output
fi
STORE_ERR_CODE
# 8. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
echo 'Making plots and HTML pages'
time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE
\ No newline at end of file
#!/bin/bash
###############################################################################
# (c) Copyright 2022-2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
# this path ends up printed on the BW test page; export so it can be picked up in the child process
export SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/$(basename "$0")"
$PRCONFIGROOT/scripts/benchmark-scripts/Moore_bandwidth_test.sh --process hlt1 --input-data nominal
# force 0 return code so the handler runs even for failed jobs
exit 0
@@ -22,9 +22,9 @@ fi
export THOR_JIT_N_SPLITS=8
export THOR_JIT_N_JOBS=8
python -m MooreTests.run_throughput_jobs -n=2e4 --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Aug2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
python -m MooreTests.run_throughput_jobs -n=2e4 --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Dec2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
python -m MooreTests.run_throughput_jobs -n=-1 -j 1 --profile --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Aug2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
python -m MooreTests.run_throughput_jobs -n=-1 -j 1 --profile --avg-event-size=300000 --test-file-db-key=upgrade-minbias-hlt2-full-output-Dec2023 '$MOOREROOT/tests/options/disable-git-metadata-write.py' '$HLT2CONFROOT/options/sprucing/spruce_all_lines.py' "${cache_dirs[@]}"
# force 0 return code so the handler runs even for failed jobs
exit 0